This runbook provides step-by-step instructions for deploying Kindo on your self-managed Kubernetes cluster using Helm charts.
Overview
Deployment Method: Pure Helm charts
Target: Any Kubernetes cluster (on-premises, AWS, GCP, Azure, etc.)
Estimated Time: 2-3 hours for complete deployment
Deployment Order:
Prepare Kubernetes cluster (namespaces, storage)
Configure registry access (Kindo private registry)
Deploy peripheries (third-party services: Unleash, External Secrets, Qdrant, Cerbos)
Configure secrets (Kubernetes secrets and ConfigMaps)
Deploy Kindo applications (from registry.kindo.ai)
Configure DNS and ingress
Post-deployment configuration
Terminology:
Peripheries: Third-party/open-source services (Unleash, External Secrets, Qdrant)
Kindo Applications: Services packaged by Kindo (API, Next.js, LiteLLM, workers)
Prerequisites Verification
Before starting, verify all prerequisites from the Prerequisites Guide:
#!/bin/bash
echo "=== Kindo Prerequisites Verification ==="
# Check kubectl access
echo -n "Kubernetes cluster access: "
if kubectl cluster-info > /dev/null; then
echo "ā"
# Check Kubernetes version
server_version=$(kubectl version -o json 2>/dev/null | jq -r '.serverVersion.gitVersion' | sed 's/v//')
echo " Kubernetes version: v$server_version"
if [[ "$server_version" < "1.32" ]]; then
echo " ā ERROR: Kubernetes version must be >= 1.32"
exit 1
fi
kubectl get nodes
else
echo "ā Cannot access cluster"
exit 1
fi
# Check storage class
echo -n "Storage class: "
if kubectl get storageclass > /dev/null; then
kubectl get storageclass
echo "ā"
else
echo "ā No storage class found"
fi
# Check ingress controller
echo -n "Ingress controller: "
if kubectl get ingressclass > /dev/null; then
kubectl get ingressclass
echo "ā"
else
echo "ā No ingress controller found"
fi
# Check cert-manager
echo -n "cert-manager: "
if kubectl get pods -n cert-manager > /dev/null; then
echo "ā"
else
echo "ā cert-manager not installed (optional but recommended)"
fi
# Check for GPU nodes (if planning self-hosted models)
gpu_nodes=$(kubectl get nodes -o json | jq -r '.items[] | select(.status.capacity."nvidia.com/gpu" != null) | .metadata.name' | wc -l)
if [ "$gpu_nodes" -gt 0 ]; then
echo "ā GPU nodes found: $gpu_nodes (for self-hosted LLMs)"
else
echo "ā¹ No GPU nodes (required only for self-hosted LLMs/embeddings)"
fi
echo ""
echo "Please ensure you have all credentials ready:"
echo "- PostgreSQL connection strings (kindo, unleash, litellm, ssoready)"
echo "- Redis connection string"
echo "- RabbitMQ connection string"
echo "- S3/MinIO credentials"
echo "- External service API keys (Pinecone or Qdrant, OpenAI/Anthropic, etc.)"
Step 1: Prepare Kubernetes Cluster
1.1 Create Namespaces
# Create namespace for each component
# Periphery services (third-party)
kubectl create namespace unleash # Feature flags (required)
kubectl create namespace presidio # PII detection (required)
kubectl create namespace speaches # Text-to-speech (required)
kubectl create namespace external-secrets # Secret sync (optional)
# kubectl create namespace qdrant # Vector DB (optional - if not using Pinecone)
# Kindo application services
kubectl create namespace api
kubectl create namespace next
kubectl create namespace litellm
kubectl create namespace llama-indexer
# Kindo worker services
kubectl create namespace external-poller
kubectl create namespace external-sync
kubectl create namespace credits
kubectl create namespace audit-log-exporter
kubectl create namespace task-worker-ts
# Kindo supporting services
kubectl create namespace ssoready
kubectl create namespace cerbos
# Optional Kindo GPU services (if deploying self-hosted models)
# kubectl create namespace text-embeddings
# kubectl create namespace inference
# Label all Kindo-managed namespaces (peripheries + Kindo apps)
for ns in api next litellm llama-indexer external-poller external-sync credits \\
audit-log-exporter task-worker-ts ssoready cerbos unleash presidio speaches; do
kubectl label namespace $ns app.kubernetes.io/managed-by=kindo
kubectl label namespace $ns app.kubernetes.io/part-of=kindo
done
# Verify namespaces
kubectl get namespaces -l app.kubernetes.io/managed-by=kindo
1.2 Configure Storage Class (if needed)
# Check available storage classes
kubectl get storageclasses
# If no default storage class, create one
# Example for standard persistent storage
cat <<EOF | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: standard
annotations:
storageclass.kubernetes.io/is-default-class: "true"
provisioner: kubernetes.io/no-provisioner # Replace with your provisioner
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
EOF
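To confirm the storage class can actually provision a volume, you can create and then delete a throwaway PVC. This is a minimal check that assumes the standard class defined above; with WaitForFirstConsumer the claim stays Pending until a pod consumes it, so look at the events rather than the phase.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: storage-class-test
  namespace: default
spec:
  accessModes: ["ReadWriteOnce"]
  storageClassName: standard
  resources:
    requests:
      storage: 1Gi
EOF
# Check Events for provisioner errors, then clean up
kubectl describe pvc storage-class-test -n default
kubectl delete pvc storage-class-test -n default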
Step 2: Configure Registry Access
2.1 Create Registry Secret
Kindo Helm charts are stored in a private OCI registry. Create a secret to access them:
# Set your Kindo registry credentials
REGISTRY_USERNAME="your-username" # Provided by Kindo
REGISTRY_PASSWORD="your-password" # Provided by Kindo
# Create registry secret in each namespace
for namespace in api next litellm llama-indexer external-poller external-sync \\
credits audit-log-exporter task-worker-ts ssoready cerbos; do
kubectl create secret docker-registry kindo-registry \\
--docker-server=registry.kindo.ai \\
--docker-username="$REGISTRY_USERNAME" \\
--docker-password="$REGISTRY_PASSWORD" \\
--namespace=$namespace
done
# Create for GPU namespaces if deploying self-hosted models
# kubectl create secret docker-registry kindo-registry \\
# --docker-server=registry.kindo.ai \\
# --docker-username="$REGISTRY_USERNAME" \\
# --docker-password="$REGISTRY_PASSWORD" \\
# --namespace=text-embeddings
# kubectl create secret docker-registry kindo-registry \\
# --docker-server=registry.kindo.ai \\
# --docker-username="$REGISTRY_USERNAME" \\
# --docker-password="$REGISTRY_PASSWORD" \\
# --namespace=inference
# Verify secrets
kubectl get secret kindo-registry -n api
kubectl get secret kindo-registry -n next
2.2 Test Registry Access
# Test pulling a chart (won't install, just verify access)
helm pull oci://registry.kindo.ai/kindo-helm/api \\
--version 2025.08.2 \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Cleanup test download
rm -f api-*.tgz
echo "ā Registry access confirmed"
Step 3: Deploy Peripheries (Third-Party Services)
What are Peripheries?
Peripheries are open-source and third-party services that Kindo applications depend on. These are NOT packaged or provided by Kindo. You install them from their own public Helm repositories or container registries.
Peripheries include:
External Secrets Operator (secrets management) - optional
Unleash (feature flags) - required
Unleash Edge (feature flag edge proxy) - required
Presidio (PII detection and anonymization) - required
Speaches (text-to-speech service) - required
Qdrant (vector database, if not using Pinecone) - optional
What's NOT a Periphery?
Kindo-packaged applications (deployed in Step 5) are provided by Kindo in registry.kindo.ai and include: API, Next.js, LiteLLM, Llama Indexer, SSOready, Cerbos, and workers.
3.1 Deploy External Secrets Operator (Recommended)
Source: External Secrets community Helm repository
Purpose: Sync secrets from external secret stores (AWS Secrets Manager, Vault, etc.) to Kubernetes
External Secrets Operator syncs secrets from external secret stores to Kubernetes:
# Add the External Secrets Helm repository
helm repo add external-secrets https://charts.external-secrets.io
# Install External Secrets Operator
helm install external-secrets \\
external-secrets/external-secrets \\
--namespace external-secrets \\
--create-namespace \\
--set installCRDs=true
# Verify installation
kubectl get pods -n external-secrets
Configure Secret Store (example for AWS Secrets Manager):
# Configure Secret Store (example for AWS Secrets Manager)
cat <<EOF | kubectl apply -f -
apiVersion: external-secrets.io/v1beta1
kind: ClusterSecretStore
metadata:
name: aws-secrets-manager
spec:
provider:
aws:
service: SecretsManager
region: us-west-2
auth:
jwt:
serviceAccountRef:
name: external-secrets
namespace: external-secrets
EOF
# For other backends (Vault, GCP, Azure), see: https://external-secrets.io/latest/provider/
3.2 Deploy Unleash (Feature Flags)
Source: Unleash official Helm repository (https://docs.getunleash.io/helm-charts)
Purpose: Feature flag management platform
Note: This is the official open-source Unleash, not a Kindo-packaged version
# Add Unleash Helm repository
helm repo add unleash https://docs.getunleash.io/helm-charts
# Generate secure admin password
UNLEASH_ADMIN_PASSWORD=$(openssl rand -base64 16)
echo "Unleash admin password: $UNLEASH_ADMIN_PASSWORD"
echo "Save this password securely!"
# Create values file for Unleash
cat > unleash-values.yaml <<EOF
postgresql:
enabled: false # We use external PostgreSQL
database:
type: postgres
host: YOUR_POSTGRES_HOST
port: 5432
name: unleash
user: unleash
pass: YOUR_UNLEASH_DB_PASSWORD
ssl:
enabled: false # Set true if using SSL/TLS
ingress:
enabled: true
className: nginx # or your ingress class
hosts:
- host: unleash.kindo.company.com
paths:
- path: /
pathType: Prefix
tls:
- secretName: unleash-tls
hosts:
- unleash.kindo.company.com
# Set admin credentials
env:
- name: UNLEASH_DEFAULT_ADMIN_USERNAME
value: admin
- name: UNLEASH_DEFAULT_ADMIN_PASSWORD
value: "$UNLEASH_ADMIN_PASSWORD"
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
EOF
# Install Unleash
helm install unleash unleash/unleash \\
--namespace unleash \\
--create-namespace \\
--values unleash-values.yaml
# Wait for Unleash to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=unleash -n unleash --timeout=300s
echo "ā Unleash deployed"
echo " URL: <https://unleash.kindo.company.com>"
echo " Username: admin"
echo " Password: $UNLEASH_ADMIN_PASSWORD"
3.3 Deploy Unleash Edge (Feature Flag Edge Proxy)
Source: Unleash official Docker image
Purpose: Edge proxy for faster feature flag evaluation
Note: Required for Kindo applications
# Reference values for Unleash Edge (there is no official Helm chart; the manifests below are applied directly)
cat > unleash-edge-values.yaml <<EOF
image:
repository: unleashorg/unleash-edge
tag: "18.0.0"
pullPolicy: IfNotPresent
replicaCount: 2
service:
type: ClusterIP
port: 3063
env:
- name: UNLEASH_URL
value: "<http://unleash.unleash.svc.cluster.local:4242/api>"
- name: UNLEASH_API_TOKEN
value: "$UNLEASH_CLIENT_TOKEN" # From generated secrets
- name: UNLEASH_INSTANCE_ID
value: "kindo-edge"
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
httpGet:
path: /health
port: 3063
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3063
initialDelaySeconds: 5
periodSeconds: 5
EOF
# Create deployment manually (no official Helm chart)
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: unleash-edge
namespace: unleash
labels:
app.kubernetes.io/name: unleash-edge
app.kubernetes.io/managed-by: kindo
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: unleash-edge
template:
metadata:
labels:
app.kubernetes.io/name: unleash-edge
spec:
containers:
- name: unleash-edge
image: unleashorg/unleash-edge:18.0.0
ports:
- containerPort: 3063
name: http
env:
- name: UNLEASH_URL
value: "<http://unleash.unleash.svc.cluster.local:4242/api>"
- name: UNLEASH_API_TOKEN
value: "$UNLEASH_CLIENT_TOKEN"
- name: UNLEASH_INSTANCE_ID
value: "kindo-edge"
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
livenessProbe:
httpGet:
path: /health
port: 3063
initialDelaySeconds: 10
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3063
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: unleash-edge
namespace: unleash
labels:
app.kubernetes.io/name: unleash-edge
spec:
type: ClusterIP
ports:
- port: 3063
targetPort: 3063
protocol: TCP
name: http
selector:
app.kubernetes.io/name: unleash-edge
EOF
# Wait for Unleash Edge to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=unleash-edge -n unleash --timeout=300s
echo "ā Unleash Edge deployed"
echo " Internal URL: <http://unleash-edge.unleash.svc.cluster.local:3063>"
3.4 Deploy Presidio (PII Detection)
Source: Microsoft Presidio official Docker images
Purpose: PII detection and anonymization
Note: Required for data privacy features
Presidio requires two components: Analyzer and Anonymizer
# Create Presidio namespace (skip if already created in Step 1.1)
kubectl create namespace presidio
kubectl label namespace presidio app.kubernetes.io/managed-by=kindo
# Deploy Presidio Analyzer
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: presidio-analyzer
namespace: presidio
labels:
app.kubernetes.io/name: presidio-analyzer
app.kubernetes.io/managed-by: kindo
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: presidio-analyzer
template:
metadata:
labels:
app.kubernetes.io/name: presidio-analyzer
spec:
containers:
- name: presidio-analyzer
image: mcr.microsoft.com/presidio-analyzer:2.2.360
ports:
- containerPort: 3000
name: http
env:
- name: GRPC_PORT
value: "3001"
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 10
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: presidio-analyzer
namespace: presidio
labels:
app.kubernetes.io/name: presidio-analyzer
spec:
type: ClusterIP
ports:
- port: 3000
targetPort: 3000
protocol: TCP
name: http
- port: 3001
targetPort: 3001
protocol: TCP
name: grpc
selector:
app.kubernetes.io/name: presidio-analyzer
EOF
# Deploy Presidio Anonymizer
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: presidio-anonymizer
namespace: presidio
labels:
app.kubernetes.io/name: presidio-anonymizer
app.kubernetes.io/managed-by: kindo
spec:
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: presidio-anonymizer
template:
metadata:
labels:
app.kubernetes.io/name: presidio-anonymizer
spec:
containers:
- name: presidio-anonymizer
image: mcr.microsoft.com/presidio-anonymizer:2.2.360
ports:
- containerPort: 3000
name: http
env:
- name: GRPC_PORT
value: "3001"
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 1Gi
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 10
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: presidio-anonymizer
namespace: presidio
labels:
app.kubernetes.io/name: presidio-anonymizer
spec:
type: ClusterIP
ports:
- port: 3000
targetPort: 3000
protocol: TCP
name: http
- port: 3001
targetPort: 3001
protocol: TCP
name: grpc
selector:
app.kubernetes.io/name: presidio-anonymizer
EOF
# Wait for Presidio to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=presidio-analyzer -n presidio --timeout=300s
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=presidio-anonymizer -n presidio --timeout=300s
echo "ā Presidio deployed"
echo " Analyzer URL: <http://presidio-analyzer.presidio.svc.cluster.local:3000>"
echo " Anonymizer URL: <http://presidio-anonymizer.presidio.svc.cluster.local:3000>"
3.5 Deploy Speaches (Text-to-Speech Service)
Source: Speaches AI (speaches-ai/speaches)
Purpose: Speech-to-text and text-to-speech API
Note: Required for voice features
Choose CPU or GPU variant based on your infrastructure:
Option A: Speaches (CPU Version)
For deployments without GPU:
# Create Speaches namespace (skip if already created in Step 1.1)
kubectl create namespace speaches
kubectl label namespace speaches app.kubernetes.io/managed-by=kindo
# Deploy Speaches (CPU)
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: speaches
namespace: speaches
labels:
app.kubernetes.io/name: speaches
app.kubernetes.io/managed-by: kindo
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: speaches
template:
metadata:
labels:
app.kubernetes.io/name: speaches
spec:
containers:
- name: speaches
image: ghcr.io/speaches-ai/speaches:latest
ports:
- containerPort: 5002
name: http
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 2000m
memory: 4Gi
---
apiVersion: v1
kind: Service
metadata:
name: speaches
namespace: speaches
labels:
app.kubernetes.io/name: speaches
spec:
type: ClusterIP
ports:
- port: 5002
targetPort: 5002
protocol: TCP
name: http
selector:
app.kubernetes.io/name: speaches
EOF
echo "ā Speaches (CPU) deployed"
echo " Internal URL: <http://speaches.speaches.svc.cluster.local:5002>"
Option B: Speaches (GPU Version)
For better performance with GPU:
# Create Speaches namespace (skip if already created above)
# kubectl create namespace speaches
# kubectl label namespace speaches app.kubernetes.io/managed-by=kindo
# Deploy Speaches (GPU)
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
name: speaches
namespace: speaches
labels:
app.kubernetes.io/name: speaches
app.kubernetes.io/managed-by: kindo
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: speaches
template:
metadata:
labels:
app.kubernetes.io/name: speaches
spec:
containers:
- name: speaches
image: ghcr.io/speaches-ai/speaches:latest-cuda
ports:
- containerPort: 5002
name: http
env:
- name: STT_MODEL
value: "guillaumekln/faster-whisper-small"
- name: USE_CUDA
value: "true"
resources:
requests:
cpu: 500m
memory: 2Gi
nvidia.com/gpu: 1
limits:
cpu: 2000m
memory: 8Gi
nvidia.com/gpu: 1
nodeSelector:
nvidia.com/gpu: "true"
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
---
apiVersion: v1
kind: Service
metadata:
name: speaches
namespace: speaches
labels:
app.kubernetes.io/name: speaches
spec:
type: ClusterIP
ports:
- port: 5002
targetPort: 5002
protocol: TCP
name: http
selector:
app.kubernetes.io/name: speaches
EOF
echo "ā Speaches (GPU) deployed"
echo " Internal URL: <http://speaches.speaches.svc.cluster.local:5002>"
3.6 Deploy Vector Database (Choose One)
Option A: Use Pinecone (Managed)
If using Pinecone, no deployment needed - just ensure you have:
Pinecone API key
Pod-based index created
Environment and index name documented
Skip to Step 3.7 (Summary).
Option B: Deploy Qdrant (Self-Hosted)
Source: Qdrant official Helm repository (https://qdrant.github.io/qdrant-helm)
Purpose: Self-hosted vector database
Note: Open-source Qdrant, not Kindo-packaged
If using self-hosted Qdrant for full data control:
# Add Qdrant Helm repository
helm repo add qdrant https://qdrant.github.io/qdrant-helm
helm repo update
# Generate an API key up front so it can be reused by the applications in Step 4
QDRANT_API_KEY=$(openssl rand -hex 32)
echo "Qdrant API key: $QDRANT_API_KEY (store this securely)"
# Create Qdrant values file
cat > qdrant-values.yaml <<EOF
replicaCount: 3 # For high availability
image:
repository: qdrant/qdrant
tag: "v1.7.0"
service:
type: ClusterIP
port: 6333
grpcPort: 6334
persistence:
enabled: true
size: 100Gi
storageClass: "" # Use default storage class
resources:
requests:
cpu: 1000m
memory: 2Gi
limits:
cpu: 4000m
memory: 8Gi
# Enable API key authentication (optional but recommended)
apiKey:
enabled: true
value: "$QDRANT_API_KEY"
config:
cluster:
enabled: true # Enable clustering for HA
EOF
# Install Qdrant
helm install qdrant qdrant/qdrant \\
--namespace qdrant \\
--create-namespace \\
--values qdrant-values.yaml
# Wait for Qdrant to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=qdrant -n qdrant --timeout=300s
# Create collection for embeddings
QDRANT_URL="http://qdrant.qdrant.svc.cluster.local:6333"
# Create collection (include the api-key header since authentication is enabled above)
kubectl run -it --rm qdrant-setup --image=curlimages/curl --restart=Never -- \\
curl -X PUT "$QDRANT_URL/collections/kindo-embeddings" \\
-H "api-key: $QDRANT_API_KEY" \\
-H 'Content-Type: application/json' \\
-d '{
"vectors": {
"size": 1536,
"distance": "Cosine"
},
"optimizers_config": {
"indexing_threshold": 10000
}
}'
echo "ā Qdrant deployed with collection: kindo-embeddings"
echo " Internal URL: $QDRANT_URL"
3.7 Summary of Deployed Peripheries
At this point, you should have deployed these third-party services:
| Service | Source | Namespace | Required? | Purpose |
|---|---|---|---|---|
| External Secrets Operator | External Secrets community | external-secrets | Optional | Secret sync |
| Unleash | Unleash official | unleash | Required | Feature flags |
| Unleash Edge | Unleash official | unleash | Required | Feature flag edge proxy |
| Presidio Analyzer | Microsoft Presidio | presidio | Required | PII detection |
| Presidio Anonymizer | Microsoft Presidio | presidio | Required | PII anonymization |
| Speaches | Speaches AI | speaches | Required | Speech-to-text & text-to-speech |
| Qdrant | Qdrant official | qdrant | Optional | Vector DB (if not Pinecone) |
Verification:
echo "=== Checking Peripheries ==="
# Required peripheries
echo "Unleash:"
kubectl get pods -n unleash
echo ""
echo "Presidio:"
kubectl get pods -n presidio
echo ""
echo "Speaches:"
kubectl get pods -n speaches
# Optional peripheries
echo ""
echo "External Secrets (optional):"
kubectl get pods -n external-secrets 2>/dev/null || echo " Not deployed"
echo ""
echo "Qdrant (optional):"
kubectl get pods -n qdrant 2>/dev/null || echo " Not deployed (using Pinecone)"
echo ""
echo "ā
All required peripheries deployed"
Summary of Deployment Time:
External Secrets: ~2 minutes (if deployed)
Unleash: ~3 minutes
Unleash Edge: ~1 minute
Presidio (both services): ~2 minutes
Speaches: ~2 minutes
Qdrant: ~3 minutes (if deployed)
Total: ~10-15 minutes for all peripheries
What's Next:
Step 4: Configure secrets (including URLs for peripheries)
Step 5: Deploy Kindo-packaged applications from registry.kindo.ai
Step 4: Configure Secrets
4.1 Generate Random Secrets
Generate secure random secrets for internal use:
# Generate required secrets
export NEXTAUTH_SECRET=$(openssl rand -base64 32)
export KEY_ENCRYPTION_KEY=$(openssl rand -base64 32)
export UM_INTERNAL_API_KEY=$(uuidgen | tr '[:upper:]' '[:lower:]')
export LITELLM_MASTER_KEY=$(openssl rand -hex 32)
export UNLEASH_ADMIN_TOKEN="*:*.$(openssl rand -hex 32)"
export UNLEASH_CLIENT_TOKEN="default:development.$(openssl rand -hex 32)"
export UNLEASH_FRONTEND_TOKEN="*:development.$(openssl rand -hex 32)"
# Save these to a secure location!
cat > generated-secrets.env <<EOF
# CRITICAL: Store this file securely. Do not commit to git.
NEXTAUTH_SECRET=$NEXTAUTH_SECRET
KEY_ENCRYPTION_KEY=$KEY_ENCRYPTION_KEY
UM_INTERNAL_API_KEY=$UM_INTERNAL_API_KEY
LITELLM_MASTER_KEY=$LITELLM_MASTER_KEY
UNLEASH_ADMIN_TOKEN=$UNLEASH_ADMIN_TOKEN
UNLEASH_CLIENT_TOKEN=$UNLEASH_CLIENT_TOKEN
UNLEASH_FRONTEND_TOKEN=$UNLEASH_FRONTEND_TOKEN
EOF
chmod 600 generated-secrets.env
echo "ā Generated secrets saved to generated-secrets.env"
4.2 Create Kubernetes Secrets
Option A: Using External Secrets Operator (Recommended)
If using External Secrets Operator, create secrets in your secret backend first, then create ExternalSecret resources:
# api-external-secret.yaml
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: api-config
namespace: api
spec:
refreshInterval: 1h
secretStoreRef:
name: aws-secrets-manager # or your secret store
kind: ClusterSecretStore
target:
name: api-config
creationPolicy: Owner
data:
- secretKey: config
remoteRef:
key: kindo/production/api-config
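The ExternalSecret above assumes the secret already exists in the backend under the key referenced by remoteRef. For AWS Secrets Manager, creating it could look like this (a sketch; the payload file and region are placeholders):
# api-config.json holds the key/value pairs the application expects
aws secretsmanager create-secret \
--name kindo/production/api-config \
--secret-string file://api-config.json \
--region us-west-2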
Option B: Manual Kubernetes Secrets
If not using External Secrets Operator, create secrets directly:
# Source your credentials
source generated-secrets.env
# Set your external service credentials
export POSTGRES_MAIN_URL="postgresql://kindo:PASSWORD@HOST:5432/kindo"
export POSTGRES_UNLEASH_URL="postgresql://unleash:PASSWORD@HOST:5432/unleash"
export POSTGRES_LITELLM_URL="postgresql://litellm:PASSWORD@HOST:5432/litellm"
export POSTGRES_SSOREADY_URL="postgresql://ssoready:PASSWORD@HOST:5432/ssoready"
export REDIS_URL="redis://:PASSWORD@HOST:6379"
export RABBITMQ_URL="amqp://USER:PASSWORD@HOST:5672"
export S3_ACCESS_KEY_ID="your-access-key"
export S3_SECRET_ACCESS_KEY="your-secret-key"
export S3_BUCKET_NAME="kindo-uploads"
export S3_ENDPOINT="https://s3.amazonaws.com" # or MinIO endpoint
export S3_REGION="us-west-2"
# Vector database (choose one)
export PINECONE_API_KEY="pc-xxxxx" # If using Pinecone
export PINECONE_ENVIRONMENT="us-east-1-aws"
export PINECONE_INDEX_NAME="kindo-embeddings"
# OR
export QDRANT_URL="http://qdrant.qdrant.svc.cluster.local:6333" # If using Qdrant
export QDRANT_API_KEY="your-qdrant-api-key" # From Step 3.6 (leave empty if API key auth is disabled)
export QDRANT_COLLECTION_NAME="kindo-embeddings"
# AI providers (at least one required, or use self-hosted)
export OPENAI_API_KEY="sk-xxxxx" # If using OpenAI
export ANTHROPIC_API_KEY="sk-ant-xxxxx" # If using Anthropic
# export AZURE_OPENAI_API_KEY="" # If using Azure OpenAI
# export AZURE_OPENAI_ENDPOINT=""
# export AZURE_OPENAI_DEPLOYMENT=""
# Email service
export SMTP_HOST="smtp.gmail.com"
export SMTP_PORT="587"
export SMTP_USER="[email protected]"
export SMTP_PASSWORD="your-smtp-password"
export SMTP_FROM_ADDRESS="[email protected]"
# Audit logging
export SYSLOG_HOST="syslog.company.com"
export SYSLOG_PORT="514"
export SYSLOG_PROTOCOL="udp" # or "tcp"
# Create API config secret
kubectl create secret generic api-config \\
--from-literal=DATABASE_URL="$POSTGRES_MAIN_URL" \\
--from-literal=REDIS_URL="$REDIS_URL" \\
--from-literal=RABBITMQ_URL="$RABBITMQ_URL" \\
--from-literal=S3_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID" \\
--from-literal=S3_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY" \\
--from-literal=S3_BUCKET_NAME="$S3_BUCKET_NAME" \\
--from-literal=S3_ENDPOINT="$S3_ENDPOINT" \\
--from-literal=S3_REGION="$S3_REGION" \\
--from-literal=NEXTAUTH_SECRET="$NEXTAUTH_SECRET" \\
--from-literal=KEY_ENCRYPTION_KEY="$KEY_ENCRYPTION_KEY" \\
--from-literal=UM_INTERNAL_API_KEY="$UM_INTERNAL_API_KEY" \\
--from-literal=PINECONE_API_KEY="${PINECONE_API_KEY:-}" \\
--from-literal=PINECONE_ENVIRONMENT="${PINECONE_ENVIRONMENT:-}" \\
--from-literal=PINECONE_INDEX_NAME="${PINECONE_INDEX_NAME:-}" \\
--from-literal=QDRANT_URL="${QDRANT_URL:-}" \\
--from-literal=QDRANT_API_KEY="${QDRANT_API_KEY:-}" \\
--from-literal=QDRANT_COLLECTION_NAME="${QDRANT_COLLECTION_NAME:-}" \\
--from-literal=OPENAI_API_KEY="${OPENAI_API_KEY:-}" \\
--from-literal=ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \\
--from-literal=SMTP_HOST="$SMTP_HOST" \\
--from-literal=SMTP_PORT="$SMTP_PORT" \\
--from-literal=SMTP_USER="$SMTP_USER" \\
--from-literal=SMTP_PASSWORD="$SMTP_PASSWORD" \\
--from-literal=SMTP_FROM_ADDRESS="$SMTP_FROM_ADDRESS" \\
--from-literal=SYSLOG_HOST="$SYSLOG_HOST" \\
--from-literal=SYSLOG_PORT="$SYSLOG_PORT" \\
--from-literal=SYSLOG_PROTOCOL="$SYSLOG_PROTOCOL" \\
--from-literal=UNLEASH_ADMIN_TOKEN="$UNLEASH_ADMIN_TOKEN" \\
--from-literal=UNLEASH_CLIENT_TOKEN="$UNLEASH_CLIENT_TOKEN" \\
--from-literal=SSOREADY_DATABASE_URL="$POSTGRES_SSOREADY_URL" \\
--namespace api
# Create Next.js config secret
kubectl create secret generic next-config \\
--from-literal=NEXTAUTH_SECRET="$NEXTAUTH_SECRET" \\
--from-literal=API_URL="https://api.kindo.company.com" \\
--from-literal=UNLEASH_FRONTEND_TOKEN="$UNLEASH_FRONTEND_TOKEN" \\
--from-literal=SSOREADY_BASE_URL="https://app.kindo.company.com" \\
--namespace next
# Create LiteLLM config secret
kubectl create secret generic litellm-config \\
--from-literal=DATABASE_URL="$POSTGRES_LITELLM_URL" \\
--from-literal=LITELLM_MASTER_KEY="$LITELLM_MASTER_KEY" \\
--from-literal=OPENAI_API_KEY="${OPENAI_API_KEY:-}" \\
--from-literal=ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \\
--namespace litellm
# Create Llama Indexer config secret
kubectl create secret generic llama-indexer-config \\
--from-literal=REDIS_URL="$REDIS_URL" \\
--from-literal=RABBITMQ_URL="$RABBITMQ_URL" \\
--from-literal=S3_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID" \\
--from-literal=S3_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY" \\
--from-literal=S3_BUCKET_NAME="$S3_BUCKET_NAME" \\
--from-literal=S3_REGION="$S3_REGION" \\
--from-literal=PINECONE_API_KEY="${PINECONE_API_KEY:-}" \\
--from-literal=PINECONE_ENVIRONMENT="${PINECONE_ENVIRONMENT:-}" \\
--from-literal=PINECONE_INDEX_NAME="${PINECONE_INDEX_NAME:-}" \\
--from-literal=QDRANT_URL="${QDRANT_URL:-}" \\
--from-literal=QDRANT_API_KEY="${QDRANT_API_KEY:-}" \\
--from-literal=QDRANT_COLLECTION_NAME="${QDRANT_COLLECTION_NAME:-}" \\
--from-literal=OPENAI_API_KEY="${OPENAI_API_KEY:-}" \\
--from-literal=ANTHROPIC_API_KEY="${ANTHROPIC_API_KEY:-}" \\
--namespace llama-indexer
# Create shared worker config secret (for external-poller, external-sync, credits, audit-log-exporter, task-worker-ts)
# These services share the API configuration
for ns in external-poller external-sync credits audit-log-exporter task-worker-ts; do
kubectl create secret generic worker-config \\
--from-literal=DATABASE_URL="$POSTGRES_MAIN_URL" \\
--from-literal=REDIS_URL="$REDIS_URL" \\
--from-literal=RABBITMQ_URL="$RABBITMQ_URL" \\
--from-literal=S3_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID" \\
--from-literal=S3_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY" \\
--from-literal=S3_BUCKET_NAME="$S3_BUCKET_NAME" \\
--from-literal=S3_ENDPOINT="$S3_ENDPOINT" \\
--from-literal=S3_REGION="$S3_REGION" \\
--from-literal=NEXTAUTH_SECRET="$NEXTAUTH_SECRET" \\
--from-literal=KEY_ENCRYPTION_KEY="$KEY_ENCRYPTION_KEY" \\
--from-literal=UM_INTERNAL_API_KEY="$UM_INTERNAL_API_KEY" \\
--from-literal=SMTP_HOST="$SMTP_HOST" \\
--from-literal=SMTP_PORT="$SMTP_PORT" \\
--from-literal=SMTP_USER="$SMTP_USER" \\
--from-literal=SMTP_PASSWORD="$SMTP_PASSWORD" \\
--from-literal=SMTP_FROM_ADDRESS="$SMTP_FROM_ADDRESS" \\
--from-literal=SYSLOG_HOST="$SYSLOG_HOST" \\
--from-literal=SYSLOG_PORT="$SYSLOG_PORT" \\
--from-literal=SYSLOG_PROTOCOL="$SYSLOG_PROTOCOL" \\
--from-literal=UNLEASH_ADMIN_TOKEN="$UNLEASH_ADMIN_TOKEN" \\
--from-literal=UNLEASH_CLIENT_TOKEN="$UNLEASH_CLIENT_TOKEN" \\
--namespace $ns
done
# Create SSOready config secret
kubectl create secret generic ssoready-config \\
--from-literal=DATABASE_URL="$POSTGRES_SSOREADY_URL" \\
--from-literal=SSOREADY_SECRET_KEY="$(openssl rand -hex 32)" \\
--from-literal=SSOREADY_BASE_URL="https://app.kindo.company.com" \\
--namespace ssoready
# Verify secrets created
kubectl get secrets -n api
kubectl get secrets -n next
kubectl get secrets -n litellm
kubectl get secrets -n external-poller
Step 5: Deploy Kindo Applications
Critical Distinction: This step deploys Kindo-packaged applications from the private Kindo registry (registry.kindo.ai). These are NOT open-source peripheries.
All Kindo applications:
Are deployed from oci://registry.kindo.ai/kindo-helm/<app-name>
Require Kindo registry credentials (configured in Step 2)
Are maintained and versioned by Kindo
Include: API, Next.js, LiteLLM, Llama Indexer, workers, SSOready, Cerbos
Pattern for all Kindo apps:
helm install <app> oci://registry.kindo.ai/kindo-helm/<app> \\
--version 2025.08.2 \\
--namespace <app> \\
--values <app>-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
5.1 Deploy Cerbos (Authorization Engine)
Source: Kindo-packaged (includes custom authorization policies)
Registry: registry.kindo.ai/kindo/cerbos
# Create Cerbos values file
cat > cerbos-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/cerbos
tag: "2025.08.2"
pullPolicy: IfNotPresent
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
service:
type: ClusterIP
port: 3592
grpcPort: 3593
resources:
requests:
cpu: 100m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
EOF
# Deploy Cerbos
helm install cerbos oci://registry.kindo.ai/kindo-helm/cerbos \\
--version 2025.08.2 \\
--namespace cerbos \\
--create-namespace \\
--values cerbos-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Verify
kubectl get pods -n cerbos
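To verify the policy engine responds, you can hit Cerbos's HTTP health endpoint from inside the cluster (the service name and port assume the chart's defaults shown above):
kubectl run -it --rm cerbos-check --image=curlimages/curl --restart=Never -- \
curl -s http://cerbos.cerbos.svc.cluster.local:3592/_cerbos/health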
5.2 Deploy API Service
Source: Kindo-packaged Node.js application
Registry: registry.kindo.ai/kindo/api
# Create API values file
cat > api-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/api
tag: "2025.08.2"
pullPolicy: IfNotPresent
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
service:
type: ClusterIP
port: 3000
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 2000m
memory: 4Gi
# Reference secrets and configmaps
envFrom:
- secretRef:
name: api-config
- configMapRef:
name: api-config
# Health checks
livenessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 3000
initialDelaySeconds: 10
periodSeconds: 5
EOF
# Deploy API
helm install api oci://registry.kindo.ai/kindo-helm/api \\
--version 2025.08.2 \\
--namespace api \\
--values api-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for API to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=api -n api --timeout=300s
5.3 Deploy Next.js Frontend
Source: Kindo-packaged Next.js application
Registry: registry.kindo.ai/kindo/next
# Create Next.js values file
cat > next-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/next
tag: "2025.08.2"
pullPolicy: IfNotPresent
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
service:
type: ClusterIP
port: 3000
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
envFrom:
- secretRef:
name: next-config
- configMapRef:
name: next-config
livenessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /api/health
port: 3000
initialDelaySeconds: 10
periodSeconds: 5
EOF
# Deploy Next.js
helm install next oci://registry.kindo.ai/kindo-helm/next \\
--version 2025.08.2 \\
--namespace next \\
--values next-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for Next.js to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=next -n next --timeout=300s
5.4 Deploy LiteLLM (AI Proxy)
Source: Kindo-packaged Python application
Registry: registry.kindo.ai/kindo/litellm
# Create LiteLLM values file
cat > litellm-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/litellm
tag: "2025.08.2"
pullPolicy: IfNotPresent
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
service:
type: ClusterIP
port: 8000
resources:
requests:
cpu: 1000m
memory: 2Gi
limits:
cpu: 4000m
memory: 8Gi
envFrom:
- secretRef:
name: litellm-config
- configMapRef:
name: litellm-config
livenessProbe:
httpGet:
path: /health
port: 8000
initialDelaySeconds: 30
periodSeconds: 10
EOF
# Deploy LiteLLM
helm install litellm oci://registry.kindo.ai/kindo-helm/litellm \\
--version 2025.08.2 \\
--namespace litellm \\
--values litellm-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for LiteLLM to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=litellm -n litellm --timeout=300s
5.5 Deploy Llama Indexer (Document Processing)
Source: Kindo-packaged Python application
Registry: registry.kindo.ai/kindo/llama-indexer
# Create Llama Indexer values file
cat > llama-indexer-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/llama-indexer
tag: "2025.08.2"
pullPolicy: IfNotPresent
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
service:
type: ClusterIP
port: 8000
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 2000m
memory: 4Gi
envFrom:
- secretRef:
name: llama-indexer-config
EOF
# Deploy Llama Indexer
helm install llama-indexer oci://registry.kindo.ai/kindo-helm/llama-indexer \\
--version 2025.08.2 \\
--namespace llama-indexer \\
--values llama-indexer-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for Llama Indexer to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=llama-indexer -n llama-indexer --timeout=300s
5.6 Deploy Worker Services
All worker services are Kindo-packaged applications from registry.kindo.ai.
5.6a External Poller (Background Jobs)
Source: Kindo-packaged Node.js worker
Registry: registry.kindo.ai/kindo/external-poller
cat > external-poller-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/external-poller
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
envFrom:
- secretRef:
name: worker-config
- configMapRef:
name: worker-config
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
EOF
helm install external-poller oci://registry.kindo.ai/kindo-helm/external-poller \\
--version 2025.08.2 \\
--namespace external-poller \\
--values external-poller-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
5.6b External Sync (Data Synchronization)
Source: Kindo-packaged Node.js worker
Registry: registry.kindo.ai/kindo/external-sync
cat > external-sync-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/external-sync
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
envFrom:
- secretRef:
name: worker-config
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
EOF
helm install external-sync oci://registry.kindo.ai/kindo-helm/external-sync \\
--version 2025.08.2 \\
--namespace external-sync \\
--values external-sync-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
5.6c Credits Service
Source: Kindo-packaged Node.js service
Registry: registry.kindo.ai/kindo/credits
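No chart invocation was included for this service; the sketch below follows the same pattern as the other workers (the chart name and values are assumptions, so confirm them against your registry listing):
cat > credits-values.yaml <<EOF
image:
  repository: registry.kindo.ai/kindo/credits
  tag: "2025.08.2"
imagePullSecrets:
  - name: kindo-registry
replicaCount: 1
envFrom:
  - secretRef:
      name: worker-config
resources:
  requests:
    cpu: 250m
    memory: 512Mi
  limits:
    cpu: 1000m
    memory: 2Gi
EOF
helm install credits oci://registry.kindo.ai/kindo-helm/credits \
--version 2025.08.2 \
--namespace credits \
--values credits-values.yaml \
--username "$REGISTRY_USERNAME" \
--password "$REGISTRY_PASSWORD"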
5.6d Audit Log Exporter
Source: Kindo-packaged Node.js service
Registry: registry.kindo.ai/kindo/audit-log-exporter
cat > audit-log-exporter-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/audit-log-exporter
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
envFrom:
- secretRef:
name: worker-config
- configMapRef:
name: worker-config
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 1Gi
EOF
helm install audit-log-exporter oci://registry.kindo.ai/kindo-helm/audit-log-exporter \\
--version 2025.08.2 \\
--namespace audit-log-exporter \\
--values audit-log-exporter-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
5.7 Deploy SSOready (Authentication)
Source: Kindo-packaged authentication service
Registry: registry.kindo.ai/kindo/ssoready
Note: Based on open-source SSOready but packaged and configured for Kindo
cat > ssoready-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/ssoready
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
service:
type: ClusterIP
port: 8080
envFrom:
- secretRef:
name: ssoready-config
env:
- name: SSOREADY_ADMIN_EMAIL
value: "[email protected]" # Change this
- name: SSOREADY_ADMIN_PASSWORD
valueFrom:
secretKeyRef:
name: ssoready-config
key: SSOREADY_SECRET_KEY # Will be hashed for admin login
resources:
requests:
cpu: 250m
memory: 512Mi
limits:
cpu: 1000m
memory: 2Gi
livenessProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 8080
initialDelaySeconds: 10
periodSeconds: 5
EOF
helm install ssoready oci://registry.kindo.ai/kindo-helm/ssoready \\
--version 2025.08.2 \\
--namespace ssoready \\
--values ssoready-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for SSOready to be ready
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=ssoready -n ssoready --timeout=300s
5.8 Deploy Task Worker (TypeScript Workers)
Source: Kindo-packaged Node.js/TypeScript worker
Registry: registry.kindo.ai/kindo/task-worker-ts
cat > task-worker-ts-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/task-worker-ts
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 2
envFrom:
- secretRef:
name: worker-config
- configMapRef:
name: worker-config
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: 2000m
memory: 4Gi
EOF
helm install task-worker-ts oci://registry.kindo.ai/kindo-helm/task-worker-ts \\
--version 2025.08.2 \\
--namespace task-worker-ts \\
--values task-worker-ts-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
5.9 Deploy Self-Hosted AI Inference (Optional - GPU Required)
Only follow this section if you have GPU nodes and want to run self-hosted LLMs or embeddings.
These are Kindo-packaged inference services optimized for the platform.
5.9a Deploy Text Embeddings Inference (Kindo-Packaged)
Source: Kindo-packaged embeddings service
Registry: registry.kindo.ai/kindo/text-embeddings
GPU Required: 1 GPU with 8GB+ VRAM
For self-hosted embeddings:
cat > text-embeddings-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/text-embeddings
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
service:
type: ClusterIP
port: 8080
env:
- name: MODEL_ID
value: "BAAI/bge-large-en-v1.5" # BGE embeddings model
- name: REVISION
value: "main"
- name: MAX_BATCH_SIZE
value: "32"
- name: MAX_INPUT_LENGTH
value: "512"
resources:
requests:
cpu: 2000m
memory: 4Gi
nvidia.com/gpu: 1
limits:
cpu: 4000m
memory: 8Gi
nvidia.com/gpu: 1
# Ensure pods are scheduled on GPU nodes
nodeSelector:
nvidia.com/gpu: "true"
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
EOF
helm install text-embeddings oci://registry.kindo.ai/kindo-helm/text-embeddings \\
--version 2025.08.2 \\
--namespace text-embeddings \\
--create-namespace \\
--values text-embeddings-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
echo "ā Text embeddings service deployed on GPU"
echo " Internal URL: <http://text-embeddings.text-embeddings.svc.cluster.local:8080>"
5.9b Deploy vLLM Inference Server (Kindo-Packaged)
Source: Kindo-packaged vLLM inference service
Registry: registry.kindo.ai/kindo/inference
GPU Required: 1-8 GPUs depending on model size
For running LLMs locally:
cat > inference-values.yaml <<EOF
image:
repository: registry.kindo.ai/kindo/inference
tag: "2025.08.2"
imagePullSecrets:
- name: kindo-registry
replicaCount: 1
service:
type: ClusterIP
port: 8000
env:
- name: MODEL_NAME
value: "gpt-oss-20b" # Small model - or use "gpt-oss-120b" for large
- name: GPU_MEMORY_UTILIZATION
value: "0.9"
- name: MAX_MODEL_LEN
value: "8192"
- name: DTYPE
value: "auto"
- name: TENSOR_PARALLEL_SIZE
value: "1" # Increase for gpt-oss-120b with multiple GPUs
resources:
requests:
cpu: 4000m
memory: 16Gi
nvidia.com/gpu: 1 # Increase based on model size
limits:
cpu: 8000m
memory: 32Gi
nvidia.com/gpu: 1
nodeSelector:
nvidia.com/gpu: "true"
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
# For larger models, use multiple GPUs
# resources:
# nvidia.com/gpu: 4 # For 70B models
EOF
helm install inference oci://registry.kindo.ai/kindo-helm/inference \\
--version 2025.08.2 \\
--namespace inference \\
--create-namespace \\
--values inference-values.yaml \\
--username "$REGISTRY_USERNAME" \\
--password "$REGISTRY_PASSWORD"
# Wait for model to load (can take 5-10 minutes for large models)
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=inference -n inference --timeout=600s
echo "ā Self-hosted inference service deployed"
echo " Internal URL: <http://inference.inference.svc.cluster.local:8000>"
echo " Configure this URL in LiteLLM as a model endpoint"
GPU Requirements by Model:
GPT-OSS-20B (Small): 1x GPU with 24GB+ VRAM (RTX 4090, A10, L4)
GPT-OSS-120B (Large): 4-8x GPUs with 40GB+ VRAM each (A100 40GB, H100)
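Once the inference pod is Ready, a quick smoke test against the OpenAI-compatible endpoints can confirm the model loaded (the paths assume a vLLM-style server and the MODEL_NAME configured above):
kubectl run -it --rm inference-check --image=curlimages/curl --restart=Never -- \
curl -s http://inference.inference.svc.cluster.local:8000/v1/models
kubectl run -it --rm inference-chat --image=curlimages/curl --restart=Never -- \
curl -s http://inference.inference.svc.cluster.local:8000/v1/chat/completions \
-H 'Content-Type: application/json' \
-d '{"model": "gpt-oss-20b", "messages": [{"role": "user", "content": "ping"}], "max_tokens": 8}'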
5.10 Verify All Kindo Application Deployments
echo "=== Verifying Kindo Applications ==="
echo ""
# Check Kindo-packaged applications (from registry.kindo.ai)
echo "Core Applications:"
for ns in api next litellm llama-indexer; do
status=$(kubectl get pods -n $ns -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Not Found")
echo " $ns: $status"
done
echo ""
echo "Worker Services:"
for ns in external-poller external-sync credits audit-log-exporter task-worker-ts; do
status=$(kubectl get pods -n $ns -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Not Found")
echo " $ns: $status"
done
echo ""
echo "Authentication & Authorization:"
for ns in ssoready cerbos; do
status=$(kubectl get pods -n $ns -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Not Found")
echo " $ns: $status"
done
echo ""
echo "GPU Services (if deployed):"
for ns in text-embeddings inference; do
status=$(kubectl get pods -n $ns -o jsonpath='{.items[0].status.phase}' 2>/dev/null || echo "Not Deployed")
echo " $ns: $status"
done
# Detailed view
echo ""
echo "=== All Kindo Pods ==="
kubectl get pods -A -l app.kubernetes.io/managed-by=kindo -o wide
# Expected output (all Running in their respective namespaces):
# Namespace: api
# NAME READY STATUS RESTARTS
# api-xxxxx 1/1 Running 0
#
# Namespace: next
# NAME READY STATUS RESTARTS
# next-xxxxx 1/1 Running 0
#
# ... and so on for each service
# Check all services
kubectl get svc -A -l app.kubernetes.io/managed-by=kindo
# Verify GPU allocation (if using self-hosted models)
kubectl get pods -n text-embeddings -o custom-columns=NAME:.metadata.name,GPU:.spec.containers[0].resources.limits.'nvidia\.com/gpu' 2>/dev/null || true
kubectl get pods -n inference -o custom-columns=NAME:.metadata.name,GPU:.spec.containers[0].resources.limits.'nvidia\.com/gpu' 2>/dev/null || true
Step 6: Configure DNS and Ingress
6.1 Create Ingress Resources
# Create API Ingress
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: api
namespace: api
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod # If using cert-manager
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
ingressClassName: nginx # Or your ingress class
tls:
- hosts:
- api.kindo.company.com
secretName: api-tls
rules:
- host: api.kindo.company.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: api
port:
number: 3000
EOF
# Create Frontend Ingress
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: next
namespace: next
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
ingressClassName: nginx
tls:
- hosts:
- app.kindo.company.com
secretName: next-tls
rules:
- host: app.kindo.company.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: next
port:
number: 3000
EOF
# Create SSOready Ingress
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: ssoready
namespace: ssoready
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
ingressClassName: nginx
tls:
- hosts:
- sso.kindo.company.com
secretName: ssoready-tls
rules:
- host: sso.kindo.company.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: ssoready
port:
number: 8080
EOF
# Create Unleash Ingress
cat <<EOF | kubectl apply -f -
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: unleash
namespace: unleash
annotations:
cert-manager.io/cluster-issuer: letsencrypt-prod
nginx.ingress.kubernetes.io/ssl-redirect: "true"
spec:
ingressClassName: nginx
tls:
- hosts:
- unleash.kindo.company.com
secretName: unleash-tls
rules:
- host: unleash.kindo.company.com
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: unleash
port:
number: 4242
EOF
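The ingresses above reference a letsencrypt-prod ClusterIssuer. If cert-manager is installed but no issuer exists yet, a typical ACME HTTP-01 issuer looks like the following (a sketch; replace the email and adjust the solver class to your ingress controller):
cat <<EOF | kubectl apply -f -
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: [email protected]  # replace with a monitored address
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: nginx
EOF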
6.2 Get Ingress Load Balancer IP/Hostname
# Get the LoadBalancer IP or hostname
kubectl get svc -n ingress-nginx
# For cloud providers (AWS/GCP/Azure)
INGRESS_HOSTNAME=$(kubectl get svc ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.loadBalancer.ingress[0].hostname}')
INGRESS_IP=$(kubectl get svc ingress-nginx-controller -n ingress-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
echo "Ingress endpoint: ${INGRESS_HOSTNAME:-$INGRESS_IP}"
6.3 Configure DNS Records
Create these DNS records in your DNS provider:
| Type | Name | Value | TTL |
|---|---|---|---|
| A or CNAME | app.kindo.company.com | LoadBalancer IP or hostname | 300 |
| A or CNAME | api.kindo.company.com | LoadBalancer IP or hostname | 300 |
| A or CNAME | sso.kindo.company.com | LoadBalancer IP or hostname | 300 |
| A or CNAME | unleash.kindo.company.com | LoadBalancer IP or hostname | 300 |
If using LoadBalancer hostname (AWS):
Type: CNAME
Value: abc123.us-west-2.elb.amazonaws.com
If using LoadBalancer IP:
Type: A
Value: 203.0.113.42
Note: All subdomains should point to the same ingress controller LoadBalancer. The ingress controller will route traffic based on the Host header.
6.4 Verify DNS Propagation
# Check DNS resolution
nslookup app.kindo.company.com
nslookup api.kindo.company.com
nslookup sso.kindo.company.com
nslookup unleash.kindo.company.com
# Test HTTPS access
curl -I https://api.kindo.company.com/health
curl -I https://app.kindo.company.com
curl -I https://sso.kindo.company.com/health
curl -I https://unleash.kindo.company.com
Step 7: Post-Deployment Configuration
7.1 Configure SSOready
# Verify SSOready is accessible
echo "SSOready admin UI: <https://sso.kindo.company.com>"
curl -I <https://sso.kindo.company.com/health>
# First-time setup:
# The admin user is configured using SSOREADY_ADMIN_EMAIL from Step 5.7
# Admin password was set in the ssoready-config secret
# Get admin email (if needed)
kubectl get deployment ssoready -n ssoready -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="SSOREADY_ADMIN_EMAIL")].value}'
Configure SSO Providers:
Access SSOready admin: https://sso.kindo.company.com
Log in with admin credentials (from Step 5.7)
Add your identity providers:
Azure AD / Microsoft Entra ID
Okta
Google Workspace
OneLogin
Auth0
Generic SAML 2.0 provider
Generic OIDC provider
Configure SAML settings (for each SAML provider):
Upload IdP metadata XML or enter manually
Set Entity ID: https://sso.kindo.company.com
Set ACS URL: https://sso.kindo.company.com/saml/acs
Set Single Logout URL: https://sso.kindo.company.com/saml/slo
Configure OIDC settings (for each OIDC provider):
Set Redirect URI: https://sso.kindo.company.com/oauth/callback
Configure Client ID and Client Secret from your IdP
Set up Kindo application redirect URIs:
Primary callback: https://app.kindo.company.com/auth/sso/callback
Logout callback: https://app.kindo.company.com/auth/logout
Test SSO login:
# Navigate to the application
echo "Test SSO: https://app.kindo.company.com"
# Click "Sign in with SSO" and select your configured provider
Resources:
SSOready Documentation: https://ssoready.com/docs
SAML Configuration Guide: https://ssoready.com/docs/saml
OIDC Configuration Guide: https://ssoready.com/docs/oidc
7.2 Import Unleash Feature Flags
# Get Unleash admin credentials
echo "Unleash URL: <https://unleash.kindo.company.com>"
echo "Username: admin"
echo "Password: (from Step 3.2 - check your saved credentials)"
# Access Unleash UI to import feature flags
# Import feature flags using the Kindo-provided feature_flags.json
# This should be done via the Unleash UI or API
# To import via API:
curl -X POST https://unleash.kindo.company.com/api/admin/features-batch/import \\
-H "Authorization: Bearer $UNLEASH_ADMIN_TOKEN" \\
-H "Content-Type: application/json" \\
-d @feature_flags.json
7.3 Configure Base Models in Unleash
Important: You must configure model feature flags in Unleash before the application will function properly.
Step 1: Create Models via API
# First, create models using the admin API
# Example: Add Claude 3.5 Sonnet
curl -X POST https://api.kindo.company.com/internal/openapi/admin/model/new \\
-H 'Content-Type: application/json' \\
-H "Authorization: Bearer $UM_INTERNAL_API_KEY" \\
-d '{
"orgId": "default-org-id",
"userId": "admin-user-id",
"displayName": "Claude 3.5 Sonnet",
"modelProviderDisplayName": "Anthropic",
"type": "CHAT",
"contextWindow": 200000,
"metadata": {
"type": "Text Generation",
"costTier": "HIGH",
"usageTag": "Chat + Agents",
"description": "Most intelligent model for complex tasks",
"modelCreator": "Anthropic"
},
"litellmModelName": "claude-3-5-sonnet",
"litellmParams": {
"model": "anthropic/claude-3-5-sonnet-latest",
"api_key": "'"$ANTHROPIC_API_KEY"'"
}
}'
# Save the returned model ID for Unleash configuration
Step 2: Configure Unleash Feature Flags
Required feature flags (configure in Unleash UI):
| Feature Flag | Description | Configure With |
|---|---|---|
| | Default model for workflows | Model IDs from API |
| | Chat model dropdown | Model IDs from API |
| | Text embedding models | Embedding model IDs |
| | General use models | Multiple model IDs |
See the Application Deployment Guide Model Management section for complete model configuration.
7.4 Configure Self-Hosted Models in LiteLLM (If Using GPU Infrastructure)
If you deployed self-hosted inference services, configure them in LiteLLM:
# Example 1: Add self-hosted embedding model (BGE)
curl -X POST https://api.kindo.company.com/internal/openapi/admin/model/new \\
-H 'Content-Type: application/json' \\
-H "Authorization: Bearer $UM_INTERNAL_API_KEY" \\
-d '{
"orgId": "<YOUR_ORG_ID>",
"userId": "<YOUR_USER_ID>",
"displayName": "BGE Large (Self-Hosted)",
"modelProviderDisplayName": "Internal",
"type": "EMBEDDING",
"contextWindow": 512,
"metadata": {
"type": "Embeddings",
"costTier": "FREE",
"usageTag": "Self-hosted embeddings",
"description": "BGE embedding model on GPU",
"modelCreator": "BAAI"
},
"litellmModelName": "bge-large-self-hosted",
"litellmParams": {
"model": "openai/bge-large",
"api_base": "<http://text-embeddings.text-embeddings.svc.cluster.local:8080>",
"api_key": "not-required"
}
}'
# Example 2: Add managed embedding model (OpenAI)
curl -X POST https://api.kindo.company.com/internal/openapi/admin/model/new \\
-H 'Content-Type: application/json' \\
-H "Authorization: Bearer $UM_INTERNAL_API_KEY" \\
-d '{
"orgId": "<YOUR_ORG_ID>",
"userId": "<YOUR_USER_ID>",
"displayName": "OpenAI text-embedding-3-large",
"modelProviderDisplayName": "OpenAI",
"type": "EMBEDDING",
"contextWindow": 8191,
"metadata": {
"type": "Embeddings",
"costTier": "LOW",
"usageTag": "OpenAI embeddings",
"description": "OpenAI embedding model - high quality",
"modelCreator": "OpenAI"
},
"litellmModelName": "openai-embedding-3-large",
"litellmParams": {
"model": "text-embedding-3-large",
"api_key": "'"$OPENAI_API_KEY"'"
}
}'
# Example 3: Add Amazon Titan embeddings (if using Bedrock)
curl -X POST https://api.kindo.company.com/internal/openapi/admin/model/new \\
-H 'Content-Type: application/json' \\
-H "Authorization: Bearer $UM_INTERNAL_API_KEY" \\
-d '{
"orgId": "<YOUR_ORG_ID>",
"userId": "<YOUR_USER_ID>",
"displayName": "Amazon Titan Embeddings v2",
"modelProviderDisplayName": "Amazon Bedrock",
"type": "EMBEDDING",
"contextWindow": 8192,
"metadata": {
"type": "Embeddings",
"costTier": "LOW",
"usageTag": "Bedrock embeddings",
"description": "Amazon Titan embedding model v2",
"modelCreator": "Amazon"
},
"litellmModelName": "bedrock-titan-embed-v2",
"litellmParams": {
"model": "amazon.titan-embed-text-v2:0",
"aws_access_key_id": "'"$AWS_ACCESS_KEY_ID"'",
"aws_secret_access_key": "'"$AWS_SECRET_ACCESS_KEY"'",
"aws_region_name": "us-west-2"
}
}'
# Example 4: Add self-hosted LLM (GPT-OSS-20B)
curl -X POST https://api.kindo.company.com/internal/openapi/admin/model/new \\
-H 'Content-Type: application/json' \\
-H "Authorization: Bearer $UM_INTERNAL_API_KEY" \\
-d '{
"orgId": "<YOUR_ORG_ID>",
"userId": "<YOUR_USER_ID>",
"displayName": "GPT-OSS-20B (Self-Hosted)",
"modelProviderDisplayName": "Internal",
"type": "CHAT",
"contextWindow": 8192,
"metadata": {
"type": "Text Generation",
"costTier": "FREE",
"usageTag": "Self-hosted chat",
"description": "Internal GPT-OSS model on GPU",
"modelCreator": "Internal"
},
"litellmModelName": "gpt-oss-20b-self-hosted",
"litellmParams": {
"model": "openai/gpt-oss-20b",
"api_base": "<http://inference.inference.svc.cluster.local:8000/v1>",
"api_key": "not-required"
}
}'
echo "ā Models registered in Kindo"
echo " - BGE embeddings (self-hosted on GPU)"
echo " - OpenAI embeddings (managed API)"
echo " - Titan embeddings (Bedrock, if configured)"
echo " - GPT-OSS-20B LLM (self-hosted on GPU)"
7.5 Create Initial Admin User
# Access the application
echo "Application URL: https://app.kindo.company.com"
# The first user to sign up via SSOready will be the admin
# Configure your SSO provider in SSOready first, then sign in
Step 8: Verification and Testing
8.1 Health Check All Services
#!/bin/bash
echo "=== Kindo Deployment Health Check ==="
# Check pods across all Kindo namespaces
echo -e "\\nš¦ Pod Status:"
kubectl get pods -A -l app.kubernetes.io/managed-by=kindo -o wide
# Check services
echo -e "\\nš§ Services:"
kubectl get svc -A -l app.kubernetes.io/managed-by=kindo
# Check ingress
echo -e "\\nš Ingress:"
kubectl get ingress -A -l app.kubernetes.io/managed-by=kindo
# Test API health
echo -e "\\nš„ API Health:"
curl -s <https://api.kindo.company.com/health> | jq .
# Test Frontend
echo -e "\\nš„ļø Frontend:"
curl -I <https://app.kindo.company.com>
# Test LiteLLM
echo -e "\\nš¤ LiteLLM:"
curl -s <https://api.kindo.company.com/litellm/health> | jq .
# Test SSOready
echo -e "\\nš SSOready:"
kubectl exec -n kindo-apps deployment/ssoready -- wget -q -O- <http://localhost:8080/health> | jq . || echo "Check SSOready logs"
# Test vector database
echo -e "\\nš Vector Database:"
if [ ! -z "$PINECONE_API_KEY" ]; then
echo " Using Pinecone (managed service)"
elif [ ! -z "$QDRANT_URL" ]; then
echo " Using Qdrant"
curl -s -H "api-key: $QDRANT_API_KEY" "$QDRANT_URL/collections/kindo-embeddings" | jq .
fi
# Test database connectivity
echo -e "\\nš¾ Database Connections:"
kubectl exec -n api deployment/api -- \\
psql "$POSTGRES_MAIN_URL" -c "SELECT 1" && echo " ā Main database" || echo " ā Main database failed"
kubectl exec -n ssoready deployment/ssoready -- \\
psql "$POSTGRES_SSOREADY_URL" -c "SELECT 1" && echo " ā SSOready database" || echo " ā SSOready database failed"
# Check GPU utilization (if using self-hosted models)
echo -e "\\nš® GPU Services:"
if kubectl get namespace text-embeddings &>/dev/null; then
gpu_pod=$(kubectl get pods -n text-embeddings -o jsonpath='{.items[0].metadata.name}')
echo " Text Embeddings: $gpu_pod"
kubectl exec -n text-embeddings $gpu_pod -- nvidia-smi 2>/dev/null || echo " ℹ nvidia-smi not available"
fi
if kubectl get namespace inference &>/dev/null; then
gpu_pod=$(kubectl get pods -n inference -o jsonpath='{.items[0].metadata.name}')
echo " Inference: $gpu_pod"
kubectl exec -n inference $gpu_pod -- nvidia-smi 2>/dev/null || echo " ℹ nvidia-smi not available"
fi
# Check recent errors across all Kindo namespaces
echo -e "\\nā Recent Errors:"
kubectl get events -A -l app.kubernetes.io/managed-by=kindo --field-selector type=Warning --sort-by='.lastTimestamp' | tail -10
8.2 Verify Vector Database
If using Qdrant:
# Check Qdrant health (run from inside the cluster; add -H "api-key: $QDRANT_API_KEY" if API key auth is enabled)
curl http://qdrant.qdrant.svc.cluster.local:6333/healthz
# List collections
curl http://qdrant.qdrant.svc.cluster.local:6333/collections | jq .
# Check collection info
curl http://qdrant.qdrant.svc.cluster.local:6333/collections/kindo-embeddings | jq .
If using Pinecone:
# Verify from application logs
kubectl logs -n llama-indexer deployment/llama-indexer --tail=50 | grep -i pinecone
8.3 Verify Feature Flags
# Check Unleash is accessible
curl -H "Authorization: $UNLEASH_ADMIN_TOKEN" \\ <https://unleash.kindo.company.com/api/admin/projects> | jq .
# List feature flags
curl -H "Authorization: $UNLEASH_ADMIN_TOKEN" \\ <https://unleash.kindo.company.com/api/admin/projects/default/features> | jq '.features[].name'
8.4 Test Complete User Flow
Access Application: Navigate to https://app.kindo.company.com
Configure SSO: Set up your SSO provider in SSOready
Sign In: Test SSO authentication
Create Workspace: Create a new workspace/organization
Test AI Features: Try a chat interaction
Test File Upload: Upload a document
Verify Audit Logs: Check syslog server for audit events
8.5 Monitor Resource Usage
# Check resource usage
kubectl top nodes
kubectl top pods --all-namespaces
# Check persistent volumes
kubectl get pv
kubectl get pvc --all-namespaces