Prerequisites
- Kubernetes 1.28+
- Helm 3+
- kubectl configured
- Storage class for persistent volumes
Quick Start
Copy
# Register the AIP chart repository and refresh the local chart index
helm repo add aip https://charts.runtools.ai/aip
helm repo update

# Install the hub into its own namespace (created if it does not exist).
# your-secret-key is a placeholder: substitute a real secret before running.
helm install aip-hub aip/aip-hub \
  --namespace aip \
  --create-namespace \
  --set config.jwtSecret=your-secret-key
Configuration
values.yaml
Copy
# Example values.yaml for the aip-hub chart.
# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm key paths with `helm show values aip/aip-hub` before relying on it.
# Values such as "your-...", "sk-...", "rt_..." and "..." are placeholders.
# Avoid committing real secrets to version control.

# AIP Hub
hub:
  replicas: 2
  resources:
    requests:
      cpu: "500m"
      memory: "1Gi"
    limits:
      cpu: "1"
      memory: "2Gi"

# Database (external recommended for production)
postgresql:
  enabled: false  # Use external
  external:
    host: your-postgres-host
    port: 5432
    database: aip
    username: aip
    password: your-password

# Redis (optional but recommended)
redis:
  enabled: true
  resources:
    requests:
      cpu: "100m"
      memory: "128Mi"

# Configuration
config:
  jwtSecret: "your-jwt-secret-min-32-chars"

  # Embeddings
  openaiApiKey: "sk-..."
  embeddingModel: "text-embedding-3-small"

  # Auth (optional)
  authProvider: "oidc"
  oidcIssuer: "https://auth.yourcompany.com"
  oidcClientId: "aip-hub"
  oidcClientSecret: "..."

  # RunTools connection
  runtoolsApiKey: "rt_..."

# Ingress
ingress:
  enabled: true
  className: nginx
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt-prod
  hosts:
    - host: aip.yourcompany.com
      paths:
        - path: /
          pathType: Prefix
  tls:
    - hosts:
        - aip.yourcompany.com
      secretName: aip-hub-tls

# Storage
storage:
  class: "standard"
  size: "10Gi"
Install
Copy
# Install the chart using the settings from values.yaml in the current directory
helm install aip-hub aip/aip-hub \
  --namespace aip \
  --create-namespace \
  -f values.yaml
Verify
Copy
# Check pods
kubectl get pods -n aip
# Check services
kubectl get svc -n aip
# Check ingress
kubectl get ingress -n aip

# API health.
# port-forward stays in the foreground until interrupted, so run it in the
# background (or in a second terminal); otherwise the curl below never executes.
kubectl port-forward svc/aip-hub 3000:3000 -n aip &
PF_PID=$!
sleep 2  # give the tunnel a moment to come up
curl http://localhost:3000/health
kill "$PF_PID"  # stop the background port-forward
Scaling
Copy
# Manually set the aip-hub Deployment to four replicas
kubectl scale deployment/aip-hub --replicas=4 --namespace aip
Autoscaling
Copy
# HorizontalPodAutoscaler for the aip-hub Deployment: keeps between 2 and 10
# replicas, targeting 70% average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: aip-hub-hpa
  namespace: aip
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: aip-hub
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
Monitoring
Prometheus
Copy
# values.yaml
# NOTE(review): nesting reconstructed from a flattened listing. serviceMonitor
# is assumed to sit under prometheus; confirm with `helm show values aip/aip-hub`.
monitoring:
  prometheus:
    enabled: true
    serviceMonitor:
      enabled: true
Metrics:
- aip_invocations_total
- aip_sessions_active
- aip_blocks_total
- aip_embedding_latency_seconds
Backup
Copy
# Backup PostgreSQL (if using in-cluster).
# No -i/-t flags: stdout is redirected to a file, and allocating a TTY can
# inject carriage returns into the dump and fails when no terminal is attached.
kubectl exec aip-postgresql-0 -n aip -- \
  pg_dump -U aip aip > backup.sql

# Backup with Velero
velero backup create aip-backup \
  --include-namespaces aip
Upgrading
Copy
# Refresh the local chart index so the newest chart version is visible
helm repo update

# Upgrade the existing release in place, reusing the same values file
helm upgrade aip-hub aip/aip-hub \
  --namespace aip \
  -f values.yaml
Troubleshooting
Pods not starting
Copy
# Show the pod's status, conditions and recent events
kubectl describe pod <pod-name> --namespace aip
# Print the pod's container logs
kubectl logs <pod-name> --namespace aip
Database connection issues
Copy
# Test connection from pod.
# The single-quoted `sh -c` keeps $DATABASE_URL from being expanded by the
# local shell, so the value set inside the container is the one psql receives.
# Assumes the container image provides sh and psql; verify for your image.
kubectl exec -it <pod-name> -n aip -- \
  sh -c 'psql "$DATABASE_URL" -c "SELECT 1"'
Storage issues
Copy
# List the persistent volume claims in the aip namespace
kubectl get persistentvolumeclaims --namespace aip
# List the storage classes available in the cluster
kubectl get storageclasses