Skip to content

Instantly share code, notes, and snippets.

@nerdalert
Created October 10, 2025 15:54
Show Gist options
  • Save nerdalert/fd8e41572a2690913033944137c30e16 to your computer and use it in GitHub Desktop.
#!/bin/bash
# OpenShift MaaS Platform Deployment Script
# This script automates the complete deployment of the MaaS platform on OpenShift
#
# Requires: oc, kubectl, jq, kustomize, envsubst; cluster-admin on an OpenShift cluster.
# Abort on the first failing command. NOTE(review): `pipefail` is deliberately
# not enabled — several pipelines below (e.g. the `grep | wc -l` status counts
# near the end) rely on intermediate grep failures being ignored.
set -e
# Helper function to wait for CRD to be established
#
# Polls every 2s until the named CRD exists on the API server, then waits for
# its Established condition so that resources of that kind can be applied.
#
# Arguments:
#   $1 - CRD name (e.g. "gateways.gateway.networking.k8s.io")
#   $2 - timeout in seconds, used both for appearance and establishment (default: 60)
# Returns:
#   0 if the CRD appeared and became Established; 1 on timeout or if the
#   Established condition never arrived
wait_for_crd() {
  local crd="$1"
  local timeout="${2:-60}" # timeout in seconds
  local interval=2
  local elapsed=0
  echo "⏳ Waiting for CRD ${crd} to appear (timeout: ${timeout}s)…"
  while [ "$elapsed" -lt "$timeout" ]; do
    if kubectl get crd "$crd" &>/dev/null; then
      echo "✅ CRD ${crd} detected, waiting for it to become Established..."
      # BUGFIX: propagate `kubectl wait`'s exit status. The original
      # unconditionally returned 0 here, reporting success even when the
      # Established condition timed out.
      kubectl wait --for=condition=Established --timeout="${timeout}s" "crd/$crd" 2>/dev/null
      return $?
    fi
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
  echo "❌ Timed out after ${timeout}s waiting for CRD $crd to appear." >&2
  return 1
}
# Helper function to wait for CSV to reach Succeeded state
#
# Polls an OLM ClusterServiceVersion every 5s until status.phase reports
# Succeeded, printing a progress line at most once every 30s.
#
# Arguments:
#   $1 - CSV name (e.g. "kuadrant-operator.v1.3.0-rc2")
#   $2 - namespace containing the CSV (default: kuadrant-system)
#   $3 - timeout in seconds (default: 180)
# Returns:
#   0 when the CSV reaches Succeeded; 1 if it reaches Failed or times out
wait_for_csv() {
  local csv_name="$1"
  local namespace="${2:-kuadrant-system}"
  local timeout="${3:-180}" # timeout in seconds
  local interval=5
  local elapsed=0
  local last_status_print=0
  local phase
  echo "⏳ Waiting for CSV ${csv_name} to succeed (timeout: ${timeout}s)..."
  while [ "$elapsed" -lt "$timeout" ]; do
    # Declared above, assigned here, so a kubectl failure is not masked by
    # `local`'s own exit status (SC2155). "NotFound" covers both a missing
    # CSV and an unreachable API server.
    phase=$(kubectl get csv -n "$namespace" "$csv_name" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
    case "$phase" in
      "Succeeded")
        echo "✅ CSV ${csv_name} succeeded"
        return 0
        ;;
      "Failed")
        echo "❌ CSV ${csv_name} failed" >&2
        # Surface OLM's failure reason to help diagnose the broken install.
        kubectl get csv -n "$namespace" "$csv_name" -o jsonpath='{.status.message}' 2>/dev/null
        return 1
        ;;
      *)
        # Throttle progress output to one line per 30s of waiting.
        if [ $((elapsed - last_status_print)) -ge 30 ]; then
          echo " CSV ${csv_name} status: ${phase} (${elapsed}s elapsed)"
          last_status_print=$elapsed
        fi
        ;;
    esac
    sleep "$interval"
    elapsed=$((elapsed + interval))
  done
  echo "❌ Timed out after ${timeout}s waiting for CSV ${csv_name}" >&2
  return 1
}
# Helper function to wait for pods in a namespace to be ready
#
# Polls every 5s until no pod in the namespace is outside the
# Running/Completed/Succeeded states. If the namespace does not exist the
# wait is skipped entirely (returns 0), since some components are optional.
#
# Arguments:
#   $1 - namespace to watch
#   $2 - timeout in seconds (default: 120)
# Returns:
#   0 when all pods are ready (or the namespace is absent); 1 on timeout
wait_for_pods() {
  local namespace="$1"
  local timeout="${2:-120}"
  local not_ready
  kubectl get namespace "$namespace" &>/dev/null || return 0
  echo "⏳ Waiting for pods in $namespace to be ready..."
  local end=$((SECONDS + timeout))
  while [ "$SECONDS" -lt "$end" ]; do
    # Count pods NOT in a ready/terminal state. Assigned separately from the
    # declaration so the pipeline status isn't masked by `local` (SC2155).
    # NOTE(review): a failing `kubectl get pods` also yields a count of 0 and
    # is treated as "all ready" — preserved from the original behavior.
    not_ready=$(kubectl get pods -n "$namespace" --no-headers 2>/dev/null | grep -v -E 'Running|Completed|Succeeded' | wc -l)
    [ "$not_ready" -eq 0 ] && return 0
    sleep 5
  done
  echo "⚠️ Timeout waiting for pods in $namespace" >&2
  return 1
}
# version_compare <version1> <version2>
# Compares two version strings in semantic version format (e.g., "4.19.9")
# Returns 0 if version1 >= version2, 1 otherwise
version_compare() {
  # Flatten "A.B.C" into one integer: the major part unpadded, minor and
  # patch each zero-padded to three digits, so plain -ge ordering matches
  # semantic version ordering (for components < 1000).
  local normalized_a normalized_b
  normalized_a=$(awk -F. '{printf "%d%03d%03d", $1, $2, $3}' <<<"$1")
  normalized_b=$(awk -F. '{printf "%d%03d%03d", $1, $2, $3}' <<<"$2")
  [ "$normalized_a" -ge "$normalized_b" ]
}
# Wait until every Service backing a validating webhook in a namespace has
# ready Endpoints, so admission requests won't fail with connection errors.
#
# Arguments:
#   $1 - namespace whose webhook services to check
#   $2 - timeout in seconds (default: 60)
# Returns:
#   0 if all webhook services have endpoints (or none reference the
#   namespace); 1 on timeout
wait_for_validating_webhooks() {
  local namespace="$1"
  local timeout="${2:-60}"
  local interval=2
  local end=$((SECONDS+timeout))
  echo "⏳ Waiting for validating webhooks in namespace $namespace (timeout: $timeout sec)..."
  while [ "$SECONDS" -lt "$end" ]; do
    local not_ready=0
    local services
    # Collect "namespace/name" of every Service referenced by any
    # ValidatingWebhookConfiguration, filtered to the target namespace.
    services=$(kubectl get validatingwebhookconfigurations \
      -o jsonpath='{range .items[*].webhooks[*].clientConfig.service}{.namespace}/{.name}{"\n"}{end}' \
      | grep "^$namespace/" | sort -u)
    if [ -z "$services" ]; then
      echo "⚠️ No validating webhooks found in namespace $namespace"
      return 0
    fi
    local svc ns name ready
    for svc in $services; do
      # Split "ns/name" with parameter expansion instead of two cut
      # subprocesses per service (k8s names cannot contain '/').
      ns=${svc%%/*}
      name=${svc#*/}
      # A webhook is callable only once its Service has endpoint addresses.
      ready=$(kubectl get endpoints -n "$ns" "$name" -o jsonpath='{.subsets[*].addresses[*].ip}' 2>/dev/null || true)
      if [ -z "$ready" ]; then
        echo "🔴 Webhook service $ns/$name not ready"
        not_ready=1
      else
        echo "✅ Webhook service $ns/$name has ready endpoints"
      fi
    done
    if [ "$not_ready" -eq 0 ]; then
      echo "🎉 All validating webhook services in $namespace are ready"
      return 0
    fi
    sleep "$interval"
  done
  # BUGFIX: report the timeout on stderr, consistent with the other wait
  # helpers (the original echoed this to stdout).
  echo "❌ Timed out waiting for validating webhooks in $namespace" >&2
  return 1
}
echo "========================================="
echo "🚀 MaaS Platform OpenShift Deployment"
echo "========================================="
echo ""
# Check if running on OpenShift
# The route.openshift.io API group only exists on OpenShift clusters.
if ! kubectl api-resources | grep -q "route.openshift.io"; then
echo "❌ This script is for OpenShift clusters only."
exit 1
fi
# Check prerequisites
# Informational only: missing tools are reported as 'not found' but do not
# abort the script here (later steps will fail if they are truly required).
echo "📋 Checking prerequisites..."
echo ""
echo "Required tools:"
echo " - oc: $(oc version --client --short 2>/dev/null | head -n1 || echo 'not found')"
echo " - jq: $(jq --version 2>/dev/null || echo 'not found')"
echo " - kustomize: $(kustomize version --short 2>/dev/null || echo 'not found')"
echo ""
echo "ℹ️ Note: OpenShift Service Mesh should be automatically installed when GatewayClass is created."
echo " If the Gateway gets stuck in 'Waiting for controller', you may need to manually"
echo " install the Red Hat OpenShift Service Mesh operator from OperatorHub."
echo ""
echo "1️⃣ Checking OpenShift version and Gateway API requirements..."
# Get OpenShift version
OCP_VERSION=$(oc get clusterversion version -o jsonpath='{.status.desired.version}' 2>/dev/null || echo "unknown")
echo " OpenShift version: $OCP_VERSION"
# Check if version is 4.19.9 or higher
# OpenShift >= 4.19.9 ships Gateway API support natively; older versions need
# the GatewayAPI/GatewayAPIController feature gates enabled via CustomNoUpgrade.
# NOTE(review): CustomNoUpgrade makes the cluster un-upgradable — presumably
# acceptable for dev/test clusters only; confirm before using in production.
if [[ "$OCP_VERSION" == "unknown" ]]; then
echo " ⚠️ Could not determine OpenShift version, applying feature gates to be safe"
oc patch featuregate/cluster --type='merge' \
-p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true
echo " Waiting for feature gates to reconcile (30 seconds)..."
sleep 30
elif version_compare "$OCP_VERSION" "4.19.9"; then
echo " ✅ OpenShift $OCP_VERSION supports Gateway API via GatewayClass (no feature gates needed)"
else
echo " Applying Gateway API feature gates for OpenShift < 4.19.9"
oc patch featuregate/cluster --type='merge' \
-p '{"spec":{"featureSet":"CustomNoUpgrade","customNoUpgrade":{"enabled":["GatewayAPI","GatewayAPIController"]}}}' || true
echo " Waiting for feature gates to reconcile (30 seconds)..."
sleep 30
fi
echo ""
echo "2️⃣ Creating namespaces..."
echo " ℹ️ Note: If ODH/RHOAI is already installed, some namespaces may already exist"
# Idempotent: creation errors (namespace exists) are swallowed and reported.
for ns in opendatahub kserve kuadrant-system llm maas-api; do
kubectl create namespace $ns 2>/dev/null || echo " Namespace $ns already exists"
done
echo ""
echo "3️⃣ Installing dependencies..."
# Resolve this script's directory and the repository root (two levels up),
# so relative kustomize/manifest paths work regardless of the caller's cwd.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Only clean up leftover CRDs if Kuadrant operators are NOT already installed
# (deleting CRDs under a live operator would wipe its custom resources).
echo " Checking for existing Kuadrant installation..."
if ! kubectl get csv -n kuadrant-system kuadrant-operator.v1.3.0-rc2 &>/dev/null 2>&1; then
echo " No existing installation found, checking for leftover CRDs..."
LEFTOVER_CRDS=$(kubectl get crd 2>/dev/null | grep -E "kuadrant|authorino|limitador" | awk '{print $1}')
if [ -n "$LEFTOVER_CRDS" ]; then
echo " Found leftover CRDs, cleaning up before installation..."
echo "$LEFTOVER_CRDS" | xargs -r kubectl delete crd --timeout=30s 2>/dev/null || true
sleep 5 # Brief wait for cleanup to complete
fi
else
echo " ✅ Kuadrant operator already installed, skipping CRD cleanup"
fi
echo " Installing cert-manager..."
"$SCRIPT_DIR/install-dependencies.sh" --cert-manager
# Wait for cert-manager CRDs to be ready
echo " Waiting for cert-manager CRDs to be established..."
wait_for_crd "certificates.cert-manager.io" 120 || \
echo " ⚠️ Certificate CRD not yet available"
echo " Installing Kuadrant..."
"$SCRIPT_DIR/install-dependencies.sh" --kuadrant
echo ""
echo "4️⃣ Deploying Gateway infrastructure..."
# The cluster's wildcard ingress domain; exported so envsubst can inject it
# into the Gateway manifests below.
CLUSTER_DOMAIN=$(kubectl get ingresses.config.openshift.io cluster -o jsonpath='{.spec.domain}')
if [ -z "$CLUSTER_DOMAIN" ]; then
echo "❌ Failed to retrieve cluster domain from OpenShift"
exit 1
fi
export CLUSTER_DOMAIN
echo " Cluster domain: $CLUSTER_DOMAIN"
echo " Deploying Gateway and GatewayClass..."
cd "$PROJECT_ROOT"
# Server-side apply with --force-conflicts so re-runs take ownership of
# fields previously managed by other controllers.
envsubst < deployment/base/networking/gateway-api.yaml | kubectl apply --server-side=true --force-conflicts -f -
# Wait for Gateway API CRDs if not already present
if ! kubectl get crd gateways.gateway.networking.k8s.io &>/dev/null 2>&1; then
echo " Waiting for Gateway API CRDs..."
wait_for_crd "gateways.gateway.networking.k8s.io" 120 || \
echo " ⚠️ Gateway API CRDs not yet available"
fi
echo ""
echo "5️⃣ Checking for OpenDataHub/RHOAI KServe..."
if kubectl get crd llminferenceservices.serving.kserve.io &>/dev/null 2>&1; then
echo " ✅ KServe CRDs already present (ODH/RHOAI detected)"
else
echo " ⚠️ KServe not detected. Deploying ODH KServe components..."
echo " Note: This may require multiple attempts as CRDs need to be established first."
# First attempt
# Expected to fail when CRDs in the manifest are not yet established; the
# retry below runs after the CRDs, pods, and webhooks are ready.
echo " Attempting ODH KServe deployment (attempt 1/2)..."
if kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - 2>/dev/null; then
echo " ✅ Initial deployment successful"
else
echo " ⚠️ First attempt failed (expected if CRDs not yet ready)"
fi
# Wait for CRDs and operator pods, then retry
echo " Waiting for KServe CRDs to be established..."
if wait_for_crd "llminferenceservices.serving.kserve.io" 120; then
wait_for_pods "opendatahub" 120 || true
wait_for_validating_webhooks opendatahub 90 || true
echo " Retrying deployment (attempt 2/2)..."
kustomize build "$PROJECT_ROOT/deployment/components/odh/kserve" | kubectl apply --server-side=true --force-conflicts -f - && \
echo " ✅ ODH KServe components deployed successfully" || \
echo " ⚠️ ODH KServe deployment failed. This may be expected if ODH operator manages these resources."
else
echo " ⚠️ CRDs did not become ready in time. Continuing anyway..."
echo " Run: kustomize build $PROJECT_ROOT/deployment/components/odh/kserve | kubectl apply --server-side=true --force-conflicts -f -"
fi
fi
echo ""
echo "6️⃣ Waiting for Kuadrant operators to be installed by OLM..."
# Wait for CSVs to reach Succeeded state (this ensures CRDs are created and deployments are ready)
# Version strings are pinned; bump these alongside install-dependencies.sh.
wait_for_csv "kuadrant-operator.v1.3.0-rc2" "kuadrant-system" 300 || \
echo " ⚠️ Kuadrant operator CSV did not succeed, continuing anyway..."
wait_for_csv "authorino-operator.v0.22.0" "kuadrant-system" 60 || \
echo " ⚠️ Authorino operator CSV did not succeed"
wait_for_csv "limitador-operator.v0.16.0" "kuadrant-system" 60 || \
echo " ⚠️ Limitador operator CSV did not succeed"
wait_for_csv "dns-operator.v0.15.0" "kuadrant-system" 60 || \
echo " ⚠️ DNS operator CSV did not succeed"
# Verify CRDs are present
# Short timeouts: after the CSVs succeed these should already exist.
echo " Verifying Kuadrant CRDs are available..."
wait_for_crd "kuadrants.kuadrant.io" 30 || echo " ⚠️ kuadrants.kuadrant.io CRD not found"
wait_for_crd "authpolicies.kuadrant.io" 10 || echo " ⚠️ authpolicies.kuadrant.io CRD not found"
wait_for_crd "ratelimitpolicies.kuadrant.io" 10 || echo " ⚠️ ratelimitpolicies.kuadrant.io CRD not found"
wait_for_crd "tokenratelimitpolicies.kuadrant.io" 10 || echo " ⚠️ tokenratelimitpolicies.kuadrant.io CRD not found"
echo ""
echo "7️⃣ Deploying Kuadrant configuration (now that CRDs exist)..."
cd "$PROJECT_ROOT"
kubectl apply -f deployment/base/networking/kuadrant.yaml
echo ""
echo "8️⃣ Deploying MaaS API..."
cd "$PROJECT_ROOT"
# envsubst injects CLUSTER_DOMAIN (exported earlier) into the manifests.
kustomize build deployment/base/maas-api | envsubst | kubectl apply -f -
echo ""
echo "9️⃣ Applying OpenShift-specific configurations..."
# Patch Kuadrant for OpenShift Gateway Controller
# The CSV (not the Deployment) is patched so OLM does not revert the change:
# ISTIO_GATEWAY_CONTROLLER_NAMES is extended to include OpenShift's
# gateway controller name so Kuadrant reconciles OpenShift-managed Gateways.
echo " Patching Kuadrant operator..."
if ! kubectl -n kuadrant-system get deployment kuadrant-operator-controller-manager -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="ISTIO_GATEWAY_CONTROLLER_NAMES")]}' | grep -q "ISTIO_GATEWAY_CONTROLLER_NAMES"; then
kubectl get csv kuadrant-operator.v1.3.0-rc2 -n kuadrant-system -o json | \
jq '.spec.install.spec.deployments[0].spec.template.spec.containers[0].env |= map(if .name == "ISTIO_GATEWAY_CONTROLLER_NAMES" then . + {"value": "istio.io/gateway-controller,openshift.io/gateway-controller/v1"} else . end)' | \
kubectl apply -f -
echo " ✅ Kuadrant operator patched"
else
echo " ✅ Kuadrant operator already configured"
fi
echo ""
echo "🔟 Waiting for Gateway to be ready..."
echo " Note: This may take a few minutes if Service Mesh is being automatically installed..."
# Wait for Service Mesh CRDs to be established
# Creating the GatewayClass triggers automatic Sail/Service Mesh operator
# installation on OpenShift; detect its CRD rather than installing directly.
if kubectl get crd istios.sailoperator.io &>/dev/null 2>&1; then
echo " ✅ Service Mesh operator already detected"
else
echo " Waiting for automatic Service Mesh installation..."
if wait_for_crd "istios.sailoperator.io" 300; then
echo " ✅ Service Mesh operator installed"
else
echo " ⚠️ Service Mesh CRD not detected within timeout"
echo " Gateway may take longer to become ready or require manual Service Mesh installation"
fi
fi
echo " Waiting for Gateway to become ready..."
# Programmed = the gateway controller has accepted and configured the Gateway.
kubectl wait --for=condition=Programmed gateway maas-default-gateway -n openshift-ingress --timeout=300s || \
echo " ⚠️ Gateway is taking longer than expected, continuing..."
echo ""
echo "1️⃣1️⃣ Applying Gateway Policies..."
cd "$PROJECT_ROOT"
kustomize build deployment/base/policies | kubectl apply --server-side=true --force-conflicts -f -
echo ""
echo "1️⃣2️⃣ Deploying OpenShift Routes..."
cd "$PROJECT_ROOT"
envsubst < deployment/overlays/openshift/openshift-routes.yaml | kubectl apply -f -
envsubst < deployment/overlays/openshift/gateway-route.yaml | kubectl apply -f -
echo ""
echo "1️⃣3️⃣ Patching AuthPolicy with correct audience..."
# Mint a short-lived ServiceAccount token and read the first audience from
# its JWT payload. NOTE(review): `base64 -d` on an unpadded JWT segment can
# error; the 2>/dev/null fallbacks make this best-effort — verify AUD
# manually if the patch is skipped.
AUD="$(kubectl create token default --duration=10m 2>/dev/null | cut -d. -f2 | base64 -d 2>/dev/null | jq -r '.aud[0]' 2>/dev/null)"
if [ -n "$AUD" ] && [ "$AUD" != "null" ]; then
echo " Detected audience: $AUD"
kubectl patch authpolicy maas-api-auth-policy -n maas-api \
--type='json' \
-p "$(jq -nc --arg aud "$AUD" '[{
op:"replace",
path:"/spec/rules/authentication/openshift-identities/kubernetesTokenReview/audiences/0",
value:$aud
}]')" 2>/dev/null && echo " ✅ AuthPolicy patched" || echo " ⚠️ Failed to patch AuthPolicy (may need manual configuration)"
else
echo " ⚠️ Could not detect audience, skipping AuthPolicy patch"
echo " You may need to manually configure the audience later"
fi
echo ""
echo "1️⃣4️⃣ Updating Limitador image for metrics exposure..."
# Pinned by commit digest; version is cleared so the operator uses the image
# tag as-is. NOTE(review): temporary override — remove once a released
# Limitador image exposes the needed metrics.
kubectl -n kuadrant-system patch limitador limitador --type merge \
-p '{"spec":{"image":"quay.io/kuadrant/limitador:1a28eac1b42c63658a291056a62b5d940596fd4c","version":""}}' 2>/dev/null && \
echo " ✅ Limitador image updated" || \
echo " ⚠️ Could not update Limitador image (may not be critical)"
# Verification
# Everything below is read-only reporting; failures here do not affect the
# deployed state.
echo ""
echo "========================================="
echo "✅ Deployment Complete!"
echo "========================================="
echo ""
echo "📊 Status Check:"
echo ""
# Check component status
# xargs appends the piped count after the label text on one line.
echo "Component Status:"
kubectl get pods -n maas-api --no-headers | grep Running | wc -l | xargs echo " MaaS API pods running:"
kubectl get pods -n kuadrant-system --no-headers | grep Running | wc -l | xargs echo " Kuadrant pods running:"
kubectl get pods -n opendatahub --no-headers | grep Running | wc -l | xargs echo " KServe pods running:"
echo ""
echo "Gateway Status:"
kubectl get gateway -n openshift-ingress maas-default-gateway -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' | xargs echo " Accepted:"
kubectl get gateway -n openshift-ingress maas-default-gateway -o jsonpath='{.status.conditions[?(@.type=="Programmed")].status}' | xargs echo " Programmed:"
echo ""
echo "Policy Status:"
kubectl get authpolicy -n openshift-ingress gateway-auth-policy -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' 2>/dev/null | xargs echo " AuthPolicy:"
kubectl get tokenratelimitpolicy -n openshift-ingress gateway-token-rate-limits -o jsonpath='{.status.conditions[?(@.type=="Accepted")].status}' 2>/dev/null | xargs echo " TokenRateLimitPolicy:"
echo ""
echo "========================================="
echo "📝 Next Steps:"
echo "========================================="
echo ""
echo "1. Deploy a sample model:"
echo " kustomize build docs/samples/models/simulator | kubectl apply -f -"
echo ""
echo "2. Get OpenShift router hostname:"
# Fall back to the conventional default router name if the route isn't up yet.
ROUTER_HOST=$(oc get route maas-api-route -n maas-api -o jsonpath='{.status.ingress[0].routerCanonicalHostname}' 2>/dev/null || echo "router-default.$CLUSTER_DOMAIN")
echo " Router: $ROUTER_HOST"
echo ""
echo "3. Get a token:"
echo " ℹ️ Note: If DNS is not configured for *.${CLUSTER_DOMAIN}, use the router hostname with a Host header:"
echo ""
echo " curl -sSk \\"
echo " -H \"Host: maas-api.$CLUSTER_DOMAIN\" \\"
echo " -H \"Authorization: Bearer \$(oc whoami -t)\" \\"
echo " -H \"Content-Type: application/json\" \\"
echo " -X POST \\"
echo " -d '{\"expiration\": \"10m\"}' \\"
echo " \"https://$ROUTER_HOST/v1/tokens\""
echo ""
echo "4. Access endpoints:"
echo " MaaS API: https://maas-api.$CLUSTER_DOMAIN (via router: https://$ROUTER_HOST with Host header)"
echo " Gateway: https://gateway.$CLUSTER_DOMAIN (via router: https://$ROUTER_HOST with Host header)"
echo ""
echo "For troubleshooting, check the deployment guide at deployment/README.md"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment