Reproduction runbook for vcluster issue https://github.com/loft-sh/vcluster/issues/3560
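The runbook below builds a local reproduction of the issue: a Kind cluster with Cilium as CNI, an HA vcluster (3 syncer replicas, 3 deployed etcd replicas) installed with the production scheduling constraints, and an nginx test workload inside the vcluster. It then force-deletes the syncer and etcd pods and checks for the bug signature: pods whose containers are ready but which never get a Ready condition, leaving the service endpoints stuck in notReadyAddresses. It assumes kind, kubectl, helm, and jq are available locally.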
#!/usr/bin/env bash
# Runbook: reproduce the vcluster endpoint sync problem locally with Kind
# This script replicates the production environment to test the bug
set -euo pipefail

# Output colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
CLUSTER_NAME="vcluster-test"
NAMESPACE="test-env-development"
VCLUSTER_NAME="development-vcluster"

log_info() {
  echo -e "${GREEN}[INFO]${NC} $1"
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $1"
}
# Step 1: create a Kind cluster with 3 worker nodes
create_kind_cluster() {
  log_info "Creating Kind cluster with 3 worker nodes..."

  cat <<EOF | kind create cluster --name ${CLUSTER_NAME} --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  disableDefaultCNI: true # disable the default CNI so we can install Cilium
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
nodes:
- role: control-plane
  kubeadmConfigPatches:
  - |
    kind: InitConfiguration
    nodeRegistration:
      kubeletExtraArgs:
        node-labels: "ingress-ready=true"
- role: worker
  labels:
    team: "test-project"
    eng.it/project: "test-project"
    eng.it/karpenter-role: "project-test-project-env"
- role: worker
  labels:
    team: "test-project"
    eng.it/project: "test-project"
    eng.it/karpenter-role: "project-test-project-env"
- role: worker
  labels:
    team: "test-project"
    eng.it/project: "test-project"
    eng.it/karpenter-role: "project-test-project-env"
EOF

  log_info "Kind cluster created successfully"
}
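# Optional sanity check (not in the original gist): the workers should carry the
# custom labels and stay NotReady until Cilium is installed:
#   kubectl get nodes --show-labels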
# Step 2: install Cilium (configuration adapted for Kind)
install_cilium() {
  log_info "Installing Cilium..."

  # Add the Helm repo (ignore if it already exists)
  helm repo add cilium https://helm.cilium.io/ 2>/dev/null || true
  helm repo update

  # Install Cilium with a configuration similar to EKS but adapted for Kind
  # Note: no ENI mode on Kind; tunnel mode is used instead
  helm install cilium cilium/cilium \
    --namespace kube-system \
    --version 1.18.5 \
    --set ipam.mode=kubernetes \
    --set hubble.relay.enabled=true \
    --set hubble.ui.enabled=true \
    --set kubeProxyReplacement=true \
    --set k8sServiceHost=${CLUSTER_NAME}-control-plane \
    --set k8sServicePort=6443 \
    --set socketLB.hostNamespaceOnly=true \
    --set localRedirectPolicy=true \
    --set loadBalancer.serviceTopology=true \
    --wait

  log_info "Cilium installed successfully"

  # Wait until Cilium is ready
  log_info "Waiting for Cilium to become ready..."
  kubectl wait --for=condition=ready pod -l k8s-app=cilium -n kube-system --timeout=300s
}
# Step 3: create the namespace and taint the workers (simulates the Karpenter NodePool)
prepare_environment() {
  log_info "Preparing environment..."

  # Create the namespace with the label used by the nodeSelector
  kubectl create namespace ${NAMESPACE}
  kubectl label namespace ${NAMESPACE} team=test-project

  # Taint the worker nodes (simulates Karpenter behaviour).
  # Select workers via the team label set in the Kind config; Kind does not
  # add a "role=worker" label, so the original selector matched no nodes.
  log_info "Applying taints to worker nodes..."
  for node in $(kubectl get nodes -l team=test-project -o jsonpath='{.items[*].metadata.name}'); do
    kubectl taint nodes ${node} eng.it/karpenter-role=project-test-project-env:NoSchedule --overwrite
  done

  log_info "Environment prepared"
}
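# Optional sanity check (not in the original gist): confirm the taint landed on
# every worker:
#   kubectl describe nodes -l team=test-project | grep Taints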
# Step 4: install vcluster with the production-identical configuration
install_vcluster() {
  log_info "Installing vcluster with the production configuration..."

  # Create the values file
  cat > /tmp/vcluster-values.yaml <<EOF
# Simplified configuration for Kind - no external hostname
exportKubeConfig:
  server: "https://localhost:8443"
controlPlane:
  distro:
    k8s:
      enabled: true
      version: "v1.32.2"
  coredns:
    deployment:
      replicas: 2
  backingStore:
    etcd:
      deploy:
        enabled: true
        statefulSet:
          highAvailability:
            replicas: 3
          extraArgs:
          - "--auto-compaction-retention=30m"
          - "--auto-compaction-mode=periodic"
          - "--quota-backend-bytes=8589934592"
          scheduling:
            tolerations:
            - effect: "NoSchedule"
              key: "eng.it/karpenter-role"
              operator: "Equal"
              value: "project-test-project-env"
            nodeSelector:
              team: "test-project"
              "eng.it/project": "test-project"
              "eng.it/karpenter-role": "project-test-project-env"
            affinity:
              podAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  podAffinityTerm:
                    labelSelector:
                      matchExpressions:
                      - key: "app"
                        operator: "In"
                        values: ["vcluster-etcd"]
                      - key: "release"
                        operator: "In"
                        values: ["${VCLUSTER_NAME}"]
                    topologyKey: "topology.kubernetes.io/zone"
              podAntiAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                    - key: "app"
                      operator: "In"
                      values: ["vcluster-etcd"]
                    - key: "release"
                      operator: "In"
                      values: ["${VCLUSTER_NAME}"]
                  topologyKey: "kubernetes.io/hostname"
  statefulSet:
    highAvailability:
      replicas: 3
    scheduling:
      tolerations:
      - effect: "NoSchedule"
        key: "eng.it/karpenter-role"
        operator: "Equal"
        value: "project-test-project-env"
      nodeSelector:
        team: "test-project"
        "eng.it/project": "test-project"
        "eng.it/karpenter-role": "project-test-project-env"
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: "app"
                  operator: "In"
                  values: ["vcluster"]
                - key: "release"
                  operator: "In"
                  values: ["${VCLUSTER_NAME}"]
              topologyKey: "topology.kubernetes.io/zone"
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: "In"
                values: ["vcluster"]
              - key: "release"
                operator: "In"
                values: ["${VCLUSTER_NAME}"]
            topologyKey: "kubernetes.io/hostname"
sync:
  toHost:
    podDisruptionBudgets:
      enabled: true
    pods:
      enabled: true
      enforceTolerations:
      - "eng.it/karpenter-role=project-test-project-env:NoSchedule"
    serviceAccounts:
      enabled: true
    ingresses:
      enabled: true
  fromHost:
    nodes:
      enabled: true
      selector:
        labels:
          team: "test-project"
          "eng.it/project": "test-project"
          "eng.it/karpenter-role": "project-test-project-env"
integrations:
  metricsServer:
    enabled: true
    nodes: true
    pods: true
EOF

  # Add the vcluster repo (ignore if it already exists)
  helm repo add loft-sh https://charts.loft.sh 2>/dev/null || true
  helm repo update

  # Install vcluster
  helm install ${VCLUSTER_NAME} loft-sh/vcluster \
    --namespace ${NAMESPACE} \
    --version 0.31.0 \
    --values /tmp/vcluster-values.yaml \
    --wait \
    --timeout 600s

  log_info "vcluster installed successfully"

  # Wait until all pods are ready
  log_info "Waiting for vcluster to become ready..."
  kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
  kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s
}
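# Optional sanity check (not in the original gist): list the control-plane pods
# scheduled onto the tainted workers:
#   kubectl get pods -n test-env-development -o wide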
# Step 5: create the test workload
create_test_workload() {
  log_info "Creating test workload..."

  # Fetch the vcluster kubeconfig
  kubectl get secret vc-${VCLUSTER_NAME} -n ${NAMESPACE} -o jsonpath='{.data.config}' | base64 -d > /tmp/vcluster-kubeconfig.yaml

  # Make sure the secret actually existed
  if [ ! -s /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "Could not get kubeconfig from secret vc-${VCLUSTER_NAME}"
    exit 1
  fi
  log_info "Kubeconfig saved to /tmp/vcluster-kubeconfig.yaml"

  # Point the kubeconfig at the port-forward instead of the external hostname:
  # replace the server with localhost:8443
  sed -i "s|server: https://.*|server: https://localhost:8443|" /tmp/vcluster-kubeconfig.yaml

  # Start the port-forward in the background
  log_info "Starting port-forward to the vcluster..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!

  # Give the port-forward time to come up
  log_info "Waiting for port-forward (10s)..."
  sleep 10

  # Use the modified kubeconfig
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml

  # Create the test-app namespace
  log_info "Creating namespace test-app..."
  kubectl create namespace test-app 2>/dev/null || log_info "Namespace test-app already exists"

  # Create the nginx deployment
  log_info "Creating nginx deployment..."
  kubectl apply -f - <<'DEPLOYMENT'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-test
  namespace: test-app
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx
        image: nginx:alpine
        ports:
        - containerPort: 80
        readinessProbe:
          httpGet:
            path: /
            port: 80
          initialDelaySeconds: 5
          periodSeconds: 5
DEPLOYMENT

  # Create the nginx service
  log_info "Creating nginx service..."
  kubectl apply -f - <<'SERVICE'
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: test-app
spec:
  selector:
    app: nginx
  ports:
  - port: 80
    targetPort: 80
  type: ClusterIP
SERVICE

  log_info "Test workload created"

  # Wait for the pods to become ready
  log_info "Waiting for pods (30s)..."
  sleep 30

  # Check status
  log_info "Checking workload status..."
  kubectl get pods -n test-app
  log_info "Endpoint status:"
  kubectl get endpoints -n test-app

  # Save the port-forward PID for later cleanup
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
}
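# Note (not in the original gist): from this point the saved kubeconfig can also
# be used manually while the port-forward is running:
#   KUBECONFIG=/tmp/vcluster-kubeconfig.yaml kubectl get pods -n test-app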
# Step 6: simulate the problem (delete syncer + etcd, then restart the test workload)
simulate_problem() {
  log_warn "SIMULATION: deleting syncer + etcd and restarting the test workload..."

  # Talk to the host cluster, not the vcluster
  unset KUBECONFIG

  SYNCER_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster -o jsonpath='{.items[0].metadata.name}')
  ETCD_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster-etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")

  # Stop any existing port-forward
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    OLD_PID=$(cat /tmp/vcluster-port-forward.pid)
    kill ${OLD_PID} 2>/dev/null || true
    sleep 2
  fi

  # STEP 1: delete syncer + etcd
  if [ -n "$ETCD_POD" ]; then
    log_info "DEPLOYED etcd mode detected"
    log_info "Deleting pods: ${SYNCER_POD} and ${ETCD_POD}"
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force &
    kubectl delete pod ${ETCD_POD} -n ${NAMESPACE} --grace-period=0 --force &
    wait
  else
    log_info "EMBEDDED etcd mode detected"
    log_info "Deleting pod: ${SYNCER_POD}"
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force
  fi

  # STEP 2: restart the port-forward BEFORE deleting the nginx pods
  log_info "Restarting port-forward..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
  sleep 10

  # STEP 3: IMMEDIATELY restart the test pods
  log_info "Restarting test workload..."
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  kubectl delete pod -l app=nginx -n test-app --grace-period=0 --force
  unset KUBECONFIG

  # STEP 4: wait for the vcluster to recover
  log_info "Waiting for syncer + etcd to recover..."
  if [ -n "$ETCD_POD" ]; then
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
    kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s
  else
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
  fi

  # STEP 5: let things settle before verification (main runs verify_state next)
  sleep 15
}
# Step 7: verify the state and detect the problem
verify_state() {
  log_info "Verifying state after the simulation..."

  # The vcluster kubeconfig must exist
  if [ ! -f /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "vcluster kubeconfig not found"
    return 1
  fi

  # Use the kubeconfig that points at the port-forward
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml

  # Make sure the port-forward is still alive
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if ! kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_warn "Port-forward not running, restarting..."
      kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
      NEW_PID=$!
      echo ${NEW_PID} > /tmp/vcluster-port-forward.pid
      sleep 10
    fi
  fi

  # Connectivity test against the vcluster
  log_info "Testing connectivity to the vcluster..."
  if ! kubectl get nodes &>/dev/null; then
    log_error "Cannot connect to the vcluster"
    return 1
  fi

  # Look for pods whose containers are ready but whose Ready condition is missing
  log_info "Checking pod readiness..."
  kubectl get pods -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.status.containerStatuses[0].ready == true) |
    select(.status.conditions | map(select(.type == "Ready")) | length == 0) |
    "PROBLEM DETECTED: pod " + .metadata.name + " has a ready container but no Ready condition!"
  ' 2>/dev/null || true

  # Check the endpoints
  log_info "Checking endpoints..."
  kubectl get endpoints -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.subsets[0].notReadyAddresses) |
    "PROBLEM DETECTED: endpoint " + .metadata.name + " has addresses in notReadyAddresses: " + (.subsets[0].notReadyAddresses | map(.ip) | join(", "))
  ' 2>/dev/null || true

  # Detailed state
  log_info "nginx endpoint state:"
  kubectl get endpoints nginx -n test-app -o yaml 2>/dev/null || log_warn "Could not get endpoints"
  log_info "nginx pod state:"
  kubectl get pods -n test-app -o wide 2>/dev/null || log_warn "Could not get pods"
  log_info "nginx pod conditions:"
  kubectl get pods -n test-app -o json 2>/dev/null | jq '.items[].status.conditions' || log_warn "Could not get conditions"

  # Service connectivity test
  log_info "Testing service connectivity..."
  kubectl run test-client --image=curlimages/curl:latest --rm -i --restart=Never -n test-app -- curl -s -m 5 http://nginx 2>&1 || log_error "Connectivity test FAILED!"
}
# Step 8: cleanup
cleanup() {
  log_warn "Cleaning up environment..."

  # Stop the port-forward if it is running
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_info "Stopping port-forward..."
      kill ${PORT_FORWARD_PID} 2>/dev/null || true
    fi
    rm -f /tmp/vcluster-port-forward.pid
  fi

  kind delete cluster --name ${CLUSTER_NAME}
  rm -f /tmp/vcluster-values.yaml /tmp/vcluster-kubeconfig.yaml
  log_info "Cleanup complete"
}
# Main menu
main() {
  echo "================================"
  echo "vcluster Endpoint Sync Test Tool"
  echo "================================"
  echo ""
  echo "This script reproduces the endpoint sync problem locally"
  echo ""

  case "${1:-}" in
    setup)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      log_info "Setup complete! Run './runbook.sh test' to simulate the problem"
      ;;
    test)
      simulate_problem
      verify_state
      ;;
    cleanup)
      cleanup
      ;;
    full)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      simulate_problem
      verify_state
      log_warn "Test complete. Run './runbook.sh cleanup' to delete the cluster"
      ;;
    *)
      echo "Usage: $0 {setup|test|cleanup|full}"
      echo ""
      echo "Commands:"
      echo "  setup   - Create the Kind cluster and install vcluster"
      echo "  test    - Simulate the problem (requires setup)"
      echo "  cleanup - Delete the Kind cluster"
      echo "  full    - Run setup + test"
      echo ""
      echo "Example:"
      echo "  $0 full"
      exit 1
      ;;
  esac
}

main "$@"
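The gist also contains a second revision of the script, identical except for simulate_problem. That variant does not restart the nginx pods while the control plane is down; it waits for the vcluster pods to be recreated and for the environment to settle before main runs verify_state. Only the differing function is reproduced here: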
# Step 6: simulate the problem (delete one syncer and one etcd pod at the same time)
simulate_problem() {
  log_warn "SIMULATING PROBLEM: deleting vcluster pods..."

  # Save the original kubeconfig and talk to the host cluster
  ORIGINAL_KUBECONFIG=${KUBECONFIG:-}
  unset KUBECONFIG

  # Pick a syncer pod to delete (uses the host cluster's default kubeconfig)
  SYNCER_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster -o jsonpath='{.items[0].metadata.name}')

  # Check whether etcd runs separately (deployed mode) or embedded
  ETCD_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster-etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")

  # Stop the port-forward before deleting the pods
  log_info "Stopping existing port-forward..."
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    OLD_PID=$(cat /tmp/vcluster-port-forward.pid)
    kill ${OLD_PID} 2>/dev/null || true
    sleep 2
  fi

  if [ -n "$ETCD_POD" ]; then
    log_info "DEPLOYED etcd mode detected"
    log_info "Deleting pods: ${SYNCER_POD} and ${ETCD_POD}"
    # Delete both at the same time (simulates a Karpenter eviction)
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force &
    kubectl delete pod ${ETCD_POD} -n ${NAMESPACE} --grace-period=0 --force &
    wait
    log_warn "Pods deleted. Waiting for recreation..."
    sleep 30
    # Wait for both to be recreated
    log_info "Waiting for pod recreation..."
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s || true
    kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s || true
  else
    log_info "EMBEDDED etcd mode detected"
    log_info "Deleting pod: ${SYNCER_POD} (also contains etcd)"
    # Delete only the vcluster pod (which embeds etcd)
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force
    log_warn "Pod deleted. Waiting for recreation..."
    sleep 30
    log_info "Waiting for pod recreation..."
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s || true
  fi

  # Restart the port-forward once the pods are back
  log_info "Restarting port-forward..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid

  # Restore the kubeconfig
  if [ -n "$ORIGINAL_KUBECONFIG" ]; then
    export KUBECONFIG=$ORIGINAL_KUBECONFIG
  fi

  # Give the port-forward time to come up
  log_info "Waiting for port-forward (15s)..."
  sleep 15

  # Extra settle time
  log_info "Waiting for stabilization (60s)..."
  sleep 60
}
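Finally, the gist carries a standalone copy of the vcluster values. This variant hardcodes the release name and adds readiness and startup probe settings for the control-plane StatefulSet, giving the syncer time to warm up its cache after a restart: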
exportKubeConfig:
  server: "https://localhost:8443"
controlPlane:
  distro:
    k8s:
      enabled: true
      version: "v1.32.2"
  coredns:
    deployment:
      replicas: 2
  backingStore:
    etcd:
      deploy:
        enabled: true
        statefulSet:
          highAvailability:
            replicas: 3
          extraArgs:
          - "--auto-compaction-retention=30m"
          - "--auto-compaction-mode=periodic"
          - "--quota-backend-bytes=8589934592"
          scheduling:
            tolerations:
            - effect: "NoSchedule"
              key: "eng.it/karpenter-role"
              operator: "Equal"
              value: "project-test-project-env"
            nodeSelector:
              team: "test-project"
              "eng.it/project": "test-project"
              "eng.it/karpenter-role": "project-test-project-env"
            affinity:
              podAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  podAffinityTerm:
                    labelSelector:
                      matchExpressions:
                      - key: "app"
                        operator: "In"
                        values: ["vcluster-etcd"]
                      - key: "release"
                        operator: "In"
                        values: ["development-vcluster"]
                    topologyKey: "topology.kubernetes.io/zone"
              podAntiAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                    - key: "app"
                      operator: "In"
                      values: ["vcluster-etcd"]
                    - key: "release"
                      operator: "In"
                      values: ["development-vcluster"]
                  topologyKey: "kubernetes.io/hostname"
  statefulSet:
    probes:
      readinessProbe:
        enabled: true
        failureThreshold: 60
        periodSeconds: 2
        timeoutSeconds: 3
      # Use startupProbe to give the syncer time to warm up its cache
      # 30 attempts * 2 seconds = 60 seconds total warmup time
      startupProbe:
        enabled: true
        failureThreshold: 30
        periodSeconds: 2
        timeoutSeconds: 3
    highAvailability:
      replicas: 3
    scheduling:
      tolerations:
      - effect: "NoSchedule"
        key: "eng.it/karpenter-role"
        operator: "Equal"
        value: "project-test-project-env"
      nodeSelector:
        team: "test-project"
        "eng.it/project": "test-project"
        "eng.it/karpenter-role": "project-test-project-env"
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: "app"
                  operator: "In"
                  values: ["vcluster"]
                - key: "release"
                  operator: "In"
                  values: ["development-vcluster"]
              topologyKey: "topology.kubernetes.io/zone"
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: "app"
                operator: "In"
                values: ["vcluster"]
              - key: "release"
                operator: "In"
                values: ["development-vcluster"]
            topologyKey: "kubernetes.io/hostname"
sync:
  toHost:
    podDisruptionBudgets:
      enabled: true
    pods:
      enabled: true
      enforceTolerations:
      - "eng.it/karpenter-role=project-test-project-env:NoSchedule"
    serviceAccounts:
      enabled: true
    ingresses:
      enabled: true
  fromHost:
    nodes:
      enabled: true
      selector:
        labels:
          team: "test-project"
          "eng.it/project": "test-project"
          "eng.it/karpenter-role": "project-test-project-env"
integrations:
  metricsServer:
    enabled: true
    nodes: true
    pods: true
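A minimal sketch of applying these values to the release created by the runbook (the vcluster.yaml file name is illustrative):

helm upgrade development-vcluster loft-sh/vcluster \
  --namespace test-env-development \
  --version 0.31.0 \
  --values vcluster.yaml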