@irizzant
Last active February 6, 2026 16:07
#!/usr/bin/env bash
# Runbook: reproduce the vcluster endpoint sync problem locally with Kind
# This script replicates the production environment in order to test the bug
set -euo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
CLUSTER_NAME="vcluster-test"
NAMESPACE="test-env-development"
VCLUSTER_NAME="development-vcluster"

log_info() {
  echo -e "${GREEN}[INFO]${NC} $1"
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $1"
}
# Step 1: Create a Kind cluster with 3 worker nodes
create_kind_cluster() {
  log_info "Creating Kind cluster with 3 worker nodes..."
  cat <<EOF | kind create cluster --name ${CLUSTER_NAME} --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  disableDefaultCNI: true # disable the default CNI so we can install Cilium
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
nodes:
  - role: control-plane
    kubeadmConfigPatches:
      - |
        kind: InitConfiguration
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "ingress-ready=true"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
EOF
  log_info "Kind cluster created successfully"
}
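
# Optional sanity check (not invoked by main): confirms that the worker labels declared in
# the Kind config above actually landed on the nodes. Minimal sketch using plain kubectl.
verify_node_labels() {
  log_info "Worker nodes carrying the test-project labels:"
  kubectl get nodes -l team=test-project -L eng.it/project -L eng.it/karpenter-role
}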
# Step 2: Install Cilium (configuration adapted for Kind)
install_cilium() {
  log_info "Installing Cilium..."
  # Add the Helm repo (ignore if it already exists)
  helm repo add cilium https://helm.cilium.io/ 2>/dev/null || true
  helm repo update
  # Install Cilium with a configuration similar to EKS but adapted for Kind
  # Note: we do not use ENI mode on Kind, we use tunnel mode instead
  helm install cilium cilium/cilium \
    --namespace kube-system \
    --version 1.18.5 \
    --set ipam.mode=kubernetes \
    --set hubble.relay.enabled=true \
    --set hubble.ui.enabled=true \
    --set kubeProxyReplacement=true \
    --set k8sServiceHost=${CLUSTER_NAME}-control-plane \
    --set k8sServicePort=6443 \
    --set socketLB.hostNamespaceOnly=true \
    --set localRedirectPolicy=true \
    --set loadBalancer.serviceTopology=true \
    --wait
  log_info "Cilium installed successfully"
  # Wait for Cilium to be ready
  log_info "Waiting for Cilium to become ready..."
  kubectl wait --for=condition=ready pod -l k8s-app=cilium -n kube-system --timeout=300s
}
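
# Optional check (not invoked by main): if the cilium CLI happens to be installed on the
# host it can confirm agent and operator health before moving on; otherwise we just list
# the agent pods. Hedged sketch, not required by the runbook.
verify_cilium_status() {
  if command -v cilium >/dev/null 2>&1; then
    cilium status --wait
  else
    kubectl -n kube-system get pods -l k8s-app=cilium -o wide
  fi
}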
# Step 3: Create the namespace and taint the workers (simulates the Karpenter NodePool)
prepare_environment() {
  log_info "Preparing environment..."
  # Create the namespace with the label used by the nodeSelector
  kubectl create namespace ${NAMESPACE}
  kubectl label namespace ${NAMESPACE} team=test-project
  # Taint the worker nodes (simulates Karpenter behaviour).
  # Select them via the team label set in the Kind config: Kind does not add a role=worker label.
  log_info "Applying taints to the worker nodes..."
  for node in $(kubectl get nodes -l team=test-project -o jsonpath='{.items[*].metadata.name}'); do
    kubectl taint nodes ${node} eng.it/karpenter-role=project-test-project-env:NoSchedule --overwrite
  done
  log_info "Environment ready"
}
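
# Optional check (not invoked by main): prints the taints actually applied to the workers,
# so a misfiring node selector in prepare_environment is easy to spot. Minimal jsonpath sketch.
verify_node_taints() {
  kubectl get nodes -l team=test-project \
    -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.taints}{"\n"}{end}'
}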
# Step 4: Install vcluster with a production-like configuration
install_vcluster() {
  log_info "Installing vcluster with the production configuration..."
  # Write the values file
  cat > /tmp/vcluster-values.yaml <<EOF
# Simplified configuration for Kind - no external hostname
exportKubeConfig:
  server: "https://localhost:8443"
controlPlane:
  distro:
    k8s:
      enabled: true
      version: "v1.32.2"
  coredns:
    deployment:
      replicas: 2
  backingStore:
    etcd:
      deploy:
        enabled: true
        statefulSet:
          highAvailability:
            replicas: 3
          extraArgs:
            - "--auto-compaction-retention=30m"
            - "--auto-compaction-mode=periodic"
            - "--quota-backend-bytes=8589934592"
          scheduling:
            tolerations:
              - effect: "NoSchedule"
                key: "eng.it/karpenter-role"
                operator: "Equal"
                value: "project-test-project-env"
            nodeSelector:
              team: "test-project"
              "eng.it/project": "test-project"
              "eng.it/karpenter-role": "project-test-project-env"
            affinity:
              podAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                  - weight: 100
                    podAffinityTerm:
                      labelSelector:
                        matchExpressions:
                          - key: "app"
                            operator: "In"
                            values: ["vcluster-etcd"]
                          - key: "release"
                            operator: "In"
                            values: ["${VCLUSTER_NAME}"]
                      topologyKey: "topology.kubernetes.io/zone"
              podAntiAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  - labelSelector:
                      matchExpressions:
                        - key: "app"
                          operator: "In"
                          values: ["vcluster-etcd"]
                        - key: "release"
                          operator: "In"
                          values: ["${VCLUSTER_NAME}"]
                    topologyKey: "kubernetes.io/hostname"
  statefulSet:
    highAvailability:
      replicas: 3
    scheduling:
      tolerations:
        - effect: "NoSchedule"
          key: "eng.it/karpenter-role"
          operator: "Equal"
          value: "project-test-project-env"
      nodeSelector:
        team: "test-project"
        "eng.it/project": "test-project"
        "eng.it/karpenter-role": "project-test-project-env"
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: "app"
                      operator: "In"
                      values: ["vcluster"]
                    - key: "release"
                      operator: "In"
                      values: ["${VCLUSTER_NAME}"]
                topologyKey: "topology.kubernetes.io/zone"
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: "In"
                    values: ["vcluster"]
                  - key: "release"
                    operator: "In"
                    values: ["${VCLUSTER_NAME}"]
              topologyKey: "kubernetes.io/hostname"
sync:
  toHost:
    podDisruptionBudgets:
      enabled: true
    pods:
      enabled: true
      enforceTolerations:
        - "eng.it/karpenter-role=project-test-project-env:NoSchedule"
    serviceAccounts:
      enabled: true
    ingresses:
      enabled: true
  fromHost:
    nodes:
      enabled: true
      selector:
        labels:
          team: "test-project"
          "eng.it/project": "test-project"
          "eng.it/karpenter-role": "project-test-project-env"
integrations:
  metricsServer:
    enabled: true
    nodes: true
    pods: true
EOF
  # Add the vcluster Helm repo (ignore if it already exists)
  helm repo add loft-sh https://charts.loft.sh 2>/dev/null || true
  helm repo update
  # Install vcluster
  helm install ${VCLUSTER_NAME} loft-sh/vcluster \
    --namespace ${NAMESPACE} \
    --version 0.31.0 \
    --values /tmp/vcluster-values.yaml \
    --wait \
    --timeout 600s
  log_info "vcluster installed successfully"
  # Wait for all pods to be ready
  log_info "Waiting for vcluster to become ready..."
  kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
  kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s
}
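
# Optional alternative (not invoked by main): if the vcluster CLI is installed it can
# produce a kubeconfig without the manual port-forward used below. Hedged sketch; the
# runbook itself keeps using kubectl port-forward, and the output path here is arbitrary.
connect_with_vcluster_cli() {
  command -v vcluster >/dev/null 2>&1 || { log_warn "vcluster CLI not installed"; return 0; }
  vcluster connect ${VCLUSTER_NAME} --namespace ${NAMESPACE} --print > /tmp/vcluster-kubeconfig-cli.yaml
}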
# Step 5: Create the test workload
create_test_workload() {
  log_info "Creating test workload..."
  # Fetch the vcluster kubeconfig
  kubectl get secret vc-${VCLUSTER_NAME} -n ${NAMESPACE} -o jsonpath='{.data.config}' | base64 -d > /tmp/vcluster-kubeconfig.yaml
  # Make sure the secret actually existed
  if [ ! -s /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "Could not read the kubeconfig from secret vc-${VCLUSTER_NAME}"
    exit 1
  fi
  log_info "Kubeconfig saved to /tmp/vcluster-kubeconfig.yaml"
  # Point the kubeconfig at the port-forward instead of the external hostname:
  # replace the server with localhost:8443
  sed -i "s|server: https://.*|server: https://localhost:8443|" /tmp/vcluster-kubeconfig.yaml
  # Start the port-forward in the background
  log_info "Starting port-forward to the vcluster..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  # Give the port-forward time to come up
  log_info "Waiting for port-forward (10s)..."
  sleep 10
  # Use the patched kubeconfig
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  # Create the test-app namespace
  log_info "Creating namespace test-app..."
  kubectl create namespace test-app 2>/dev/null || log_info "Namespace test-app already exists"
  # Create the nginx deployment
  log_info "Creating nginx deployment..."
  kubectl apply -f - <<'DEPLOYMENT'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-test
  namespace: test-app
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx:alpine
          ports:
            - containerPort: 80
          readinessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
DEPLOYMENT
  # Create the nginx service
  log_info "Creating nginx service..."
  kubectl apply -f - <<'SERVICE'
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: test-app
spec:
  selector:
    app: nginx
  ports:
    - port: 80
      targetPort: 80
  type: ClusterIP
SERVICE
  log_info "Test workload created"
  # Wait for the pods to become ready
  log_info "Waiting for pods (30s)..."
  sleep 30
  # Check status
  log_info "Checking workload status..."
  kubectl get pods -n test-app
  log_info "Endpoint status:"
  kubectl get endpoints -n test-app
  # Save the port-forward PID for later cleanup
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
}
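
# Optional check (not invoked by main): the syncer copies virtual pods into the host
# namespace under translated names, so they can also be inspected from the host side.
# Minimal sketch; it temporarily clears KUBECONFIG so kubectl falls back to the host
# cluster's default kubeconfig instead of the vcluster one exported above.
show_synced_pods_on_host() {
  KUBECONFIG="" kubectl get pods -n ${NAMESPACE} -o wide
}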
# Step 6: Simulate the problem (delete syncer + etcd, then restart the test workload)
simulate_problem() {
  log_warn "SIMULATION: deleting syncer + etcd and restarting the test workload..."
  ORIGINAL_KUBECONFIG=${KUBECONFIG:-}
  unset KUBECONFIG
  SYNCER_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster -o jsonpath='{.items[0].metadata.name}')
  ETCD_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster-etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    OLD_PID=$(cat /tmp/vcluster-port-forward.pid)
    kill ${OLD_PID} 2>/dev/null || true
    sleep 2
  fi
  # STEP 1: Delete syncer + etcd
  if [ -n "$ETCD_POD" ]; then
    log_info "DEPLOYED etcd mode detected"
    log_info "Deleting pods: ${SYNCER_POD} and ${ETCD_POD}"
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force &
    kubectl delete pod ${ETCD_POD} -n ${NAMESPACE} --grace-period=0 --force &
    wait
  else
    log_info "EMBEDDED etcd mode detected"
    log_info "Deleting pod: ${SYNCER_POD}"
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force
  fi
  # STEP 2: Restart the port-forward BEFORE deleting the nginx pods
  log_info "Restarting port-forward..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
  sleep 10
  # STEP 3: IMMEDIATELY restart the test pods
  log_info "Restarting test workload..."
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  kubectl delete pod -l app=nginx -n test-app --grace-period=0 --force
  unset KUBECONFIG
  # STEP 4: Wait for the vcluster to recover
  log_info "Waiting for syncer + etcd to recover..."
  if [ -n "$ETCD_POD" ]; then
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
    kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s
  else
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
  fi
  # STEP 5: Let things settle before the first check
  sleep 15
  verify_state
}
# Step 7: Check the state and detect the problem
verify_state() {
  log_info "Checking post-simulation state..."
  # The vcluster kubeconfig must exist
  if [ ! -f /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "vcluster kubeconfig not found"
    return 1
  fi
  # Use the port-forwarded kubeconfig
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  # Make sure the port-forward is still running
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if ! kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_warn "Port-forward not running, restarting..."
      kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
      NEW_PID=$!
      echo ${NEW_PID} > /tmp/vcluster-port-forward.pid
      sleep 10
    fi
  fi
  # Connectivity test against the vcluster
  log_info "Testing connectivity to the vcluster..."
  if ! kubectl get nodes &>/dev/null; then
    log_error "Cannot connect to the vcluster"
    return 1
  fi
  # Look for pods whose containers are ready but whose Ready condition is missing
  log_info "Checking pod readiness..."
  kubectl get pods -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.status.containerStatuses[0].ready == true) |
    select(.status.conditions | map(select(.type == "Ready")) | length == 0) |
    "PROBLEM DETECTED: pod " + .metadata.name + " has a ready container but no Ready condition!"
  ' 2>/dev/null || true
  # Check the endpoints
  log_info "Checking endpoints..."
  kubectl get endpoints -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.subsets[0].notReadyAddresses) |
    "PROBLEM DETECTED: endpoint " + .metadata.name + " has addresses in notReadyAddresses: " + (.subsets[0].notReadyAddresses | map(.ip) | join(", "))
  ' 2>/dev/null || true
  # Detailed state
  log_info "nginx endpoint state:"
  kubectl get endpoints nginx -n test-app -o yaml 2>/dev/null || log_warn "Could not fetch endpoints"
  log_info "nginx pod state:"
  kubectl get pods -n test-app -o wide 2>/dev/null || log_warn "Could not fetch pods"
  log_info "nginx pod conditions:"
  kubectl get pods -n test-app -o json 2>/dev/null | jq '.items[].status.conditions' || log_warn "Could not fetch conditions"
  # Connectivity test against the service
  log_info "Testing service connectivity..."
  kubectl run test-client --image=curlimages/curl:latest --rm -i --restart=Never -n test-app -- curl -s -m 5 http://nginx 2>&1 || log_error "Connectivity test FAILED!"
}
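
# Optional alternative check (not invoked by main): the same readiness information is also
# visible through EndpointSlices, which is what modern service proxies consume. Minimal jq
# sketch under that assumption.
check_endpointslices() {
  kubectl get endpointslices -n test-app -o json | jq -r '
    .items[] | .metadata.name as $name |
    .endpoints[]? |
    select(.conditions.ready != true) |
    "EndpointSlice " + $name + ": address " + (.addresses | join(",")) + " is not ready"
  '
}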
# Step 8: Cleanup
cleanup() {
  log_warn "Cleaning up the environment..."
  # Stop the port-forward if it is still running
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_info "Stopping port-forward..."
      kill ${PORT_FORWARD_PID} 2>/dev/null || true
    fi
    rm -f /tmp/vcluster-port-forward.pid
  fi
  kind delete cluster --name ${CLUSTER_NAME}
  rm -f /tmp/vcluster-values.yaml /tmp/vcluster-kubeconfig.yaml
  log_info "Cleanup complete"
}
# Main menu
main() {
  echo "================================"
  echo "vcluster Endpoint Sync Test Tool"
  echo "================================"
  echo ""
  echo "This script reproduces the endpoint sync problem locally"
  echo ""
  case "${1:-}" in
    setup)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      log_info "Setup complete! Run './runbook.sh test' to simulate the problem"
      ;;
    test)
      simulate_problem
      verify_state
      ;;
    cleanup)
      cleanup
      ;;
    full)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      simulate_problem
      verify_state
      log_warn "Test complete. Run './runbook.sh cleanup' to delete the cluster"
      ;;
    *)
      echo "Usage: $0 {setup|test|cleanup|full}"
      echo ""
      echo "Commands:"
      echo "  setup   - Create the Kind cluster and install vcluster"
      echo "  test    - Simulate the problem (requires setup)"
      echo "  cleanup - Delete the Kind cluster"
      echo "  full    - Run setup + test"
      echo ""
      echo "Example:"
      echo "  $0 full"
      exit 1
      ;;
  esac
}
main "$@"
#!/usr/bin/env bash
# Runbook: reproduce the vcluster endpoint sync problem locally with Kind
# This script replicates the production environment in order to test the bug
set -euo pipefail

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
CLUSTER_NAME="vcluster-test"
NAMESPACE="test-env-development"
VCLUSTER_NAME="development-vcluster"

log_info() {
  echo -e "${GREEN}[INFO]${NC} $1"
}

log_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1"
}

log_error() {
  echo -e "${RED}[ERROR]${NC} $1"
}
# Step 1: Create a Kind cluster with 3 worker nodes
create_kind_cluster() {
  log_info "Creating Kind cluster with 3 worker nodes..."
  cat <<EOF | kind create cluster --name ${CLUSTER_NAME} --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
networking:
  disableDefaultCNI: true # disable the default CNI so we can install Cilium
  podSubnet: "10.244.0.0/16"
  serviceSubnet: "10.96.0.0/12"
nodes:
  - role: control-plane
    kubeadmConfigPatches:
      - |
        kind: InitConfiguration
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "ingress-ready=true"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
  - role: worker
    labels:
      team: "test-project"
      eng.it/project: "test-project"
      eng.it/karpenter-role: "project-test-project-env"
EOF
  log_info "Kind cluster created successfully"
}
# Step 2: Install Cilium (configuration adapted for Kind)
install_cilium() {
  log_info "Installing Cilium..."
  # Add the Helm repo (ignore if it already exists)
  helm repo add cilium https://helm.cilium.io/ 2>/dev/null || true
  helm repo update
  # Install Cilium with a configuration similar to EKS but adapted for Kind
  # Note: we do not use ENI mode on Kind, we use tunnel mode instead
  helm install cilium cilium/cilium \
    --namespace kube-system \
    --version 1.18.5 \
    --set ipam.mode=kubernetes \
    --set hubble.relay.enabled=true \
    --set hubble.ui.enabled=true \
    --set kubeProxyReplacement=true \
    --set k8sServiceHost=${CLUSTER_NAME}-control-plane \
    --set k8sServicePort=6443 \
    --set socketLB.hostNamespaceOnly=true \
    --set localRedirectPolicy=true \
    --set loadBalancer.serviceTopology=true \
    --wait
  log_info "Cilium installed successfully"
  # Wait for Cilium to be ready
  log_info "Waiting for Cilium to become ready..."
  kubectl wait --for=condition=ready pod -l k8s-app=cilium -n kube-system --timeout=300s
}
# Step 3: Create the namespace and taint the workers (simulates the Karpenter NodePool)
prepare_environment() {
  log_info "Preparing environment..."
  # Create the namespace with the label used by the nodeSelector
  kubectl create namespace ${NAMESPACE}
  kubectl label namespace ${NAMESPACE} team=test-project
  # Taint the worker nodes (simulates Karpenter behaviour).
  # Select them via the team label set in the Kind config: Kind does not add a role=worker label.
  log_info "Applying taints to the worker nodes..."
  for node in $(kubectl get nodes -l team=test-project -o jsonpath='{.items[*].metadata.name}'); do
    kubectl taint nodes ${node} eng.it/karpenter-role=project-test-project-env:NoSchedule --overwrite
  done
  log_info "Environment ready"
}
# Step 4: Install vcluster with a production-like configuration
install_vcluster() {
  log_info "Installing vcluster with the production configuration..."
  # Write the values file
  cat > /tmp/vcluster-values.yaml <<EOF
# Simplified configuration for Kind - no external hostname
exportKubeConfig:
  server: "https://localhost:8443"
controlPlane:
  distro:
    k8s:
      enabled: true
      version: "v1.32.2"
  coredns:
    deployment:
      replicas: 2
  backingStore:
    etcd:
      deploy:
        enabled: true
        statefulSet:
          highAvailability:
            replicas: 3
          extraArgs:
            - "--auto-compaction-retention=30m"
            - "--auto-compaction-mode=periodic"
            - "--quota-backend-bytes=8589934592"
          scheduling:
            tolerations:
              - effect: "NoSchedule"
                key: "eng.it/karpenter-role"
                operator: "Equal"
                value: "project-test-project-env"
            nodeSelector:
              team: "test-project"
              "eng.it/project": "test-project"
              "eng.it/karpenter-role": "project-test-project-env"
            affinity:
              podAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                  - weight: 100
                    podAffinityTerm:
                      labelSelector:
                        matchExpressions:
                          - key: "app"
                            operator: "In"
                            values: ["vcluster-etcd"]
                          - key: "release"
                            operator: "In"
                            values: ["${VCLUSTER_NAME}"]
                      topologyKey: "topology.kubernetes.io/zone"
              podAntiAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  - labelSelector:
                      matchExpressions:
                        - key: "app"
                          operator: "In"
                          values: ["vcluster-etcd"]
                        - key: "release"
                          operator: "In"
                          values: ["${VCLUSTER_NAME}"]
                    topologyKey: "kubernetes.io/hostname"
  statefulSet:
    highAvailability:
      replicas: 3
    scheduling:
      tolerations:
        - effect: "NoSchedule"
          key: "eng.it/karpenter-role"
          operator: "Equal"
          value: "project-test-project-env"
      nodeSelector:
        team: "test-project"
        "eng.it/project": "test-project"
        "eng.it/karpenter-role": "project-test-project-env"
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: "app"
                      operator: "In"
                      values: ["vcluster"]
                    - key: "release"
                      operator: "In"
                      values: ["${VCLUSTER_NAME}"]
                topologyKey: "topology.kubernetes.io/zone"
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: "In"
                    values: ["vcluster"]
                  - key: "release"
                    operator: "In"
                    values: ["${VCLUSTER_NAME}"]
              topologyKey: "kubernetes.io/hostname"
sync:
  toHost:
    podDisruptionBudgets:
      enabled: true
    pods:
      enabled: true
      enforceTolerations:
        - "eng.it/karpenter-role=project-test-project-env:NoSchedule"
    serviceAccounts:
      enabled: true
    ingresses:
      enabled: true
  fromHost:
    nodes:
      enabled: true
      selector:
        labels:
          team: "test-project"
          "eng.it/project": "test-project"
          "eng.it/karpenter-role": "project-test-project-env"
integrations:
  metricsServer:
    enabled: true
    nodes: true
    pods: true
EOF
  # Add the vcluster Helm repo (ignore if it already exists)
  helm repo add loft-sh https://charts.loft.sh 2>/dev/null || true
  helm repo update
  # Install vcluster
  helm install ${VCLUSTER_NAME} loft-sh/vcluster \
    --namespace ${NAMESPACE} \
    --version 0.31.0 \
    --values /tmp/vcluster-values.yaml \
    --wait \
    --timeout 600s
  log_info "vcluster installed successfully"
  # Wait for all pods to be ready
  log_info "Waiting for vcluster to become ready..."
  kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s
  kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s
}
# Step 5: Create the test workload
create_test_workload() {
  log_info "Creating test workload..."
  # Fetch the vcluster kubeconfig
  kubectl get secret vc-${VCLUSTER_NAME} -n ${NAMESPACE} -o jsonpath='{.data.config}' | base64 -d > /tmp/vcluster-kubeconfig.yaml
  # Make sure the secret actually existed
  if [ ! -s /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "Could not read the kubeconfig from secret vc-${VCLUSTER_NAME}"
    exit 1
  fi
  log_info "Kubeconfig saved to /tmp/vcluster-kubeconfig.yaml"
  # Point the kubeconfig at the port-forward instead of the external hostname:
  # replace the server with localhost:8443
  sed -i "s|server: https://.*|server: https://localhost:8443|" /tmp/vcluster-kubeconfig.yaml
  # Start the port-forward in the background
  log_info "Starting port-forward to the vcluster..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  # Give the port-forward time to come up
  log_info "Waiting for port-forward (10s)..."
  sleep 10
  # Use the patched kubeconfig
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  # Create the test-app namespace
  log_info "Creating namespace test-app..."
  kubectl create namespace test-app 2>/dev/null || log_info "Namespace test-app already exists"
  # Create the nginx deployment
  log_info "Creating nginx deployment..."
  kubectl apply -f - <<'DEPLOYMENT'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-test
  namespace: test-app
spec:
  replicas: 2
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx
          image: nginx:alpine
          ports:
            - containerPort: 80
          readinessProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 5
            periodSeconds: 5
DEPLOYMENT
  # Create the nginx service
  log_info "Creating nginx service..."
  kubectl apply -f - <<'SERVICE'
apiVersion: v1
kind: Service
metadata:
  name: nginx
  namespace: test-app
spec:
  selector:
    app: nginx
  ports:
    - port: 80
      targetPort: 80
  type: ClusterIP
SERVICE
  log_info "Test workload created"
  # Wait for the pods to become ready
  log_info "Waiting for pods (30s)..."
  sleep 30
  # Check status
  log_info "Checking workload status..."
  kubectl get pods -n test-app
  log_info "Endpoint status:"
  kubectl get endpoints -n test-app
  # Save the port-forward PID for later cleanup
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
}
# Step 6: Simulate the problem (delete 1 syncer pod + 1 etcd pod at the same time)
simulate_problem() {
  log_warn "PROBLEM SIMULATION: deleting vcluster pods..."
  # Save the original kubeconfig
  ORIGINAL_KUBECONFIG=${KUBECONFIG:-}
  unset KUBECONFIG
  # Pick a syncer pod to delete (uses the host cluster's default kubeconfig)
  SYNCER_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster -o jsonpath='{.items[0].metadata.name}')
  # Check whether etcd runs as separate pods (deployed mode) or embedded
  ETCD_POD=$(kubectl get pods -n ${NAMESPACE} -l app=vcluster-etcd -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
  # Stop the port-forward before deleting the pods
  log_info "Stopping existing port-forward..."
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    OLD_PID=$(cat /tmp/vcluster-port-forward.pid)
    kill ${OLD_PID} 2>/dev/null || true
    sleep 2
  fi
  if [ -n "$ETCD_POD" ]; then
    log_info "DEPLOYED etcd mode detected"
    log_info "Deleting pods: ${SYNCER_POD} and ${ETCD_POD}"
    # Delete both at the same time (simulates a Karpenter eviction)
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force &
    kubectl delete pod ${ETCD_POD} -n ${NAMESPACE} --grace-period=0 --force &
    wait
    log_warn "Pods deleted. Waiting for recreation..."
    sleep 30
    # Wait for both to come back
    log_info "Waiting for pod recreation..."
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s || true
    kubectl wait --for=condition=ready pod -l app=vcluster-etcd -n ${NAMESPACE} --timeout=300s || true
  else
    log_info "EMBEDDED etcd mode detected"
    log_info "Deleting pod: ${SYNCER_POD} (also contains etcd)"
    # Delete only the vcluster pod (which also runs the embedded etcd)
    kubectl delete pod ${SYNCER_POD} -n ${NAMESPACE} --grace-period=0 --force
    log_warn "Pod deleted. Waiting for recreation..."
    sleep 30
    # Wait for recreation
    log_info "Waiting for pod recreation..."
    kubectl wait --for=condition=ready pod -l app=vcluster -n ${NAMESPACE} --timeout=300s || true
  fi
  # Restart the port-forward once the pods are back
  log_info "Restarting port-forward..."
  kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
  PORT_FORWARD_PID=$!
  echo ${PORT_FORWARD_PID} > /tmp/vcluster-port-forward.pid
  # Restore the kubeconfig
  if [ -n "$ORIGINAL_KUBECONFIG" ]; then
    export KUBECONFIG=$ORIGINAL_KUBECONFIG
  fi
  # Give the port-forward time to come up
  log_info "Waiting for port-forward (15s)..."
  sleep 15
  # Allow extra time for things to settle
  log_info "Waiting for stabilization (60s)..."
  sleep 60
}
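
# Optional observer (not invoked by main): run this from a second terminal against the
# vcluster kubeconfig to watch how the nginx endpoint addresses move between ready and
# notReady while the syncer restarts. Minimal polling sketch.
watch_nginx_endpoints() {
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  while true; do
    kubectl get endpoints nginx -n test-app \
      -o jsonpath='ready: {.subsets[*].addresses[*].ip} / notReady: {.subsets[*].notReadyAddresses[*].ip}{"\n"}' || true
    sleep 5
  done
}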
# Step 7: Check the state and detect the problem
verify_state() {
  log_info "Checking post-simulation state..."
  # The vcluster kubeconfig must exist
  if [ ! -f /tmp/vcluster-kubeconfig.yaml ]; then
    log_error "vcluster kubeconfig not found"
    return 1
  fi
  # Use the port-forwarded kubeconfig
  export KUBECONFIG=/tmp/vcluster-kubeconfig.yaml
  # Make sure the port-forward is still running
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if ! kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_warn "Port-forward not running, restarting..."
      kubectl port-forward -n ${NAMESPACE} service/${VCLUSTER_NAME} 8443:443 &
      NEW_PID=$!
      echo ${NEW_PID} > /tmp/vcluster-port-forward.pid
      sleep 10
    fi
  fi
  # Connectivity test against the vcluster
  log_info "Testing connectivity to the vcluster..."
  if ! kubectl get nodes &>/dev/null; then
    log_error "Cannot connect to the vcluster"
    return 1
  fi
  # Look for pods whose containers are ready but whose Ready condition is missing
  log_info "Checking pod readiness..."
  kubectl get pods -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.status.containerStatuses[0].ready == true) |
    select(.status.conditions | map(select(.type == "Ready")) | length == 0) |
    "PROBLEM DETECTED: pod " + .metadata.name + " has a ready container but no Ready condition!"
  ' 2>/dev/null || true
  # Check the endpoints
  log_info "Checking endpoints..."
  kubectl get endpoints -n test-app -o json 2>/dev/null | jq -r '
    .items[] |
    select(.subsets[0].notReadyAddresses) |
    "PROBLEM DETECTED: endpoint " + .metadata.name + " has addresses in notReadyAddresses: " + (.subsets[0].notReadyAddresses | map(.ip) | join(", "))
  ' 2>/dev/null || true
  # Detailed state
  log_info "nginx endpoint state:"
  kubectl get endpoints nginx -n test-app -o yaml 2>/dev/null || log_warn "Could not fetch endpoints"
  log_info "nginx pod state:"
  kubectl get pods -n test-app -o wide 2>/dev/null || log_warn "Could not fetch pods"
  log_info "nginx pod conditions:"
  kubectl get pods -n test-app -o json 2>/dev/null | jq '.items[].status.conditions' || log_warn "Could not fetch conditions"
  # Connectivity test against the service
  log_info "Testing service connectivity..."
  kubectl run test-client --image=curlimages/curl:latest --rm -i --restart=Never -n test-app -- curl -s -m 5 http://nginx 2>&1 || log_error "Connectivity test FAILED!"
}
# Step 8: Cleanup
cleanup() {
  log_warn "Cleaning up the environment..."
  # Stop the port-forward if it is still running
  if [ -f /tmp/vcluster-port-forward.pid ]; then
    PORT_FORWARD_PID=$(cat /tmp/vcluster-port-forward.pid)
    if kill -0 ${PORT_FORWARD_PID} 2>/dev/null; then
      log_info "Stopping port-forward..."
      kill ${PORT_FORWARD_PID} 2>/dev/null || true
    fi
    rm -f /tmp/vcluster-port-forward.pid
  fi
  kind delete cluster --name ${CLUSTER_NAME}
  rm -f /tmp/vcluster-values.yaml /tmp/vcluster-kubeconfig.yaml
  log_info "Cleanup complete"
}
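
# Optional hardening (not wired into main): installing a trap ensures the background
# port-forward is killed even if the script exits early. Sketch only, under the assumption
# that the PID file written above is the single source of truth.
kill_port_forward_on_exit() {
  trap 'kill $(cat /tmp/vcluster-port-forward.pid 2>/dev/null) 2>/dev/null || true' EXIT
}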
# Main menu
main() {
  echo "================================"
  echo "vcluster Endpoint Sync Test Tool"
  echo "================================"
  echo ""
  echo "This script reproduces the endpoint sync problem locally"
  echo ""
  case "${1:-}" in
    setup)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      log_info "Setup complete! Run './runbook.sh test' to simulate the problem"
      ;;
    test)
      simulate_problem
      verify_state
      ;;
    cleanup)
      cleanup
      ;;
    full)
      create_kind_cluster
      install_cilium
      prepare_environment
      install_vcluster
      create_test_workload
      simulate_problem
      verify_state
      log_warn "Test complete. Run './runbook.sh cleanup' to delete the cluster"
      ;;
    *)
      echo "Usage: $0 {setup|test|cleanup|full}"
      echo ""
      echo "Commands:"
      echo "  setup   - Create the Kind cluster and install vcluster"
      echo "  test    - Simulate the problem (requires setup)"
      echo "  cleanup - Delete the Kind cluster"
      echo "  full    - Run setup + test"
      echo ""
      echo "Example:"
      echo "  $0 full"
      exit 1
      ;;
  esac
}
main "$@"
exportKubeConfig:
  server: "https://localhost:8443"
controlPlane:
  distro:
    k8s:
      enabled: true
      version: "v1.32.2"
  coredns:
    deployment:
      replicas: 2
  backingStore:
    etcd:
      deploy:
        enabled: true
        statefulSet:
          highAvailability:
            replicas: 3
          extraArgs:
            - "--auto-compaction-retention=30m"
            - "--auto-compaction-mode=periodic"
            - "--quota-backend-bytes=8589934592"
          scheduling:
            tolerations:
              - effect: "NoSchedule"
                key: "eng.it/karpenter-role"
                operator: "Equal"
                value: "project-test-project-env"
            nodeSelector:
              team: "test-project"
              "eng.it/project": "test-project"
              "eng.it/karpenter-role": "project-test-project-env"
            affinity:
              podAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                  - weight: 100
                    podAffinityTerm:
                      labelSelector:
                        matchExpressions:
                          - key: "app"
                            operator: "In"
                            values: ["vcluster-etcd"]
                          - key: "release"
                            operator: "In"
                            values: ["development-vcluster"]
                      topologyKey: "topology.kubernetes.io/zone"
              podAntiAffinity:
                requiredDuringSchedulingIgnoredDuringExecution:
                  - labelSelector:
                      matchExpressions:
                        - key: "app"
                          operator: "In"
                          values: ["vcluster-etcd"]
                        - key: "release"
                          operator: "In"
                          values: ["development-vcluster"]
                    topologyKey: "kubernetes.io/hostname"
  statefulSet:
    probes:
      readinessProbe:
        enabled: true
        failureThreshold: 60
        periodSeconds: 2
        timeoutSeconds: 3
      # Use startupProbe to give the syncer time to warm up its cache
      # 30 attempts * 2 seconds = 60 seconds total warmup time
      startupProbe:
        enabled: true
        failureThreshold: 30
        periodSeconds: 2
        timeoutSeconds: 3
    highAvailability:
      replicas: 3
    scheduling:
      tolerations:
        - effect: "NoSchedule"
          key: "eng.it/karpenter-role"
          operator: "Equal"
          value: "project-test-project-env"
      nodeSelector:
        team: "test-project"
        "eng.it/project": "test-project"
        "eng.it/karpenter-role": "project-test-project-env"
      affinity:
        podAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: "app"
                      operator: "In"
                      values: ["vcluster"]
                    - key: "release"
                      operator: "In"
                      values: ["development-vcluster"]
                topologyKey: "topology.kubernetes.io/zone"
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: "app"
                    operator: "In"
                    values: ["vcluster"]
                  - key: "release"
                    operator: "In"
                    values: ["development-vcluster"]
              topologyKey: "kubernetes.io/hostname"
sync:
  toHost:
    podDisruptionBudgets:
      enabled: true
    pods:
      enabled: true
      enforceTolerations:
        - "eng.it/karpenter-role=project-test-project-env:NoSchedule"
    serviceAccounts:
      enabled: true
    ingresses:
      enabled: true
  fromHost:
    nodes:
      enabled: true
      selector:
        labels:
          team: "test-project"
          "eng.it/project": "test-project"
          "eng.it/karpenter-role": "project-test-project-env"
integrations:
  metricsServer:
    enabled: true
    nodes: true
    pods: true
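
# Usage sketch (assumption: these values are saved as vcluster-values.yaml and the release
# and namespace names match the runbook above; adjust if yours differ):
#
#   helm upgrade --install development-vcluster loft-sh/vcluster \
#     --namespace test-env-development \
#     --version 0.31.0 \
#     --values vcluster-values.yaml
#
# Compared with the values generated by the runbook, the intended difference is the
# controlPlane.statefulSet.probes block above, which gives the syncer a startup grace period.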