Skip to content

Instantly share code, notes, and snippets.

@clementnuss
Created October 30, 2025 15:10
Show Gist options
  • Select an option

  • Save clementnuss/f292c0b190a987c111ac151232d565fc to your computer and use it in GitHub Desktop.

Select an option

Save clementnuss/f292c0b190a987c111ac151232d565fc to your computer and use it in GitHub Desktop.
Kubernetes lock testing

NFSv3 Lock Testing on Kubernetes

Test NFSv3 file locking with persistent locks across multiple pods using Trident ONTAP-NAS storage.

Quick Start

kubectl apply -k .
kubectl exec -it deployment/nfs-test-nfs-lock-tester -- nfs-lock.sh test myfile.txt

Files

  • kustomization.yaml - Kustomize config with ConfigMap generator
  • pvc.yaml - RWX PVC using 'nfs' storage class (Trident ONTAP-NAS-economy)
  • deployment.yaml - 3 Alpine pods with anti-affinity, working dir /shared
  • nfs-lock.sh - Lock testing script with persistent lock support

Lock Script Usage

# Interactive test mode
nfs-lock.sh test myfile.txt

# Acquire temporary lock (10s timeout)
nfs-lock.sh lock myfile.txt 10

# Acquire persistent lock (survives script exit)
nfs-lock.sh lock myfile.txt forever

# Check lock status using flock
nfs-lock.sh check myfile.txt

# List all locks
nfs-lock.sh list

# Clean up stale locks
nfs-lock.sh cleanup

Testing NFS Lock Recovery

  1. Acquire persistent lock in pod A: nfs-lock.sh lock testfile forever
  2. Verify lock from pod B: nfs-lock.sh check testfile
  3. Reset the node (without giving it any time to release the lock, i.e. not a graceful shutdown but a reset)
  4. Check that the file remains locked until the host has rebooted
  5. Check lock recovery from pod B as soon as sm-notify has been executed
# Deployment of three Alpine pods that mount the shared volume at /shared and
# the lock-testing script at /usr/local/bin/nfs-lock.sh.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-lock-tester
spec:
  replicas: 3
  selector:
    matchLabels:
      app: nfs-lock-test
  template:
    metadata:
      labels:
        app: nfs-lock-test
    spec:
      affinity:
        # Soft preference: spread the replicas over different nodes so that
        # locks are contended across hosts.
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - nfs-lock-test
                topologyKey: kubernetes.io/hostname
      containers:
        - name: alpine-tester
          image: alpine:latest
          command: ["/bin/sh"]
          # Install the tools the lock script needs, then idle so the pod can
          # be used through `kubectl exec`.
          args:
            [
              "-c",
              "apk add --no-cache flock util-linux procps bash && sleep infinity",
            ]
          workingDir: /shared
          volumeMounts:
            - name: nfs-storage
              mountPath: /shared
            # Mount only the script file from the ConfigMap.
            - name: lock-script
              mountPath: /usr/local/bin/nfs-lock.sh
              subPath: nfs-lock.sh
          env:
            # Read by nfs-lock.sh as the root of the shared volume.
            - name: SHARED_DIR
              value: "/shared"
          resources:
            requests:
              memory: "64Mi"
              cpu: "50m"
            limits:
              memory: "128Mi"
              cpu: "100m"
      volumes:
        - name: nfs-storage
          persistentVolumeClaim:
            claimName: nfs-shared-storage
        - name: lock-script
          configMap:
            name: nfs-lock-script
            # Make the mounted script executable.
            defaultMode: 0755
# Kustomize entry point: bundles the PVC and Deployment and generates the
# ConfigMap that carries nfs-lock.sh.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - pvc.yaml
  - deployment.yaml
configMapGenerator:
  - name: nfs-lock-script
    files:
      - nfs-lock.sh
# Prefix applied to every resource name.
namePrefix: nfs-test-
namespace: default
commonLabels:
  app: nfs-lock-test
  version: v1.0.0
#!/bin/bash
#
# nfs-lock.sh - NFSv3 file locking test script.
#
# Usage:    nfs-lock.sh <command> [file] [timeout]
# Commands: lock, unlock, check, test, list, cleanup
#
# Environment:
#   SHARED_DIR  root of the shared volume (defaults to /shared)

# Abort on the first unhandled command failure.
set -e

# Resolve the shared root once; the lock directory and the log file both
# live underneath it.
: "${SHARED_DIR:=/shared}"
LOCK_DIR="${SHARED_DIR}/locks"
LOG_FILE="${SHARED_DIR}/lock.log"

# Lock and PID files are stored in LOCK_DIR; create it when missing.
[ -d "$LOCK_DIR" ] || mkdir -p "$LOCK_DIR"
# Print the command reference to stdout, then terminate the script with
# exit status 1.
usage() {
  cat <<EOF
Usage: $0 <command> <file> [timeout]
Commands:
 lock <file> [timeout] - Acquire exclusive lock on file (default timeout: 10s, use 'forever' or '0' for persistent lock)
 unlock <file> - Release lock on file
 check <file> - Check if file is locked
 test <file> - Interactive test mode
 list - List all active locks
 cleanup - Clean up stale locks

Examples:
 $0 lock myfile.txt 30
 $0 check myfile.txt
 $0 unlock myfile.txt
EOF
  exit 1
}
# Write one log line, prefixed with the current timestamp and the host name,
# to stdout and append the same line to $LOG_FILE.
# Arguments: the message words; they are joined into a single line.
log_message() {
  printf '[%s] [%s] %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" \
    "$(hostname)" \
    "$*" |
    tee -a "$LOG_FILE"
}
#######################################
# Acquire an exclusive flock(1) lock on "$LOCK_DIR/<file>.lock" and record
# the holder's host name (in the .lock file) and PID (in the .pid file).
#
# Globals:   LOCK_DIR (read)
# Arguments: $1 - lock name
#            $2 - timeout passed to timeout(1), default 10. "forever" or "0"
#                 requests a persistent lock that is held by a background
#                 subshell after this function returns.
# Outputs:   progress messages on stdout; log lines via log_message
# Returns:   1 when a timed lock cannot be acquired. In timed mode the
#            function blocks until ENTER (or end of input) and then releases
#            the lock through release_lock.
#######################################
acquire_lock() {
  local file="$1"
  local timeout="${2:-10}"
  local lock_file="$LOCK_DIR/${file}.lock"
  local pid_file="$LOCK_DIR/${file}.pid"

  if [ "$timeout" = "forever" ] || [ "$timeout" = "0" ]; then
    log_message "Attempting to acquire persistent lock on '$file' (no timeout)"
    # Background subshell that keeps fd 200 (and with it the lock) open after
    # the foreground script has exited.
    (
      if flock -x 200; then
        # Inside a subshell $$ still expands to the parent's PID; BASHPID is
        # the PID of the process that actually holds the lock.
        echo "$BASHPID" >"$pid_file"
        echo "$(hostname)" >"$lock_file"
        log_message "Successfully acquired persistent lock on '$file', background PID: $BASHPID"
        echo "Persistent lock acquired on '$file'. Background PID: $BASHPID, Host: $(hostname)"
        echo "Lock will persist until process is killed or system reboot"
        # Sleep forever to keep the lock. fd 200 is closed for the sleep
        # child: otherwise the child would keep the locked file description
        # open, and killing this subshell would not release the lock until
        # the sleep itself ended.
        while true; do
          sleep 3600 200>&-
        done
      else
        log_message "Failed to acquire persistent lock on '$file'"
        echo "Failed to acquire persistent lock on '$file'"
        exit 1
      fi 200>>"$lock_file" # append mode: never truncate a holder's metadata
    ) &
    local bg_pid=$!
    echo "Background lock process started with PID: $bg_pid"
    echo "To release lock manually: kill $bg_pid"
  else
    log_message "Attempting to acquire lock on '$file' (timeout: ${timeout}s)"
    # flock inherits fd 200 from this shell, so the lock stays held after the
    # flock process exits, for as long as fd 200 remains open here.
    if timeout "$timeout" flock -x 200; then
      echo $$ >"$pid_file"
      echo "$(hostname)" >"$lock_file"
      log_message "Successfully acquired lock on '$file'"
      echo "Lock acquired on '$file'. PID: $$, Host: $(hostname)"
      echo "Lock is active. Press ENTER to release lock..."
      # End of input must not abort the script (set -e) before the lock
      # files are cleaned up.
      read -r || true
      release_lock "$file"
    else
      log_message "Failed to acquire lock on '$file' (timeout or already locked)"
      echo "Failed to acquire lock on '$file'"
      return 1
    fi 200>>"$lock_file" # append mode: a failed attempt leaves the file intact
  fi
}
# Remove the bookkeeping files (<name>.lock and <name>.pid) of a lock.
# Globals:   LOCK_DIR (read)
# Arguments: $1 - lock name
# Outputs:   a status line on stdout; a log line when something was removed
release_lock() {
  local name="$1"
  local lock_path="$LOCK_DIR/${name}.lock"
  local pid_path="$LOCK_DIR/${name}.pid"

  # Guard: without a lock file there is nothing to release.
  if [ ! -f "$lock_path" ]; then
    echo "No lock found for '$name'"
    return
  fi

  rm -f "$lock_path" "$pid_path"
  log_message "Released lock on '$name'"
  echo "Lock released on '$name'"
}
#######################################
# Report whether "$LOCK_DIR/<file>.lock" is currently locked, by trying to
# take a non-blocking exclusive flock on it.
#
# Globals:   LOCK_DIR (read)
# Arguments: $1 - lock name
# Outputs:   one status line on stdout
# Returns:   0 when the file is locked, 1 when it is not
#######################################
check_lock() {
  local file="$1"
  local lock_file="$LOCK_DIR/${file}.lock"
  local pid_file="$LOCK_DIR/${file}.pid"
  local lock_host lock_pid

  # A name that was never locked has no lock file. Answer directly instead of
  # opening it, which would create an empty file that shows up in listings.
  if [ ! -e "$lock_file" ]; then
    echo "File '$file' is NOT locked"
    return 1
  fi

  # Try to acquire a non-blocking exclusive lock
  if flock -n -x 200; then
    echo "File '$file' is NOT locked"
    return 1
  else
    # File is locked, try to get metadata
    if [ -f "$lock_file" ] && [ -f "$pid_file" ]; then
      lock_host=$(cat "$lock_file" 2>/dev/null || echo "unknown")
      lock_pid=$(cat "$pid_file" 2>/dev/null || echo "unknown")
      echo "File '$file' is LOCKED by host '$lock_host', PID '$lock_pid'"
    else
      echo "File '$file' is LOCKED (no metadata available)"
    fi
    return 0
  fi 200>>"$lock_file" # append mode: opening must not erase the holder's host name
}
#######################################
# Print every lock file found in $LOCK_DIR together with the host name
# stored in it and the PID stored in the matching .pid file.
#
# Globals:   LOCK_DIR (read)
# Outputs:   a header line, then one line per lock, or " No active locks"
#######################################
list_locks() {
  local lock_file base_name lock_host pid_file lock_pid
  local found=0

  echo "Active locks in $LOCK_DIR:"
  # Iterate over the glob directly instead of parsing `ls` output; an
  # unmatched pattern stays literal and is rejected by the -f test.
  for lock_file in "$LOCK_DIR"/*.lock; do
    [ -f "$lock_file" ] || continue
    found=1
    base_name=${lock_file##*/}
    base_name=${base_name%.lock}
    lock_host=$(cat "$lock_file" 2>/dev/null || echo "unknown")
    pid_file="$LOCK_DIR/${base_name}.pid"
    lock_pid=$(cat "$pid_file" 2>/dev/null || echo "unknown")
    echo " $base_name -> Host: $lock_host, PID: $lock_pid"
  done

  # Decided by the same -f test as the listing, so a stray directory named
  # *.lock cannot suppress this message.
  if [ "$found" -eq 0 ]; then
    echo " No active locks"
  fi
}
#######################################
# Remove the lock and PID files of locks that nobody holds any more.
#
# Staleness is decided by probing the lock itself with a non-blocking flock.
# The recorded PID is not used for that decision: `kill -0` can only see
# processes in the local PID namespace, so a lock held from another host
# would look dead and its files would be deleted while it is still held.
#
# Globals:   LOCK_DIR (read)
# Outputs:   one line per removed lock and a summary line on stdout
#######################################
cleanup_locks() {
  local lock_file base_name pid_file lock_pid
  local cleaned=0

  echo "Cleaning up stale locks..."
  for lock_file in "$LOCK_DIR"/*.lock; do
    [ -f "$lock_file" ] || continue
    base_name=${lock_file##*/}
    base_name=${base_name%.lock}
    pid_file="$LOCK_DIR/${base_name}.pid"
    lock_pid=$(cat "$pid_file" 2>/dev/null || echo "unknown")

    # If the exclusive lock can be taken right now, no process holds it.
    # The file is opened in append mode so that probing a live lock never
    # truncates its metadata; a file that cannot be opened is skipped.
    if flock -n -x 200; then
      rm -f "$lock_file" "$pid_file"
      echo " Removed stale lock for '$base_name' (no longer held; recorded PID: $lock_pid)"
      cleaned=$((cleaned + 1))
    fi 200>>"$lock_file" || continue
  done 2>/dev/null
  echo "Cleaned up $cleaned stale locks"
}
#######################################
# Menu-driven loop for exercising the lock commands on one file.
#
# Globals:   LOG_FILE (read)
# Arguments: $1 - lock name used for every menu action
# Outputs:   menu, prompts and action results on stdout
# Returns:   when the user picks "Exit" or stdin reaches end of input
#######################################
interactive_test() {
  local file="$1"
  local choice

  echo "=== NFS Lock Interactive Test ==="
  echo "File: $file"
  echo "Host: $(hostname)"
  echo "PID: $$"
  echo ""
  while true; do
    echo "Choose an action:"
    echo "1) Check lock status"
    echo "2) Acquire lock (10s timeout)"
    echo "3) Acquire lock (30s timeout)"
    echo "4) Acquire persistent lock (forever)"
    echo "5) Release lock"
    echo "6) List all locks"
    echo "7) View lock log"
    echo "8) Exit"
    printf '%s' "Enter choice [1-8]: "
    # Leave the menu when stdin is closed instead of relying on set -e to
    # abort the script.
    read -r choice || break
    # A failing action (e.g. a lock that is already taken) is an expected
    # outcome here and must not terminate the session under set -e.
    case "$choice" in
      1) check_lock "$file" || true ;;
      2) acquire_lock "$file" 10 || true ;;
      3) acquire_lock "$file" 30 || true ;;
      4) acquire_lock "$file" forever || true ;;
      5) release_lock "$file" || true ;;
      6) list_locks || true ;;
      7)
        echo "=== Lock Log ==="
        tail -20 "$LOG_FILE" 2>/dev/null || echo "No log file found"
        ;;
      8)
        echo "Exiting..."
        break
        ;;
      *) echo "Invalid choice" ;;
    esac
    echo ""
  done
}
# Entry point: validate the arguments, then dispatch to the command handler.
[ $# -ge 1 ] || usage

command="$1"
file="$2"
timeout="$3"

# Every command except "list" and "cleanup" operates on a named file.
case "$command" in
  lock | unlock | check | test)
    if [ -z "$file" ]; then
      echo "Error: file name required"
      usage
    fi
    ;;
esac

case "$command" in
  lock) acquire_lock "$file" "$timeout" ;;
  unlock) release_lock "$file" ;;
  check) check_lock "$file" ;;
  test) interactive_test "$file" ;;
  list) list_locks ;;
  cleanup) cleanup_locks ;;
  *)
    echo "Error: unknown command '$command'"
    usage
    ;;
esac
# ReadWriteMany claim shared by all tester pods, provisioned from the 'nfs'
# storage class.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: nfs-shared-storage
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: nfs
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment