🐳 Docker Learning Path: From Beginner to Advanced
Fundamentals
Core Concepts
Docker in Practice
Networking
Storage
Docker Compose
Security
Advanced Topics
Production Ready
Beyond Docker
Containerization Platform: Packages applications and their dependencies into portable containers
Lightweight Alternative to VMs: Containers share the host kernel instead of booting a full guest OS
Build Once, Run Anywhere: Consistent environments across development, testing, and production
✅ Consistency across environments
✅ Isolation of applications
✅ Resource efficiency (vs VMs)
✅ Rapid deployment and scaling
✅ Version control for environments
# Check installation
docker --version
docker-compose --version
docker run hello-world
# Information
docker version
docker info
docker system df # Disk usage
# Lifecycle
docker run <image>
docker start <container>
docker stop <container>
docker restart <container>
docker rm <container>
docker rmi <image>
# Monitoring
docker ps # Running containers
docker ps -a # All containers
docker logs <container>
docker stats # Real-time stats
Client (CLI) → Docker Host (Docker Daemon) → Images/Containers
↓
Registry (Docker Hub)
Images - Blueprint/template (read-only)
Containers - Running instance of an image
Registries - Storage and distribution of images
# Base image
FROM ubuntu:20.04
# Metadata
LABEL maintainer="your@email.com"
# Set working directory
WORKDIR /app
# Copy files
COPY . .
# Install dependencies
RUN apt-get update && apt-get install -y python3
# Set environment variables
ENV PORT=8080
# Expose port
EXPOSE 8080
# Command to run
CMD ["python3" , "app.py" ]
# Build image
docker build -t myapp:1.0 .
# Tag image
docker tag myapp:1.0 myregistry.com/myapp:1.0
# Push to registry
docker push myregistry.com/myapp:1.0
# Pull from registry
docker pull ubuntu:latest
# List images
docker images
docker image ls
# Remove images
docker image rm <image_id>
docker image prune # Remove unused images
# Run with options
docker run -d --name mycontainer -p 80:80 nginx
# -d: detached mode
# -p: port mapping (host:container)
# --name: container name
# Execute commands in running container
docker exec -it mycontainer bash
# -it: interactive terminal
# Copy files
docker cp file.txt mycontainer:/path/
docker cp mycontainer:/path/file.txt ./
# Environment variables
docker run -e " ENV_VAR=value" image
# Volume mounting
docker run -v /host/path:/container/path image
# Network configuration
docker run --network=mynetwork image
# Resource limits
docker run --memory=" 512m" --cpus=" 1.5" image
# List networks
docker network ls
# Create network
docker network create mynetwork
# Connect container to network
docker network connect mynetwork mycontainer
# Inspect network
docker network inspect mynetwork
bridge (default): Isolated containers on the same host
host: Removes network isolation between container and host
overlay: Connects multiple Docker daemons (Swarm)
macvlan: Assigns MAC addresses to containers
none: No networking
# Single port
docker run -p 8080:80 nginx
# Range of ports
docker run -p 8080-8085:80-85 nginx
# Random host port
docker run -p 80 nginx
# Check with: docker port <container>
# Volumes (managed by Docker)
docker volume create myvolume
docker run -v myvolume:/data image
# Bind mounts (host filesystem)
docker run -v /host/path:/container/path image
# tmpfs mounts (in-memory)
docker run --tmpfs /app image
# List volumes
docker volume ls
# Inspect volume
docker volume inspect myvolume
# Remove unused volumes
docker volume prune
# Backup volume
docker run --rm -v myvolume:/data -v $(pwd):/backup ubuntu tar czf /backup/backup.tar.gz /data
docker-compose.yml Structure
version: '3.8'
services:
  web:
    build: .
    ports:
      - "8000:8000"
    volumes:
      - .:/code
    environment:
      - DEBUG=1
    depends_on:
      - db
      - redis
  db:
    image: postgres:13
    environment:
      POSTGRES_PASSWORD: secret
  redis:
    image: redis:alpine
volumes:
  postgres_data:
networks:
  app-network:
    driver: bridge
# Start services
docker-compose up
docker-compose up -d # detached mode
# Build and start
docker-compose up --build
# Stop services
docker-compose down
docker-compose down -v # remove volumes too
# View logs
docker-compose logs
docker-compose logs -f # follow
# Execute commands
docker-compose exec web bash
# Scale services
docker-compose up --scale web=3
# 1. Use official base images
FROM node:16-alpine
# 2. Don't run as root (Alpine images use addgroup/adduser)
RUN addgroup -S nodejs && adduser -S -G nodejs nodejs
USER nodejs
# 3. Scan for vulnerabilities
# docker scan <image>
# 4. Use .dockerignore
# .dockerignore file:
node_modules
.git
*.log
# Scan image vulnerabilities
docker scan nginx:latest
# Run container with security options
docker run --read-only \
--security-opt no-new-privileges \
--cap-drop ALL \
--cap-add NET_BIND_SERVICE \
nginx
# Content trust
export DOCKER_CONTENT_TRUST=1
# Build stage
FROM golang:1.19 AS builder
WORKDIR /app
COPY . .
RUN go build -o main .
# Final stage
FROM alpine:latest
WORKDIR /root/
COPY --from=builder /app/main .
CMD ["./main" ]
# Enable BuildKit
export DOCKER_BUILDKIT=1
# Build with secrets (not in image history)
RUN --mount=type=secret,id=mysecret cat /run/secrets/mysecret
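A secret mounted this way must also be supplied on the build command line; a minimal sketch, assuming the secret value lives in a local file named mysecret.txt:
# Pass the secret file to BuildKit under the id referenced in the Dockerfile
DOCKER_BUILDKIT=1 docker build --secret id=mysecret,src=./mysecret.txt -t myapp .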
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD curl -f http://localhost/ || exit 1
# Run with resource limits
docker run \
  --memory="512m" \
  --memory-swap="1g" \
  --cpus="1.5" \
  --cpu-shares="1024" \
  nginx
Docker Swarm (Orchestration)
# Initialize swarm
docker swarm init
# Deploy stack
docker stack deploy -c docker-compose.yml mystack
# Services
docker service ls
docker service scale web=5
# Update service
docker service update --image myapp:2.0 web
# Container metrics
docker stats
# Events
docker events
# Logging drivers
docker run --log-driver=syslog nginx
# pod.yaml
apiVersion : v1
kind : Pod
metadata :
name : myapp-pod
spec :
containers :
- name : myapp
image : myapp:1.0
ports :
- containerPort : 80
# GitHub Actions example
name : Build and Push
on : [push]
jobs :
build :
runs-on : ubuntu-latest
steps :
- uses : actions/checkout@v2
- name : Build Docker image
run : docker build -t myapp .
- name : Push to Registry
run : docker push myregistry/myapp
# Cleanup
docker system prune -a # Remove all unused data
# View container details
docker inspect <container>
# Save/Load images
docker save -o myimage.tar myimage:tag
docker load -i myimage.tar
# Docker Hub login
docker login
docker logout
# See container processes
docker top <container>
# Check container changes
docker diff <container>
# View resource usage
docker stats <container>
# Get real-time events
docker events --since '2024-01-01'
Web App : Nginx + Node.js + PostgreSQL
Microservices : 3-tier application
CI/CD Pipeline : GitHub Actions + Docker
Monitoring Stack : Prometheus + Grafana
Docker Certification (DCA)
Kubernetes (CKA)
Cloud Platforms : AWS ECS, Google GKE, Azure AKS
Service Mesh : Istio, Linkerd
Level          Skills                          Status
Beginner       Basic commands, Dockerfile      ☑️
Intermediate   Compose, Networking, Volumes    ☐
Advanced       Security, Multi-stage, Swarm    ☐
Expert         Production, Kubernetes, CI/CD   ☐
🐳 Intermediate Docker - Deep Dive Guide
📚 Intermediate Concepts Roadmap
Advanced Dockerfile Techniques
Networking Deep Dive
Storage Strategies
Docker Compose Mastery
Multi-Container Applications
Image Optimization
Docker Registry Management
Development Workflows
🏗️ ADVANCED DOCKERFILE TECHNIQUES
Multi-Stage Builds in Detail
# Stage 1: Builder - includes all build tools
FROM node:16 AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production
# Stage 2: Compiler/Transpiler
FROM builder AS compiler
COPY . .
RUN npm run build
# Stage 3: Production - minimal image
FROM node:16-alpine AS production
WORKDIR /app
# Copy only necessary files from previous stages
COPY --from=builder /app/node_modules ./node_modules
COPY --from=compiler /app/dist ./dist
COPY package.json .
# Non-root user for security
RUN addgroup -g 1001 -S nodejs && \
adduser -S nodejs -u 1001
USER nodejs
EXPOSE 3000
CMD ["node" , "dist/server.js" ]
Build Arguments and Environment Variables
# ARG for build-time only
ARG NODE_VERSION=16
FROM node:${NODE_VERSION}-alpine
# Build-time ARG can be passed as ENV at runtime
ARG BUILD_VERSION
ENV APP_VERSION=${BUILD_VERSION}
# Build with:
# docker build --build-arg NODE_VERSION=18 --build-arg BUILD_VERSION=2.0.0 .
.dockerignore Best Practices
# Essential .dockerignore
**/node_modules
**/.git
**/.DS_Store
**/npm-debug.log
**/yarn-error.log
# Build artifacts
**/dist
**/build
**/*.log
**/*.tmp
# Environment files (except production)
**/.env*
!**/.env.production
# IDE files
**/.vscode
**/.idea
**/*.swp
**/*.swo
Health Checks Implementation
# Different health check strategies (use one HEALTHCHECK per image)
# HTTP check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
  CMD curl -f http://localhost/health || exit 1
# Or script-based
# HEALTHCHECK CMD scripts/healthcheck.sh
# Or TCP check
# HEALTHCHECK CMD nc -z localhost 8080 || exit 1
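A minimal sketch of what the script-based variant might look like; the scripts/healthcheck.sh path and the /health endpoint are assumptions to adapt to your application:
#!/bin/sh
# scripts/healthcheck.sh - exit 0 when the app responds, non-zero otherwise
curl -fsS http://localhost:8080/health > /dev/null || exit 1
exit 0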
# Create custom bridge with options
docker network create \
--driver bridge \
--subnet=172.20.0.0/16 \
--ip-range=172.20.5.0/24 \
--gateway=172.20.5.254 \
my-custom-network
# Attach container with specific IP
docker run -d \
--name myapp \
--network my-custom-network \
--ip 172.20.5.10 \
nginx:alpine
# Connect existing container
docker network connect my-custom-network existing-container
# Multiple containers with same network alias (load balancing)
docker run -d --name web1 --network mynet --network-alias app nginx
docker run -d --name web2 --network mynet --network-alias app nginx
docker run -d --name web3 --network mynet --network-alias app nginx
# Test DNS resolution from another container
docker run --rm --network mynet alpine nslookup app
# Returns all three IPs
# Inspect container network
docker inspect <container> | grep -A 20 "NetworkSettings"
# Network connectivity test
docker run --rm --network mynet appropriate/curl curl http://app:80
# View iptables rules (for bridge networks)
sudo iptables -t nat -L -n
sudo iptables -L DOCKER-USER
# Capture container traffic
docker run --rm -it --net container:<container_name> nicolaka/netshoot tcpdump -i any port 80
Macvlan Networking (Direct Host Network Access)
# Create macvlan network
docker network create -d macvlan \
--subnet=192.168.1.0/24 \
--gateway=192.168.1.1 \
--ip-range=192.168.1.192/27 \
-o parent=eth0 \
my-macvlan-net
# Run container with macvlan
docker run -d \
--name macvlan-container \
--network my-macvlan-net \
--ip=192.168.1.200 \
nginx
# Container now has its own MAC and IP on physical network
Volume Drivers and Plugins
# List available volume drivers
docker volume create --driver local \
--opt type=none \
--opt o=bind \
--opt device=/path/on/host \
named_volume
# NFS volume example
docker volume create --driver local \
--opt type=nfs \
--opt o=addr=192.168.1.100,rw \
--opt device=:/path/on/nfs \
nfs_volume
Volume Backup and Migration
# Backup volume to tar
docker run --rm \
  -v app_data:/volume_data \
  -v $(pwd):/backup \
  busybox \
  tar czf /backup/backup_$(date +%Y%m%d).tar.gz -C /volume_data .
# Restore volume from tar
docker run --rm \
  -v app_data:/volume_data \
  -v $(pwd):/backup \
  busybox \
  sh -c "cd /volume_data && tar xzf /backup/backup.tar.gz --strip 1"
# Volume migration between hosts
# 1. Backup on source
docker run --rm -v app_data:/data -v $(pwd):/backup alpine tar cf /backup/data.tar -C /data .
# 2. Copy tar to new host (see the scp example below)
# 3. Restore on destination
docker run --rm -v app_data_new:/data -v $(pwd):/backup alpine tar xf /backup/data.tar -C /data
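For step 2, any file transfer works; a sketch using scp (host name and destination path are placeholders):
# Copy the archive produced in step 1 to the destination host
scp ./data.tar user@new-host:/tmp/data.tar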
# In-memory storage (not persisted)
docker run -d \
--name tmpfs-app \
--tmpfs /app/temp:rw,noexec,nosuid,size=100m \
--tmpfs /app/cache:rw,noexec,nosuid,size=50m \
nginx
# In docker-compose
services:
  app:
    tmpfs:
      - /tmp:rw,noexec,nosuid,size=100m
Named Volumes with Labels
# Create labeled volumes for organization
docker volume create \
--label project=myapp \
--label environment=production \
--label type=database \
postgres_data
# List volumes by label
docker volume ls --filter label=project=myapp
# Backup a labeled volume (see below for looping over all volumes with a given label)
docker run --rm \
  -v $(pwd):/backup \
  -v postgres_data:/data \
  busybox tar czf /backup/postgres_backup.tar.gz -C /data .
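To back up every volume carrying a given label rather than one named volume, a loop over the filtered list does the job; a sketch assuming the project=myapp label from above:
# Back up each labeled volume into the current directory
for v in $(docker volume ls --filter label=project=myapp -q); do
  docker run --rm -v "$v":/data -v $(pwd):/backup busybox \
    tar czf /backup/"$v".tar.gz -C /data .
done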
Advanced docker-compose.yml Features
version : ' 3.8'
# Environment variable interpolation
x-common : &common
environment :
- TZ=America/New_York
- LOG_LEVEL=${LOG_LEVEL:-info}
logging :
driver : json-file
options :
max-size : " 10m"
max-file : " 3"
services :
web :
<< : *common # YAML anchor reference
build :
context : .
target : development # Multi-stage target
args :
NODE_ENV : development
ports :
- " ${HOST_PORT:-8080}:80"
volumes :
- type : bind
source : ./app
target : /app
consistency : cached
- type : volume
source : app-data
target : /data
depends_on :
db :
condition : service_healthy # Wait for health check
redis :
condition : service_started
deploy : # Swarm mode configurations
resources :
limits :
memory : 500M
reservations :
memory : 200M
restart_policy :
condition : on-failure
delay : 5s
max_attempts : 3
window : 120s
db :
image : postgres:14
environment :
POSTGRES_PASSWORD_FILE : /run/secrets/db_password
secrets :
- db_password
volumes :
- postgres_data:/var/lib/postgresql/data
healthcheck :
test : ["CMD-SHELL", "pg_isready -U postgres"]
interval : 10s
timeout : 5s
retries : 5
start_period : 30s
redis :
image : redis:7-alpine
command : redis-server --appendonly yes
volumes :
- redis_data:/data
# Development tools (only for dev)
adminer :
profiles : ["dev"] # Conditional service
image : adminer
ports :
- " 8081:8080"
volumes :
app-data :
driver : local
driver_opts :
type : none
o : bind
device : ./data
postgres_data :
redis_data :
secrets :
db_password :
file : ./secrets/db_password.txt
networks :
default :
name : app-network
driver : bridge
ipam :
config :
- subnet : 172.20.0.0/16
Compose Override Patterns
# docker-compose.override.yml (development)
services :
web :
volumes :
- ./src:/app/src:ro # Mount source code
- ./config:/app/config
environment :
- NODE_ENV=development
- DEBUG=true
ports :
- " 9229:9229" # Debug port
db :
ports :
- " 5432:5432" # Expose DB for local tools
# docker-compose.prod.yml (production)
services :
web :
restart : always
deploy :
mode : replicated
replicas : 3
environment :
- NODE_ENV=production
logging :
driver : " json-file"
options :
max-size : " 100m"
max-file : " 3"
# Run with specific files
docker-compose -f docker-compose.yml -f docker-compose.prod.yml up
# Profile management
docker-compose --profile dev up # Start services with 'dev' profile
# Run one-off commands
docker-compose run --rm web npm test
docker-compose exec db psql -U postgres
# Configuration validation
docker-compose config # Validate and view final config
docker-compose config --services # List services
docker-compose config --volumes # List volumes
# Build specific services
docker-compose build --no-cache web
docker-compose build --parallel # Build multiple in parallel
# Environment variable files
# .env file at project root:
COMPOSE_PROJECT_NAME=myapp
HOST_PORT=3000
# Use different .env file
docker-compose --env-file .env.production up
🏗️ MULTI-CONTAINER APPLICATIONS
Full-Stack Application Example
# docker-compose.fullstack.yml
version : ' 3.8'
services :
# Frontend (React)
frontend :
build :
context : ./frontend
dockerfile : Dockerfile.dev
ports :
- " 3000:3000"
volumes :
- ./frontend/src:/app/src
- frontend_node_modules:/app/node_modules
environment :
- REACT_APP_API_URL=http://api:5000
depends_on :
- api
develop : # Development configuration
watch :
- action : sync
path : ./frontend/src
target : /app/src
- action : rebuild
path : ./frontend/package.json
# Backend API (Node.js)
api :
build : ./backend
ports :
- " 5000:5000"
volumes :
- ./backend:/app
- api_node_modules:/app/node_modules
environment :
- DATABASE_URL=postgresql://user:pass@db:5432/appdb
- REDIS_URL=redis://redis:6379
depends_on :
db :
condition : service_healthy
redis :
condition : service_started
healthcheck :
test : ["CMD", "curl", "-f", "http://localhost:5000/health"]
interval : 30s
timeout : 10s
retries : 3
# Database (PostgreSQL)
db :
image : postgres:14-alpine
environment :
POSTGRES_USER : user
POSTGRES_PASSWORD : pass
POSTGRES_DB : appdb
volumes :
- postgres_data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
healthcheck :
test : ["CMD-SHELL", "pg_isready -U user -d appdb"]
interval : 10s
timeout : 5s
retries : 5
# Cache (Redis)
redis :
image : redis:7-alpine
command : redis-server --requirepass pass
volumes :
- redis_data:/data
healthcheck :
test : ["CMD", "redis-cli", "--raw", "incr", "ping"]
interval : 10s
timeout : 5s
retries : 5
# Reverse Proxy (Nginx)
proxy :
image : nginx:alpine
ports :
- " 80:80"
- " 443:443"
volumes :
- ./nginx.conf:/etc/nginx/nginx.conf:ro
- ./ssl:/etc/nginx/ssl:ro
depends_on :
- frontend
- api
# Monitoring (Prometheus + Grafana)
prometheus :
image : prom/prometheus:latest
ports :
- " 9090:9090"
volumes :
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus_data:/prometheus
command :
- ' --config.file=/etc/prometheus/prometheus.yml'
- ' --storage.tsdb.path=/prometheus'
grafana :
image : grafana/grafana:latest
ports :
- " 3001:3000"
environment :
- GF_SECURITY_ADMIN_PASSWORD=admin
volumes :
- grafana_data:/var/lib/grafana
- ./grafana/dashboards:/etc/grafana/provisioning/dashboards
volumes :
postgres_data :
redis_data :
prometheus_data :
grafana_data :
frontend_node_modules :
api_node_modules :
networks :
default :
driver : bridge
ipam :
config :
- subnet : 172.22.0.0/16
Service Communication Patterns
# Inter-service communication
# 1. Direct container-to-container (same network)
curl http://api:5000/endpoint
# 2. Through reverse proxy
curl http://proxy/api/endpoint
# 3. Using service discovery
# Install dockerize for waiting
services:
  api:
    image: your-api
    depends_on:
      - db
    command: >
      sh -c "
        /usr/local/bin/dockerize -wait tcp://db:5432 -timeout 30s &&
        npm start
      "
# Bad - changes invalidate cache
FROM node:16
WORKDIR /app
# Changes frequently, so every build invalidates the cache from here on
COPY . .
RUN npm install # Runs every time
# Good - leverage cache
FROM node:16
WORKDIR /app
# Copy package files first
COPY package*.json ./
RUN npm ci --only=production # Cached unless packages change
# Copy rest of files
COPY . .
Size Reduction Techniques
# 1. Use Alpine/minimal base images (~110MB vs ~900MB for the full image)
FROM node:16-alpine
# 2. Clean up in same RUN layer
RUN apt-get update && apt-get install -y \
build-essential \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# 3. Remove unnecessary files
RUN npm install && \
npm cache clean --force && \
rm -rf /tmp/*
# 4. Use multi-stage builds (as shown earlier)
Docker Squash (Experimental)
# Install docker-squash
pip install docker-squash
# Squash image layers
docker-squash -t myapp:squashed myapp:latest
# Build with --squash (requires experimental features)
DOCKER_BUILDKIT=1 docker build --squash -t myapp:squashed .
# Dive - image layer analysis
docker run --rm -it \
-v /var/run/docker.sock:/var/run/docker.sock \
wagoodman/dive:latest myimage:tag
# Docker Slim
docker-slim build --target myimage:tag --http-probe
# Skopeo for registry operations
skopeo inspect docker://nginx:latest
📦 DOCKER REGISTRY MANAGEMENT
# Run local registry
docker run -d \
-p 5000:5000 \
--name registry \
-v registry_data:/var/lib/registry \
registry:2
# Tag and push to local registry
docker tag myapp:latest localhost:5000/myapp:latest
docker push localhost:5000/myapp:latest
# Pull from local registry
docker pull localhost:5000/myapp:latest
# Secure registry with TLS
docker run -d \
-p 5000:5000 \
--name registry \
-v /certs:/certs \
-e REGISTRY_HTTP_TLS_CERTIFICATE=/certs/domain.crt \
-e REGISTRY_HTTP_TLS_KEY=/certs/domain.key \
registry:2
# List tags in registry
curl -X GET https://myregistry.com/v2/myapp/tags/list
# Delete specific tag
curl -X DELETE https://myregistry.com/v2/myapp/manifests/sha256:...
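The DELETE call needs the manifest digest, not the tag; the digest can be read from the Docker-Content-Digest response header when requesting the tag's manifest. A sketch (registry URL and repository are placeholders, and the registry must have deletes enabled):
# Look up the digest for tag 1.0, then delete that manifest
DIGEST=$(curl -sI -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
  https://myregistry.com/v2/myapp/manifests/1.0 | \
  awk 'tolower($1) == "docker-content-digest:" {print $2}' | tr -d '\r')
curl -X DELETE https://myregistry.com/v2/myapp/manifests/$DIGEST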
# Use registry garbage collection
docker exec registry bin/registry garbage-collect /etc/docker/registry/config.yml
# Harbor (Enterprise registry) cleanup
# Use Harbor UI or API for retention policies
# Enable Docker Content Trust
export DOCKER_CONTENT_TRUST=1
# Push signed image
docker push myregistry.com/myapp:1.0
# Pull with verification
docker pull myregistry.com/myapp:1.0
# View trust data
docker trust inspect myregistry.com/myapp:1.0
# docker-compose.dev.yml
services :
api :
build :
context : .
target : development
volumes :
- ./src:/app/src
- ./config:/app/config
environment :
- NODE_ENV=development
- DEBUG=*
command : npm run dev # Auto-restart on changes
ports :
- " 9229:9229" # Debug port
develop :
watch :
- action : sync+restart
path : ./src
target : /app/src
# Interactive debugging
docker run -it --rm --name debug \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
myapp:latest sh
# Debug running container
docker exec -it <container> bash
docker exec -it <container> sh -c "cat /proc/1/status"
# Network debugging container (nicolaka/netshoot is a network troubleshooting toolbox)
docker run --rm -it \
  --net container:<target_container> \
  nicolaka/netshoot
# Log aggregation
docker logs --tail 100 -f <container>
docker-compose logs -f --tail=50
# docker-compose.test.yml
services :
test :
build :
context : .
target : test
environment :
- NODE_ENV=test
- DATABASE_URL=postgresql://test:test@test-db:5432/test
depends_on :
test-db :
condition : service_healthy
volumes :
- ./coverage:/app/coverage
test-db :
image : postgres:14-alpine
environment :
POSTGRES_USER : test
POSTGRES_PASSWORD : test
POSTGRES_DB : test
healthcheck :
test : ["CMD-SHELL", "pg_isready -U test"]
# Run tests
docker-compose -f docker-compose.test.yml run --rm test npm test
docker-compose -f docker-compose.test.yml run --rm test npm run test:e2e
# Integration test
docker-compose -f docker-compose.yml -f docker-compose.test.yml up -d
docker-compose -f docker-compose.yml -f docker-compose.test.yml run test
CI/CD Pipeline Integration
# .gitlab-ci.yml example
stages :
- test
- build
- deploy
test :
stage : test
image : docker:20.10
services :
- docker:20.10-dind
script :
- docker-compose -f docker-compose.test.yml run --rm test
build :
stage : build
script :
- docker build -t $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA .
- docker push $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
deploy :
stage : deploy
script :
- docker stack deploy -c docker-compose.prod.yml myapp
📊 MONITORING AND MAINTENANCE
# Real-time stats
docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}"
# Resource usage history
docker run -d \
--name=cadvisor \
--volume=/:/rootfs:ro \
--volume=/var/run:/var/run:ro \
--volume=/sys:/sys:ro \
--volume=/var/lib/docker/:/var/lib/docker:ro \
--publish=8080:8080 \
google/cadvisor:latest
# Prometheus metrics endpoint
docker run -d \
  --name=node-exporter \
  --net="host" \
  --pid="host" \
  -v "/:/host:ro,rslave" \
  quay.io/prometheus/node-exporter:latest \
  --path.rootfs=/host
# Log drivers
docker run -d \
--log-driver=syslog \
--log-opt syslog-address=udp://logs.example.com:514 \
nginx
# JSON logging with rotation
docker run -d \
--log-driver=json-file \
--log-opt max-size=10m \
--log-opt max-file=3 \
nginx
# Centralized logging with ELK
# docker-compose.logging.yml
services:
  elasticsearch:
    image: elasticsearch:8.5
    environment:
      - discovery.type=single-node
  logstash:
    image: logstash:8.5
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
  kibana:
    image: kibana:8.5
    ports:
      - "5601:5601"
# Cleanup script
#!/bin/bash
# remove stopped containers
docker container prune -f
# remove unused images
docker image prune -f
# remove unused volumes
docker volume prune -f
# remove unused networks
docker network prune -f
# remove build cache
docker builder prune -f
# system-wide cleanup
docker system prune -af
# Remove images older than 7 days
docker image prune -a --filter "until=168h"
# Keep only latest 5 tags
docker images | grep myapp | tail -n +6 | awk '{print $3}' | xargs docker rmi
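These cleanup commands are usually run on a schedule rather than by hand; a sketch of a weekly cron entry, assuming the cleanup script above is saved as /usr/local/bin/docker-cleanup.sh:
# crontab -e: run the cleanup every Sunday at 03:00 and keep a log
0 3 * * 0 /usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1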
Common Issues and Solutions
# 1. Port already in use
# Check what's using the port
sudo lsof -i :8080
sudo netstat -tulpn | grep :8080
# 2. Container won't start
# Check logs
docker logs <container>
# Last error
docker inspect <container> --format='{{.State.Error}}'
# 3. Permission issues with volumes
# Run as specific user
docker run -u 1000:1000 -v $(pwd):/app nginx
# 4. Out of memory
# Check memory limits
docker inspect <container> --format='{{.HostConfig.Memory}}'
# Increase Docker Desktop resources or add swap
# 5. Network connectivity issues
# Test DNS
docker run --rm busybox nslookup google.com
# Test network from container
docker run --rm --network container:<target> appropriate/curl curl http://service:port
# 6. Slow builds
# Use BuildKit
export DOCKER_BUILDKIT=1
# Use cache mounts
docker build --build-arg BUILDKIT_INLINE_CACHE=1 -t myapp .
Three-tier web application (Frontend + API + Database)
Microservices architecture with service discovery
CI/CD pipeline with automated testing
Monitoring stack with metrics and logging
Development environment with live reload
Intermediate Topic       Status   Notes
Advanced Dockerfile      ☐        Multi-stage, ARG, healthchecks
Networking               ☐        Custom networks, DNS, macvlan
Storage                  ☐        Volume drivers, backup strategies
Docker Compose           ☐        Overrides, profiles, advanced config
Multi-container apps     ☐        Full-stack setup, service communication
Image optimization       ☐        Size reduction, layer caching
Registry management      ☐        Private registry, image signing
Development workflows    ☐        Hot reload, debugging, testing
Monitoring               ☐        Metrics, logging, maintenance
🐳 Docker Intermediate - Complete Guide (Part 2)
📚 Remaining Intermediate Topics
Container Orchestration Basics
Security Hardening
Performance Optimization
Docker in Production
Docker API and SDK
Custom Docker Tools
Docker with Cloud Providers
🎪 CONTAINER ORCHESTRATION BASICS
Docker Swarm Fundamentals
# Initialize Swarm (Manager node)
docker swarm init --advertise-addr <MANAGER-IP>
# Output provides the token worker nodes use to join
# Join as worker
docker swarm join --token <WORKER-TOKEN> <MANAGER-IP>:2377
# Join as manager
docker swarm join-token manager
docker swarm join --token <MANAGER-TOKEN> <MANAGER-IP>:2377
# View nodes
docker node ls
docker node inspect <NODE-ID>
# Promote/demote nodes
docker node promote <NODE-ID>
docker node demote <NODE-ID>
# Leave swarm
docker swarm leave --force # Manager
docker swarm leave # Worker
# Create a service
docker service create \
--name web \
--replicas 3 \
--publish published=80,target=80 \
--mount type=volume,source=web_data,target=/usr/share/nginx/html \
--constraint 'node.role==worker' \
--restart-condition on-failure \
--restart-delay 5s \
--update-delay 10s \
--update-parallelism 2 \
nginx:alpine
# Service management
docker service ls
docker service ps web
docker service logs web --follow
docker service inspect web --pretty
# Scale service
docker service scale web=5
# Update service
docker service update \
--image nginx:latest \
--update-parallelism 1 \
--update-delay 30s \
web
# Rollback update
docker service rollback web
# Remove service
docker service rm web
Swarm Stacks (docker-compose for Swarm)
# docker-compose.swarm.yml
version : ' 3.8'
services :
web :
image : nginx:alpine
deploy :
mode : replicated
replicas : 3
placement :
constraints :
- node.role == worker
update_config :
parallelism : 2
delay : 10s
order : start-first
failure_action : rollback
rollback_config :
parallelism : 1
delay : 5s
order : stop-first
restart_policy :
condition : on-failure
delay : 5s
max_attempts : 3
window : 120s
resources :
limits :
memory : 256M
reservations :
memory : 128M
labels :
- " com.example.description=Web server"
ports :
- target : 80
published : 8080
protocol : tcp
mode : ingress
volumes :
- type : volume
source : web_data
target : /usr/share/nginx/html
read_only : true
networks :
- webnet
configs :
- source : nginx_config
target : /etc/nginx/nginx.conf
redis :
image : redis:alpine
deploy :
mode : global # One per node
placement :
constraints :
- node.labels.redis == true
volumes :
- redis_data:/data
command : redis-server --appendonly yes
networks :
- webnet
visualizer :
image : dockersamples/visualizer:stable
deploy :
placement :
constraints : [node.role == manager]
ports :
- " 9000:8080"
volumes :
- " /var/run/docker.sock:/var/run/docker.sock"
networks :
- webnet
volumes :
web_data :
driver : local
redis_data :
driver : local
configs :
nginx_config :
file : ./nginx.conf
networks :
webnet :
driver : overlay
attachable : true
driver_opts :
encrypted : " true"
# Deploy stack
docker stack deploy -c docker-compose.swarm.yml myapp
# Stack commands
docker stack ls
docker stack ps myapp
docker stack services myapp
docker stack rm myapp
# Deploy with secrets
echo " mysecretpassword" | docker secret create db_password -
docker stack deploy -c docker-compose.swarm.yml myapp
# Overlay networks (span across nodes)
docker network create -d overlay \
--attachable \
--subnet=10.0.9.0/24 \
my-overlay-net
# Routing Mesh
# Ingress network automatically load balances across nodes
# External traffic → Any node → Target container
# DNS-based service discovery
# Services accessible by name within the overlay network
docker exec <container> nslookup web
# Returns VIP (Virtual IP) that load balances to service replicas
# In docker-compose.swarm.yml
secrets :
db_password :
external : true
api_key :
file : ./secrets/api_key.txt
ssl_cert :
external : true
services :
db :
image : postgres
secrets :
- db_password
environment :
POSTGRES_PASSWORD_FILE : /run/secrets/db_password
# Create secrets
echo " supersecret" | docker secret create db_password -
cat cert.pem | docker secret create ssl_cert -
docker secret create api_key ./api_key.txt
# List secrets
docker secret ls
docker secret inspect db_password
# Mount secrets in containers
# Automatically mounted at: /run/secrets/<secret_name>
# File permissions: 0444 (read-only)
# Only available to swarm services, not regular containers
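A quick way to see the mount in action, using a throwaway service; the service and secret names here are just examples:
# Create a secret, attach it to a one-off service, and read it back from a task
echo "supersecret" | docker secret create demo_secret -
docker service create --name secret-demo --secret demo_secret alpine sleep 1d
docker exec $(docker ps -q -f name=secret-demo) cat /run/secrets/demo_secret   # run on the node hosting the task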
# 1. Use trusted base images
FROM debian:bullseye-slim
# 2. Non-root user
RUN groupadd -r appuser && useradd -r -g appuser appuser
USER appuser
# 3. Principle of least privilege
# Install only necessary packages with --no-install-recommends
RUN apt-get update && apt-get install -y \
    --no-install-recommends \
    python3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# 4. Scan for vulnerabilities (in CI/CD)
# Use: docker scan myimage:tag
# 5. Sign images with Docker Content Trust
Secure Container Runtime Options
# Run with security constraints:
# read-only root filesystem, no privilege escalation, all capabilities dropped
# (only NET_BIND_SERVICE re-added), process and file-descriptor limits,
# a restricted tmpfs for /tmp, I/O rate limits, and memory/CPU limits
docker run -d \
  --name secure-app \
  --read-only \
  --security-opt=no-new-privileges \
  --cap-drop ALL \
  --cap-add NET_BIND_SERVICE \
  --pids-limit 100 \
  --ulimit nofile=1024:1024 \
  --tmpfs /tmp:rw,noexec,nosuid,size=100m \
  --device-read-bps /dev/sda:1mb \
  --memory="512m" \
  --memory-swap="1g" \
  --cpus="1.5" \
  nginx:alpine
# User namespace remapping (daemon.json)
# Maps container root to non-root host user
{
  "userns-remap": "default"
}
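Remapping only takes effect after a daemon restart; the subordinate ID ranges and docker info confirm it is active. A sketch, assuming a systemd host:
# Apply and verify user namespace remapping
sudo systemctl restart docker
grep dockremap /etc/subuid /etc/subgid      # ID ranges created for the remapped user
docker info --format '{{.SecurityOptions}}' # should list name=userns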
# Seccomp profiles
docker run --security-opt seccomp=./custom-seccomp.json nginx
// custom-seccomp.json
{
"defaultAction" : " SCMP_ACT_ERRNO" ,
"architectures" : [
" SCMP_ARCH_X86_64" ,
" SCMP_ARCH_X86" ,
" SCMP_ARCH_X32"
],
"syscalls" : [
{
"names" : [
" accept" ,
" accept4" ,
" access" ,
" arch_prctl" ,
" bind" ,
" brk" ,
" capget" ,
" capset" ,
" chdir" ,
" clock_gettime" ,
" clone" ,
" close" ,
" connect" ,
" dup" ,
" dup2" ,
" epoll_ctl" ,
" epoll_pwait" ,
" epoll_wait" ,
" execve" ,
" exit" ,
" exit_group" ,
" faccessat" ,
" fchdir" ,
" fchmod" ,
" fchown" ,
" fcntl" ,
" fdatasync" ,
" fgetxattr" ,
" flistxattr" ,
" fstat" ,
" fsync" ,
" ftruncate" ,
" futex" ,
" getcwd" ,
" getdents" ,
" getegid" ,
" geteuid" ,
" getgid" ,
" getgroups" ,
" getpeername" ,
" getpgid" ,
" getpgrp" ,
" getpid" ,
" getppid" ,
" getpriority" ,
" getrandom" ,
" getresgid" ,
" getresuid" ,
" getrlimit" ,
" getrusage" ,
" getsockname" ,
" getsockopt" ,
" gettid" ,
" gettimeofday" ,
" getuid" ,
" getxattr" ,
" inotify_add_watch" ,
" inotify_init1" ,
" inotify_rm_watch" ,
" ioctl" ,
" kill" ,
" listen" ,
" lseek" ,
" lstat" ,
" mkdir" ,
" mkdirat" ,
" mmap" ,
" mprotect" ,
" munmap" ,
" nanosleep" ,
" newfstatat" ,
" open" ,
" openat" ,
" pipe" ,
" poll" ,
" pread64" ,
" prlimit64" ,
" pwrite64" ,
" read" ,
" readlink" ,
" readv" ,
" recvfrom" ,
" recvmmsg" ,
" recvmsg" ,
" rename" ,
" renameat" ,
" restart_syscall" ,
" rmdir" ,
" rt_sigaction" ,
" rt_sigpending" ,
" rt_sigprocmask" ,
" rt_sigqueueinfo" ,
" rt_sigreturn" ,
" rt_sigsuspend" ,
" rt_sigtimedwait" ,
" rt_tgsigqueueinfo" ,
" sched_getaffinity" ,
" sched_getparam" ,
" sched_getscheduler" ,
" sched_setscheduler" ,
" sendfile" ,
" sendmmsg" ,
" sendmsg" ,
" sendto" ,
" set_robust_list" ,
" set_tid_address" ,
" setgid" ,
" setgroups" ,
" sethostname" ,
" setitimer" ,
" setpgid" ,
" setpriority" ,
" setresgid" ,
" setresuid" ,
" setsid" ,
" setsockopt" ,
" setuid" ,
" shutdown" ,
" sigaltstack" ,
" socket" ,
" socketpair" ,
" stat" ,
" symlink" ,
" symlinkat" ,
" sysinfo" ,
" tgkill" ,
" time" ,
" tkill" ,
" uname" ,
" unlink" ,
" unlinkat" ,
" utimensat" ,
" wait4" ,
" waitid" ,
" write" ,
" writev"
],
"action" : " SCMP_ACT_ALLOW"
}
]
}
# Generate AppArmor profile
aa-genprof /usr/bin/docker
# Load AppArmor profile
apparmor_parser -r /etc/apparmor.d/docker-profile
# Run container with AppArmor
docker run --security-opt apparmor=docker-profile nginx
# Deny write access example profile
#include <tunables/global>
profile docker-nginx flags=(attach_disconnected,mediate_deleted) {
  #include <abstractions/base>
# Deny all file writes
deny /** w,
# Allow specific paths
/tmp/** rw,
/var/log/nginx/** rw,
/var/cache/nginx/** rw,
# Network
network inet tcp,
network inet udp,
# Capabilities
capability chown,
capability net_bind_service,
capability setuid,
capability setgid,
}
# Run Docker security audit
docker run -it --net host --pid host --userns host --cap-add audit_control \
-e DOCKER_CONTENT_TRUST=1 \
-v /var/lib:/var/lib \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /usr/lib/systemd:/usr/lib/systemd \
-v /etc:/etc --label docker_bench_security \
docker/docker-bench-security
# Fix common issues
# 1. Enable user namespace
# 2. Set default ulimits in daemon.json
# 3. Enable content trust
# 4. Use seccomp profiles
# 5. Regular vulnerability scanning
# Trivy (comprehensive vulnerability scanner)
docker run aquasec/trivy image nginx:latest
docker run aquasec/trivy fs --security-checks vuln,secret,config ./
# Grype
docker run anchore/grype nginx:latest
# Clair
# Static analysis for vulnerabilities in appc/docker containers
# Snyk
docker run --rm -v /var/run/docker.sock:/var/run/docker.sock \
snyk/snyk-cli:docker monitor --docker nginx:latest
# DIY scanning script
#!/bin/bash
IMAGE=$1
docker scout cves $IMAGE
docker scout recommendations $IMAGE
docker scan $IMAGE
⚡ PERFORMANCE OPTIMIZATION
# Enable BuildKit (faster builds, better caching)
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain
# Use cache mounts (BuildKit feature)
docker build --build-arg BUILDKIT_INLINE_CACHE=1 -t myapp .
# Parallel multi-stage builds
docker buildx build --parallel .
# Use .dockerignore to exclude unnecessary files
# Reduce context size sent to daemon
# Multi-architecture builds
docker buildx create --use # Create builder instance
docker buildx build --platform linux/amd64,linux/arm64 -t myapp:multi .
# 1. Resource limits: CPU count and pinning, RAM/swap hard limits, a soft memory
#    reservation, block I/O weight (100-1000), and per-device read/write rate limits
docker run -d \
  --cpus="1.5" \
  --cpuset-cpus="0-3" \
  --memory="512m" \
  --memory-swap="1g" \
  --memory-reservation="256m" \
  --blkio-weight=500 \
  --device-read-bps="/dev/sda:1mb" \
  --device-write-bps="/dev/sda:1mb" \
  nginx
# 2. PIDs limit (prevent fork bombs)
docker run --pids-limit 100 nginx
# 3. Network optimization
docker run --network=host nginx # Best performance, less isolation
# 1. Use volumes instead of bind mounts for production
# Bind mounts have OS overhead, volumes are optimized
# 2. Choose appropriate volume driver
# local: default
# nfs: network storage
# tmpfs: in-memory (fastest)
# 3. Mount volumes as read-only when possible
docker run -v data:/app/data:ro nginx
# 4. Use :delegated on macOS for better bind-mount performance
docker run -v $(pwd):/app:delegated nginx
# Docker stats
docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}"
# cAdvisor for historical metrics
docker run -d \
--name=cadvisor \
--volume=/:/rootfs:ro \
--volume=/var/run:/var/run:ro \
--volume=/sys:/sys:ro \
--volume=/var/lib/docker/:/var/lib/docker:ro \
--publish=8080:8080 \
google/cadvisor:latest
# Dive for image optimization
docker run --rm -it \
-v /var/run/docker.sock:/var/run/docker.sock \
wagoodman/dive:latest myimage:tag
Performance Tuning Daemon
// /etc/docker/daemon.json
{
  "storage-driver": "overlay2",
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  },
  "default-ulimits": {
    "nofile": {
      "Name": "nofile",
      "Hard": 64000,
      "Soft": 64000
    },
    "nproc": {
      "Name": "nproc",
      "Hard": 64000,
      "Soft": 64000
    }
  },
  "live-restore": true,
  "max-concurrent-downloads": 3,
  "max-concurrent-uploads": 5,
  "registry-mirrors": [
    "https://mirror.gcr.io"
  ],
  "insecure-registries": [],
  "debug": false,
  "experimental": false,
  "metrics-addr": "127.0.0.1:9323"
}
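daemon.json changes only apply after the daemon is restarted (live-restore keeps running containers up across that restart); a sketch of applying and verifying, assuming a systemd host:
# Restart the daemon and confirm the settings took effect
sudo systemctl restart docker
docker info | grep -iE 'storage driver|logging driver|live restore'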
Production Dockerfile Patterns
# Multi-stage production build
FROM node:16-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci --only=production
FROM node:16-alpine AS runner
WORKDIR /app
# Create non-root user
RUN addgroup --system --gid 1001 nodejs && \
adduser --system --uid 1001 nodejs
# Copy necessary files
COPY --from=builder --chown=nodejs:nodejs /app/node_modules ./node_modules
COPY --chown=nodejs:nodejs . .
# Switch to non-root user
USER nodejs
# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD node healthcheck.js
# Run as PID 1 for proper signal handling
CMD ["node" , "server.js" ]
Production Deployment Checklist
# production-config.yml
version : ' 3.8'
services :
app :
image : myapp:${TAG:-latest}
deploy :
replicas : 3
update_config :
parallelism : 1
delay : 30s
order : start-first
failure_action : rollback
rollback_config :
parallelism : 0
order : stop-first
restart_policy :
condition : on-failure
delay : 10s
max_attempts : 3
window : 120s
placement :
constraints :
- node.role == worker
configs :
- source : app_config
target : /app/config/production.json
secrets :
- db_password
logging :
driver : " json-file"
options :
max-size : " 10m"
max-file : " 3"
tag : " {{.Name}}"
healthcheck :
test : ["CMD", "curl", "-f", "http://localhost:3000/health"]
interval : 30s
timeout : 10s
retries : 3
start_period : 60s
Zero-Downtime Deployments
#!/bin/bash
# Blue-Green Deployment Script
# Build new version
docker build -t myapp:$NEW_VERSION .
# Push to registry
docker push myregistry.com/myapp:$NEW_VERSION
# Create green deployment
docker stack deploy -c docker-compose-green.yml myapp-green
# Wait for green to be healthy
while ! docker service ls | grep myapp-green | grep -q "3/3"; do
sleep 5
done
# Switch traffic (update load balancer)
# Update DNS or load balancer config to point to green
# Monitor green for issues
sleep 300 # Monitor for 5 minutes
# Remove blue if green is stable
docker stack rm myapp-blue
# Rotate: green becomes new blue
docker service update --image myregistry.com/myapp:$NEW_VERSION myapp-blue
Backup and Recovery Strategy
#!/bin/bash
# Backup script for Docker production
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_DIR="/backup/docker/$DATE"
mkdir -p $BACKUP_DIR
# 1. Backup all volumes
for volume in $(docker volume ls -q); do
  docker run --rm -v $volume:/data -v $BACKUP_DIR:/backup alpine \
    tar czf /backup/${volume}.tar.gz -C /data .
done
# 2. Backup Docker Compose/Stack files
cp -r /opt/app/docker-compose*.yml $BACKUP_DIR/
# 3. Backup container configuration
docker inspect $(docker ps -aq) > $BACKUP_DIR/containers.json
# 4. Backup image list
docker images --format "{{.Repository}}:{{.Tag}}" > $BACKUP_DIR/images.txt
# 5. Create restore script
cat > $BACKUP_DIR/restore.sh << 'EOF'
#!/bin/bash
# Restore volumes
for volume_backup in *.tar.gz; do
  volume=${volume_backup%.tar.gz}
  docker volume create $volume
  docker run --rm -v $volume:/data -v $(pwd):/backup alpine \
    tar xzf /backup/$volume_backup -C /data
done
# Load images
while read image; do
  docker pull $image
done < images.txt
# Recreate containers using docker-compose files
docker-compose -f docker-compose.prod.yml up -d
EOF
chmod +x $BACKUP_DIR/restore.sh
# 6. Sync to remote storage
rsync -avz $BACKUP_DIR/ backup-server:/docker-backups/
# Centralized logging with Fluentd
services :
fluentd :
image : fluent/fluentd:v1.14-1
volumes :
- ./fluent.conf:/fluentd/etc/fluent.conf
- fluentd_data:/fluentd/log
ports :
- " 24224:24224"
- " 24224:24224/udp"
app :
image : myapp:latest
logging :
driver : " fluentd"
options :
fluentd-address : " localhost:24224"
tag : " docker.{{.Name}}"
# Alternative: ELK Stack
elasticsearch :
image : elasticsearch:8.5
environment :
- discovery.type=single-node
- " ES_JAVA_OPTS=-Xms512m -Xmx512m"
logstash :
image : logstash:8.5
volumes :
- ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
kibana :
image : kibana:8.5
ports :
- " 5601:5601"
# Enable API (daemon.json)
{
  "hosts": ["unix:///var/run/docker.sock", "tcp://0.0.0.0:2375"]
}
# REST API examples
curl --unix-socket /var/run/docker.sock http://localhost/v1.41/containers/json
curl --unix-socket /var/run/docker.sock http://localhost/v1.41/images/json
curl -X POST --unix-socket /var/run/docker.sock \
  http://localhost/v1.41/containers/create \
  -H "Content-Type: application/json" \
  -d '{"Image": "nginx:alpine"}'
# Using socat for TCP access
socat TCP-LISTEN:2375,reuseaddr,fork UNIX-CONNECT:/var/run/docker.sock &
curl http://localhost:2375/version
# requirements.txt
# docker==6.0.0
import docker
from docker .errors import APIError
# Connect to Docker daemon
client = docker .from_env ()
# Or connect remotely
# client = docker.DockerClient(base_url='tcp://localhost:2375')
# List containers
for container in client .containers .list (all = True ):
print (f"Container: { container .name } , Status: { container .status } " )
# Create container
container = client .containers .run (
image = "nginx:alpine" ,
name = "my-nginx" ,
ports = {'80/tcp' : 8080 },
volumes = {'/host/path' : {'bind' : '/container/path' , 'mode' : 'rw' }},
environment = {'ENV_VAR' : 'value' },
detach = True
)
# Build image
image , build_logs = client .images .build (
path = "." ,
tag = "myapp:latest" ,
buildargs = {"VERSION" : "1.0" },
rm = True # Remove intermediate containers
)
for log in build_logs :
if 'stream' in log :
print (log ['stream' ].strip ())
# Manage services (Swarm)
swarm_info = client .swarm .attrs
services = client .services .list ()
for service in services :
print (f"Service: { service .name } , Replicas: { service .attrs ['Spec' ]['Mode' ]['Replicated' ]['Replicas' ]} " )
# Execute command in container
exec_result = container .exec_run ("ls -la /" )
print (exec_result .output .decode ())
# Stream logs
for line in container .logs (stream = True , follow = True ):
print (line .decode ().strip ())
// package.json
// "dockerode": "^3.3.1"
const Docker = require ( 'dockerode' ) ;
const docker = new Docker ( ) ;
// List containers
docker . listContainers ( { all : true } , ( err , containers ) => {
containers . forEach ( containerInfo => {
console . log ( `Container: ${ containerInfo . Names [ 0 ] } ` ) ;
} ) ;
} ) ;
// Create container
docker . createContainer ( {
Image : 'nginx:alpine' ,
name : 'my-nginx' ,
ExposedPorts : { '80/tcp' : { } } ,
HostConfig : {
PortBindings : { '80/tcp' : [ { HostPort : '8080' } ] } ,
Binds : [ '/host/path:/container/path:rw' ]
} ,
Env : [ 'ENV_VAR=value' ]
} , ( err , container ) => {
container . start ( ( err , data ) => {
console . log ( 'Container started' ) ;
} ) ;
} ) ;
// Build image
const tar = require ( 'tar-fs' ) ;
const fs = require ( 'fs' ) ;
tar . pack ( './app' ) . pipe ( fs . createWriteStream ( './context.tar' ) ) ;
const buildStream = docker . buildImage ( './context.tar' , { t : 'myapp:latest' } ) ;
docker . modem . followProgress ( buildStream , ( err , output ) => {
console . log ( 'Build completed:' , output ) ;
} , event => {
console . log ( event . stream || event . status ) ;
} ) ;
package main
import (
	"context"
	"fmt"
	"os"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/client"
	"github.com/docker/docker/pkg/stdcopy"
	"github.com/docker/go-connections/nat"
)
func main () {
// Create client
cli , err := client .NewClientWithOpts (client .FromEnv , client .WithAPIVersionNegotiation ())
if err != nil {
panic (err )
}
// List containers
containers , err := cli .ContainerList (context .Background (), types.ContainerListOptions {All : true })
if err != nil {
panic (err )
}
for _ , container := range containers {
fmt .Printf ("Container: %s, Status: %s\n " , container .Names [0 ], container .State )
}
// Create container
resp , err := cli .ContainerCreate (context .Background (),
& container.Config {
Image : "nginx:alpine" ,
Env : []string {"ENV_VAR=value" },
},
& container.HostConfig {
PortBindings : nat.PortMap {
"80/tcp" : []nat.PortBinding {{HostPort : "8080" }},
},
},
nil , nil , "my-nginx" )
if err != nil {
panic (err )
}
// Start container
if err := cli .ContainerStart (context .Background (), resp .ID , types.ContainerStartOptions {}); err != nil {
panic (err )
}
// Stream logs
out , err := cli .ContainerLogs (context .Background (), resp .ID , types.ContainerLogsOptions {
ShowStdout : true ,
ShowStderr : true ,
Follow : true ,
})
if err != nil {
panic (err )
}
defer out .Close ()
stdcopy .StdCopy (os .Stdout , os .Stderr , out )
}
# Install Docker Extensions CLI
# Available in Docker Desktop 4.8+
# List extensions
docker extension ls
# Install extension
docker extension install \
  ghcr.io/docker/extension-<name>:latest
# Example extensions:
# - Resource Monitor
# - Logs Explorer
# - Volumes Backup & Share
# - Docker Scout
#!/usr/bin/env python3
# docker-manager.py - Custom Docker management tool
import click
import docker
import json
import subprocess
from datetime import datetime
client = docker .from_env ()
@click .group ()
def cli ():
"""Custom Docker Management Tool"""
pass
@cli .command ()
@click .option ('--all' , '-a' , is_flag = True , help = 'Include stopped containers' )
def ps (all ):
"""List containers with custom formatting"""
containers = client .containers .list (all = all )
for c in containers :
status = c .status
if c .status == 'running' :
status = click .style (c .status , fg = 'green' )
click .echo (f"{ c .short_id } { c .name :30} { status :15} { c .image .tags [0 ] if c .image .tags else '' } " )
@cli .command ()
@click .argument ('name' )
def logs (name ):
"""Follow container logs with timestamp"""
try :
container = client .containers .get (name )
for line in container .logs (stream = True , follow = True ):
timestamp = datetime .now ().strftime ('%Y-%m-%d %H:%M:%S' )
click .echo (f"[{ timestamp } ] { line .decode ().strip ()} " )
except docker .errors .NotFound :
click .echo (f"Container { name } not found" , err = True )
@cli .command ()
@click .argument ('pattern' )
def cleanup (pattern ):
"""Cleanup containers matching pattern"""
containers = client .containers .list (all = True , filters = {'name' : pattern })
for c in containers :
if click .confirm (f"Remove { c .name } ?" ):
c .remove (force = True )
click .echo (f"Removed { c .name } " )
@cli .command ()
def stats ():
"""Display container statistics"""
containers = client .containers .list ()
for c in containers :
stats = c .stats (stream = False )
cpu = stats ['cpu_stats' ]['cpu_usage' ]['total_usage' ]
memory = stats ['memory_stats' ]['usage' ]
click .echo (f"{ c .name :20} CPU: { cpu :10} Memory: { memory :10} " )
if __name__ == '__main__' :
cli ()
Docker Buildx Custom Builders
# Create custom builder with specific drivers
docker buildx create \
--name mybuilder \
--driver docker-container \
--driver-opt network=host \
--use
# Build with custom builder
docker buildx build --builder mybuilder -t myapp .
# Multi-platform builds
docker buildx build \
--platform linux/amd64,linux/arm64,linux/arm/v7 \
-t myapp:multi \
--push . # Push all platforms at once
# Create builder for remote Kubernetes cluster
docker buildx create \
--name k8s-builder \
--driver kubernetes \
--driver-opt namespace=buildkit \
--use
Health Check Monitoring Tool
#!/bin/bash
# health-monitor.sh
CONTAINERS=$(docker ps --format "{{.Names}}")
for container in $CONTAINERS; do
  # Check if container has health status
  health=$(docker inspect --format='{{.State.Health.Status}}' $container 2>/dev/null)
  if [ "$health" = "healthy" ]; then
    echo "✅ $container is healthy"
  elif [ "$health" = "unhealthy" ]; then
    echo "❌ $container is unhealthy"
    # Send alert
    curl -X POST -H 'Content-type: application/json' \
      --data "{\"text\":\"Container $container is unhealthy\"}" \
      $SLACK_WEBHOOK
  elif [ -n "$health" ]; then
    echo "⚠️ $container: $health"
  fi
done
# Check resource usage
ALERT_THRESHOLD=80
docker stats --no-stream --format "table {{.Container}}\t{{.CPUPerc}}\t{{.MemPerc}}" | \
while read container cpu mem; do
  if [[ $container != "CONTAINER" ]]; then
    cpu_num=${cpu%\%}
    mem_num=${mem%\%}
    if (( $(echo "$cpu_num > $ALERT_THRESHOLD" | bc -l) )); then
      echo "⚠️ High CPU: $container ($cpu)"
    fi
    if (( $(echo "$mem_num > $ALERT_THRESHOLD" | bc -l) )); then
      echo "⚠️ High Memory: $container ($mem)"
    fi
  fi
done
☁️ DOCKER WITH CLOUD PROVIDERS
AWS ECS (Elastic Container Service)
# task-definition.json
{
  "family": "myapp-task",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "256",
  "memory": "512",
  "executionRoleArn": "arn:aws:iam::account-id:role/ecsTaskExecutionRole",
  "containerDefinitions": [
    {
      "name": "myapp",
      "image": "account-id.dkr.ecr.region.amazonaws.com/myapp:latest",
      "portMappings": [
        {
          "containerPort": 80,
          "protocol": "tcp"
        }
      ],
      "logConfiguration": {
        "logDriver": "awslogs",
        "options": {
          "awslogs-group": "/ecs/myapp",
          "awslogs-region": "us-east-1",
          "awslogs-stream-prefix": "ecs"
        }
      },
      "environment": [
        {
          "name": "ENVIRONMENT",
          "value": "production"
        }
      ]
    }
  ]
}
# Build and push to ECR
aws ecr get-login-password --region region | docker login \
--username AWS \
--password-stdin account-id.dkr.ecr.region.amazonaws.com
docker build -t myapp .
docker tag myapp:latest account-id.dkr.ecr.region.amazonaws.com/myapp:latest
docker push account-id.dkr.ecr.region.amazonaws.com/myapp:latest
# Run task
aws ecs run-task \
--cluster my-cluster \
--task-definition myapp-task \
--launch-type FARGATE \
--network-configuration "awsvpcConfiguration={subnets=[subnet-xxx],securityGroups=[sg-xxx],assignPublicIp=ENABLED}"
# Dockerfile for Cloud Run
FROM node:16-slim
WORKDIR /usr/src/app
COPY package*.json ./
RUN npm ci --only=production
COPY . .
CMD [ "node" , "server.js" ]
# Build and push to Google Container Registry
gcloud auth configure-docker
docker build -t gcr.io/PROJECT-ID/myapp .
docker push gcr.io/PROJECT-ID/myapp
# Deploy to Cloud Run
gcloud run deploy myapp \
--image gcr.io/PROJECT-ID/myapp \
--platform managed \
--region us-central1 \
--allow-unauthenticated \
--memory 512Mi \
--cpu 1 \
--concurrency 80 \
--max-instances 10 \
--timeout 300s
# Update environment variables
gcloud run services update myapp \
--update-env-vars ENVIRONMENT=production \
--region us-central1
Azure Container Instances
# aci-deploy.yaml
apiVersion : 2019-12-01
location : eastus
name : myapp-container
properties :
containers :
- name : myapp
properties :
image : myregistry.azurecr.io/myapp:latest
resources :
requests :
cpu : 1
memoryInGB : 1.5
ports :
- port : 80
environmentVariables :
- name : ENVIRONMENT
value : production
- name : DATABASE_URL
secureValue : $(DATABASE_URL)
osType : Linux
imageRegistryCredentials :
- server : myregistry.azurecr.io
username : $(REGISTRY_USERNAME)
password : $(REGISTRY_PASSWORD)
ipAddress :
type : Public
ports :
- protocol : tcp
port : 80
tags :
app : myapp
environment : production
# Login to Azure Container Registry
az acr login --name myregistry
# Build and push
docker build -t myregistry.azurecr.io/myapp:latest .
docker push myregistry.azurecr.io/myapp:latest
# Deploy to ACI
az container create --resource-group myResourceGroup \
--file aci-deploy.yaml
# Set environment variables
az container create \
--resource-group myResourceGroup \
--name myapp \
--image myregistry.azurecr.io/myapp:latest \
--environment-variables ENVIRONMENT=production \
--secure-environment-variables DATABASE_URL=$DATABASE_URL \
--dns-name-label myapp-dns \
--ports 80
Multi-Cloud Docker Deployment
#!/bin/bash
# multi-cloud-deploy.sh
IMAGE="myapp:latest"
REGISTRIES=(
  "account-id.dkr.ecr.region.amazonaws.com/myapp"
  "gcr.io/PROJECT-ID/myapp"
  "myregistry.azurecr.io/myapp"
)
# Build once
docker build -t $IMAGE .
# Tag and push to all registries
for registry in "${REGISTRIES[@]}"; do
  docker tag $IMAGE $registry:latest
  docker push $registry:latest
done
# Deploy to AWS ECS
aws ecs update-service --cluster my-cluster --service myapp-service \
--force-new-deployment
# Deploy to Google Cloud Run
gcloud run deploy myapp --image gcr.io/PROJECT-ID/myapp --region us-central1
# Deploy to Azure ACI
az container create --resource-group myResourceGroup \
--name myapp --image myregistry.azurecr.io/myapp:latest
Project 1: Microservices Application
# Complete microservices setup
services :
# API Gateway
gateway :
image : nginx:alpine
configs :
- source : nginx_config
target : /etc/nginx/nginx.conf
ports :
- " 80:80"
depends_on :
- auth-service
- user-service
- product-service
# Auth Service
auth-service :
build : ./services/auth
environment :
- JWT_SECRET=secret
- REDIS_URL=redis://redis:6379
depends_on :
- redis
- postgres
# User Service
user-service :
build : ./services/user
environment :
- DATABASE_URL=postgresql://user:pass@postgres:5432/users
# Product Service
product-service :
build : ./services/product
environment :
- ELASTICSEARCH_URL=http://elasticsearch:9200
depends_on :
- elasticsearch
# Message Queue
rabbitmq :
image : rabbitmq:management
ports :
- " 15672:15672"
# Cache
redis :
image : redis:alpine
command : redis-server --appendonly yes
# Database
postgres :
image : postgres:14
environment :
POSTGRES_PASSWORD : pass
volumes :
- postgres_data:/var/lib/postgresql/data
# Search
elasticsearch :
image : elasticsearch:8.5
environment :
- discovery.type=single-node
- " ES_JAVA_OPTS=-Xms512m -Xmx512m"
# Monitoring
prometheus :
image : prom/prometheus
volumes :
- ./prometheus.yml:/etc/prometheus/prometheus.yml
ports :
- " 9090:9090"
grafana :
image : grafana/grafana
environment :
- GF_SECURITY_ADMIN_PASSWORD=admin
ports :
- " 3000:3000"
Project 2: CI/CD Pipeline with Docker
# .github/workflows/docker-pipeline.yml
name : Docker CI/CD
on :
push :
branches : [main]
pull_request :
branches : [main]
env :
REGISTRY : ghcr.io
IMAGE_NAME : ${{ github.repository }}
jobs :
test :
runs-on : ubuntu-latest
services :
postgres :
image : postgres:14
env :
POSTGRES_PASSWORD : postgres
options : >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
redis :
image : redis:alpine
options : >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps :
- uses : actions/checkout@v3
- name : Run tests
run : |
docker-compose -f docker-compose.test.yml run --rm test
build-and-push :
needs : test
runs-on : ubuntu-latest
permissions :
contents : read
packages : write
steps :
- uses : actions/checkout@v3
- name : Set up Docker Buildx
uses : docker/setup-buildx-action@v2
- name : Log in to registry
uses : docker/login-action@v2
with :
registry : ${{ env.REGISTRY }}
username : ${{ github.actor }}
password : ${{ secrets.GITHUB_TOKEN }}
- name : Extract metadata
id : meta
uses : docker/metadata-action@v4
with :
images : ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name : Build and push
uses : docker/build-push-action@v4
with :
context : .
push : ${{ github.event_name != 'pull_request' }}
tags : ${{ steps.meta.outputs.tags }}
labels : ${{ steps.meta.outputs.labels }}
cache-from : type=gha
cache-to : type=gha,mode=max
deploy :
needs : build-and-push
runs-on : ubuntu-latest
if : github.event_name == 'push' && github.ref == 'refs/heads/main'
steps :
- uses : actions/checkout@v3
- name : Deploy to production
run : |
echo ${{ secrets.SSH_PRIVATE_KEY }} > private_key
chmod 600 private_key
ssh -o StrictHostKeyChecking=no \
-i private_key \
user@server "cd /app && docker-compose pull && docker-compose up -d"
✅ INTERMEDIATE MASTERY CHECKLIST
Production-ready microservices application
Automated CI/CD pipeline with Docker
Custom Docker management tool
Multi-cloud deployment setup
Security-hardened container environment
🎯 WHAT'S NEXT: ADVANCED DOCKER
Advanced Topics to Explore
Kubernetes Deep Dive : Pods, Services, Ingress, Helm
Service Mesh : Istio, Linkerd for microservices
GitOps : ArgoCD, Flux for declarative deployments
Serverless Containers : Knative, OpenFaaS
Edge Computing : Docker on IoT/Edge devices
Custom Runtimes : containerd, runc internals
Docker Plugin Development : Create custom plugins
Security Scanning Automation : Integrate scanning in pipelines
Disaster Recovery : Multi-region deployments
Cost Optimization : Resource management and optimization
Docker Certified Associate (DCA)
Certified Kubernetes Administrator (CKA)
AWS Certified DevOps Engineer
Google Cloud Professional DevOps Engineer
📈 PROGRESS TRACKING COMPLETE
| Intermediate Topic | Status | Confidence (1-5) |
| --- | --- | --- |
| Container Orchestration | ☐ | |
| Security Hardening | ☐ | |
| Performance Optimization | ☐ | |
| Docker in Production | ☐ | |
| Docker API and SDK | ☐ | |
| Custom Docker Tools | ☐ | |
| Docker with Cloud Providers | ☐ | |
| OVERALL INTERMEDIATE MASTERY | ☐ | |
🐳 Docker Advanced - Complete Mastery Guide
📚 Advanced Docker Curriculum
Kubernetes Deep Dive
Service Mesh Architecture
GitOps and Declarative Deployments
Serverless Containers
Container Runtime Internals
Custom Docker Plugins
Enterprise Security & Compliance
Advanced Networking Patterns
Performance Engineering
Disaster Recovery & Multi-Region
Cost Optimization Strategies
Docker at Scale
Kubernetes Architecture Mastery
# Control Plane Components
apiVersion : v1
kind : Pod
metadata :
name : kube-apiserver
namespace : kube-system
spec :
containers :
- name : kube-apiserver
image : k8s.gcr.io/kube-apiserver:v1.27.0
command :
- kube-apiserver
- --advertise-address=10.0.0.1
- --allow-privileged=true
- --authorization-mode=Node,RBAC
- --client-ca-file=/etc/kubernetes/pki/ca.crt
- --enable-admission-plugins=NodeRestriction,PodSecurityPolicy
- --enable-bootstrap-token-auth=true
- --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
- --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
- --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
- --etcd-servers=https://127.0.0.1:2379
- --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
- --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
- --secure-port=6443
- --service-account-issuer=https://kubernetes.default.svc.cluster.local
- --service-account-key-file=/etc/kubernetes/pki/sa.pub
- --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
- --service-cluster-ip-range=10.96.0.0/12
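On a kubeadm-provisioned cluster the control plane runs as static pods like the one above; a few commands confirm what is actually deployed:
# Inspect the control plane components and their flags
kubectl cluster-info
kubectl -n kube-system get pods -l component=kube-apiserver
# Static pod manifests live on the control plane node itself
ls /etc/kubernetes/manifests/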
# Init Containers for Setup
apiVersion : v1
kind : Pod
metadata :
name : init-container-example
spec :
initContainers :
- name : init-myservice
image : busybox:1.28
command : ['sh', '-c', 'until nslookup myservice; do echo waiting for myservice; sleep 2; done;']
- name : init-mydb
image : busybox:1.28
command : ['sh', '-c', 'until nslookup mydb; do echo waiting for mydb; sleep 2; done;']
containers :
- name : main-app
image : nginx:alpine
ports :
- containerPort : 80
---
# Sidecar Pattern with Logging
apiVersion : v1
kind : Pod
metadata :
name : web-app-with-logging
spec :
containers :
- name : web-app
image : nginx:alpine
ports :
- containerPort : 80
volumeMounts :
- name : logs
mountPath : /var/log/nginx
- name : log-collector
image : fluent/fluentd:v1.14
volumeMounts :
- name : logs
mountPath : /var/log/nginx
- name : config
mountPath : /fluentd/etc
volumes :
- name : logs
emptyDir : {}
- name : config
configMap :
name : fluentd-config
---
# Ambassador Pattern
apiVersion : v1
kind : Pod
metadata :
name : redis-proxy
spec :
containers :
- name : app
image : myapp:latest
- name : redis-proxy
image : redis:alpine
command : ["redis-server", "--slaveof", "redis-master", "6379"]
Custom Controllers & Operators
// custom-controller/main.go
package main
import (
	"context"
	"fmt"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/fields" // needed for fields.Everything() below
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
)
type AppController struct {
clientset * kubernetes.Clientset
deploymentLister cache.Store
}
func (c * AppController ) Run (stopCh <- chan struct {}) {
// Watch for Deployment changes
watchlist := cache .NewListWatchFromClient (
c .clientset .AppsV1 ().RESTClient (),
"deployments" ,
"default" ,
fields .Everything (),
)
_ , controller := cache .NewInformer (
watchlist ,
& appsv1.Deployment {},
time .Second * 30 ,
cache.ResourceEventHandlerFuncs {
AddFunc : c .onAdd ,
UpdateFunc : c .onUpdate ,
DeleteFunc : c .onDelete ,
},
)
controller .Run (stopCh )
}
func (c * AppController ) onAdd (obj interface {}) {
deployment := obj .(* appsv1.Deployment )
fmt .Printf ("Deployment created: %s\n " , deployment .Name )
// Auto-scale based on labels
if deployment .Labels ["auto-scale" ] == "true" {
c .autoScale (deployment )
}
}
func (c * AppController ) autoScale (deployment * appsv1.Deployment ) {
// Custom autoscaling logic
replicas := int32 (3 )
deployment .Spec .Replicas = & replicas
_ , err := c .clientset .AppsV1 ().Deployments ("default" ).Update (
context .Background (),
deployment ,
metav1.UpdateOptions {},
)
if err != nil {
fmt .Printf ("Failed to scale: %v\n " , err )
}
}
# Custom Resource Definition (CRD)
apiVersion : apiextensions.k8s.io/v1
kind : CustomResourceDefinition
metadata :
name : databases.mycompany.com
spec :
group : mycompany.com
versions :
- name : v1
served : true
storage : true
schema :
openAPIV3Schema :
type : object
properties :
spec :
type : object
properties :
engine :
type : string
enum : ["postgres", "mysql", "mongodb"]
version :
type : string
replicas :
type : integer
minimum : 1
maximum : 10
storage :
type : string
backup :
type : object
properties :
enabled :
type : boolean
schedule :
type : string
retention :
type : integer
scope : Namespaced
names :
plural : databases
singular : database
kind : Database
shortNames :
- db
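Once the CRD is registered, the new resource behaves like any built-in kind (databases-crd.yaml is an assumed filename):
kubectl apply -f databases-crd.yaml
kubectl get crd databases.mycompany.com
# The short name declared above works immediately
kubectl get db --all-namespaces
kubectl explain databases.spec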
# Operator using kubebuilder
apiVersion : v1
kind : Namespace
metadata :
name : database-operator-system
---
apiVersion : apps/v1
kind : Deployment
metadata :
name : database-operator
namespace : database-operator-system
spec :
replicas : 1
selector :
matchLabels :
control-plane : database-operator
template :
metadata :
labels :
control-plane : database-operator
spec :
containers :
- name : manager
image : myregistry/database-operator:v1.0.0
args :
- --leader-elect
- --metrics-bind-address=:8080
- --health-probe-bind-address=:8081
env :
- name : WATCH_NAMESPACE
valueFrom :
fieldRef :
fieldPath : metadata.namespace
- name : POD_NAME
valueFrom :
fieldRef :
fieldPath : metadata.name
- name : OPERATOR_NAME
value : " database-operator"
ports :
- containerPort : 9443
name : webhook-server
protocol : TCP
volumeMounts :
- mountPath : /tmp/k8s-webhook-server/serving-certs
name : cert
readOnly : true
volumes :
- name : cert
secret :
defaultMode : 420
secretName : webhook-server-cert
# Node Affinity & Anti-Affinity
apiVersion : apps/v1
kind : Deployment
metadata :
name : critical-app
spec :
replicas : 3
selector :
matchLabels :
app : critical
template :
metadata :
labels :
app : critical
spec :
affinity :
nodeAffinity :
requiredDuringSchedulingIgnoredDuringExecution :
nodeSelectorTerms :
- matchExpressions :
- key : topology.kubernetes.io/zone
operator : In
values :
- us-east-1a
- us-east-1b
preferredDuringSchedulingIgnoredDuringExecution :
- weight : 100
preference :
matchExpressions :
- key : dedicated
operator : In
values :
- high-performance
podAntiAffinity :
requiredDuringSchedulingIgnoredDuringExecution :
- labelSelector :
matchExpressions :
- key : app
operator : In
values :
- critical
topologyKey : " kubernetes.io/hostname"
tolerations :
- key : " dedicated"
operator : " Equal"
value : " high-performance"
effect : " NoSchedule"
containers :
- name : app
image : nginx:alpine
resources :
requests :
memory : " 1Gi"
cpu : " 500m"
limits :
memory : " 2Gi"
cpu : " 1000m"
// custom-scheduler/main.go
package main
import (
	"context"
	"fmt"
	"net/http"
	"time"

	"k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime" // needed for the runtime.Object plugin factory argument
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/kubernetes/cmd/kube-scheduler/app"
	"k8s.io/kubernetes/pkg/scheduler/framework"
	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
)
type CustomScheduler struct {
handle framework.Handle
}
func (c * CustomScheduler ) Name () string {
return "CustomScheduler"
}
func (c * CustomScheduler ) Filter (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeInfo * framework.NodeInfo ) * framework.Status {
node := nodeInfo .Node ()
// Custom filtering logic
if node .Labels ["environment" ] != pod .Labels ["environment" ] {
return framework .NewStatus (framework .Unschedulable , "environment mismatch" )
}
// Check if node has enough memory
availableMemory := node .Status .Allocatable .Memory ().Value ()
requestedMemory := getPodMemoryRequest (pod )
if availableMemory < requestedMemory * 2 {
return framework .NewStatus (framework .Unschedulable , "insufficient memory" )
}
return framework .NewStatus (framework .Success , "" )
}
func (c * CustomScheduler ) Score (ctx context.Context , state * framework.CycleState , pod * v1.Pod , nodeName string ) (int64 , * framework.Status ) {
nodeInfo , err := c .handle .SnapshotSharedLister ().NodeInfos ().Get (nodeName )
if err != nil {
return 0 , framework .NewStatus (framework .Error , fmt .Sprintf ("getting node %q from Snapshot: %v" , nodeName , err ))
}
// Custom scoring logic
score := int64 (0 )
// Prefer nodes with SSDs
if nodeInfo .Node ().Labels ["disk-type" ] == "ssd" {
score += 50
}
// Prefer less loaded nodes
cpuUsed := getNodeCPUUsage (nodeInfo )
score += 100 - cpuUsed
return score , framework .NewStatus (framework .Success , "" )
}
func main () {
command := app .NewSchedulerCommand (
app .WithPlugin ("CustomScheduler" , func (configuration runtime.Object , f framework.Handle ) (framework.Plugin , error ) {
return & CustomScheduler {handle : f }, nil
}),
)
if err := command .Execute (); err != nil {
fmt .Printf ("Error executing scheduler: %v\n " , err )
}
}
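A rough way to exercise the plugin, assuming it is built into a scheduler binary and deployed in-cluster; pods opt in through spec.schedulerName:
# Build the scheduler binary (module and config setup not shown above)
go build -o custom-scheduler .
# After deployment, confirm which scheduler placed a pod
kubectl get events --field-selector reason=Scheduled
kubectl describe pod myapp | grep -i "Successfully assigned"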
🔗 SERVICE MESH ARCHITECTURE
Istio Advanced Configuration
# VirtualService with Advanced Traffic Management
apiVersion : networking.istio.io/v1beta1
kind : VirtualService
metadata :
name : reviews
spec :
hosts :
- reviews
http :
- match :
- headers :
end-user :
exact : jason
- uri :
prefix : /api/v1/test
rewrite :
uri : /api/v1/prod
route :
- destination :
host : reviews
subset : v2
weight : 100
- match :
- uri :
prefix : /api/v1/
route :
- destination :
host : reviews
subset : v1
weight : 80
- destination :
host : reviews
subset : v2
weight : 20
retries :
attempts : 3
perTryTimeout : 2s
retryOn : 5xx,connect-failure,refused-stream
timeout : 10s
- match :
- uri :
prefix : /api/v2/
route :
- destination :
host : reviews
subset : v2
weight : 100
fault :
delay :
percentage :
value : 10.0
fixedDelay : 5s
abort :
percentage :
value : 5.0
httpStatus : 500
---
# DestinationRule with Advanced Load Balancing
apiVersion : networking.istio.io/v1beta1
kind : DestinationRule
metadata :
name : reviews
spec :
host : reviews
trafficPolicy :
loadBalancer :
consistentHash :
httpHeaderName : " x-user"
minimumRingSize : 1024
connectionPool :
tcp :
maxConnections : 100
connectTimeout : 30ms
tcpKeepalive :
time : 7200s
interval : 75s
http :
http1MaxPendingRequests : 10
http2MaxRequests : 100
maxRequestsPerConnection : 10
maxRetries : 3
outlierDetection :
consecutive5xxErrors : 5
interval : 30s
baseEjectionTime : 30s
maxEjectionPercent : 10
minHealthPercent : 50
subsets :
- name : v1
labels :
version : v1
trafficPolicy :
loadBalancer :
simple : ROUND_ROBIN
- name : v2
labels :
version : v2
trafficPolicy :
loadBalancer :
simple : LEAST_CONN
---
# ServiceEntry for External Services
apiVersion : networking.istio.io/v1beta1
kind : ServiceEntry
metadata :
name : external-api
spec :
hosts :
- api.external.com
- " *.external.com"
addresses :
- 192.168.1.0/24
ports :
- number : 443
name : https
protocol : HTTPS
- number : 80
name : http
protocol : HTTP
location : MESH_EXTERNAL
resolution : DNS
endpoints :
- address : api.external.com
ports :
https : 443
labels :
version : prod
network : external-network
---
# Authorization Policy
apiVersion : security.istio.io/v1beta1
kind : AuthorizationPolicy
metadata :
name : frontend-ingress
spec :
selector :
matchLabels :
app : frontend
action : ALLOW
rules :
- from :
- source :
principals : ["cluster.local/ns/default/sa/ingress-gateway"]
to :
- operation :
methods : ["GET", "HEAD"]
paths : ["/public/*"]
when :
- key : request.auth.claims[iss]
values : ["https://accounts.google.com"]
- from :
- source :
namespaces : ["backend"]
to :
- operation :
methods : ["POST", "PUT"]
paths : ["/api/*"]
when :
- key : connection.sni
values : ["*.internal"]
Linkerd Advanced Features
# Service Profile for Per-Route Metrics
apiVersion : linkerd.io/v1alpha2
kind : ServiceProfile
metadata :
name : books-web.default.svc.cluster.local
namespace : default
spec :
routes :
- name : GET /books/{id}
condition :
method : GET
pathRegex : /books/[^/]*
responseClasses :
- condition :
status :
min : 500
max : 599
isFailure : true
- condition :
status :
min : 400
max : 499
isFailure : false
timeout : 200ms
- name : POST /books
condition :
method : POST
pathRegex : /books
timeout : 100ms
retryBudget :
retryRatio : 0.2
minRetriesPerSecond : 10
ttl : 10s
---
# Traffic Split for Canary
apiVersion : split.smi-spec.io/v1alpha1
kind : TrafficSplit
metadata :
name : books-split
namespace : default
spec :
service : books
backends :
- service : books-v1
weight : 900m
- service : books-v2
weight : 100m
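Verifying the profile and the canary split with the Linkerd CLI (the viz extension must be installed):
linkerd check
linkerd viz routes deploy/books-web
# Watch success rate and latency while the TrafficSplit shifts traffic
linkerd viz stat trafficsplit -n default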
// custom-mesh-proxy/main.go
package main
import (
"context"
"crypto/tls"
"fmt"
"log"
"net"
"net/http"
"time"
"github.com/envoyproxy/go-control-plane/envoy/api/v2"
"github.com/envoyproxy/go-control-plane/envoy/api/v2/core"
"github.com/envoyproxy/go-control-plane/envoy/api/v2/endpoint"
"github.com/envoyproxy/go-control-plane/envoy/api/v2/listener"
"github.com/envoyproxy/go-control-plane/envoy/api/v2/route"
"github.com/envoyproxy/go-control-plane/pkg/cache/v2"
"github.com/envoyproxy/go-control-plane/pkg/server/v2"
)
type MeshProxy struct {
cache cache.SnapshotCache
server server.Server
}
func NewMeshProxy () * MeshProxy {
snapshotCache := cache .NewSnapshotCache (false , cache.IDHash {}, nil )
srv := server .NewServer (context .Background (), snapshotCache , nil )
return & MeshProxy {
cache : snapshotCache ,
server : srv ,
}
}
func (p * MeshProxy ) CreateSnapshot (version string , nodes []string ) * cache.Snapshot {
// Create endpoints
endpoints := []endpoint.LocalityLbEndpoints {
{
LbEndpoints : []endpoint.LbEndpoint {
{
HostIdentifier : & endpoint.LbEndpoint_Endpoint {
Endpoint : & endpoint.Endpoint {
Address : & core.Address {
Address : & core.Address_SocketAddress {
SocketAddress : & core.SocketAddress {
Protocol : core .SocketAddress_TCP ,
Address : "127.0.0.1" ,
PortSpecifier : & core.SocketAddress_PortValue {
PortValue : 8080 ,
},
},
},
},
},
},
},
},
},
}
// Create clusters
clusters := []v2.Cluster {
{
Name : "service_cluster" ,
ConnectTimeout : 5 * time .Second ,
ClusterDiscoveryType : & v2.Cluster_Type {Type : v2 .Cluster_STRICT_DNS },
LbPolicy : v2 .Cluster_ROUND_ROBIN ,
LoadAssignment : & v2.ClusterLoadAssignment {
ClusterName : "service_cluster" ,
Endpoints : endpoints ,
},
},
}
// Create routes
routes := []route.VirtualHost {
{
Name : "backend" ,
Domains : []string {"*" },
Routes : []route.Route {
{
Match : & route.RouteMatch {
PathSpecifier : & route.RouteMatch_Prefix {
Prefix : "/" ,
},
},
Action : & route.Route_Route {
Route : & route.RouteAction {
ClusterSpecifier : & route.RouteAction_Cluster {
Cluster : "service_cluster" ,
},
Timeout : 15 * time .Second ,
},
},
},
},
},
}
// Create listeners
listeners := []v2.Listener {
{
Name : "listener_0" ,
Address : & core.Address {
Address : & core.Address_SocketAddress {
SocketAddress : & core.SocketAddress {
Protocol : core .SocketAddress_TCP ,
Address : "0.0.0.0" ,
PortSpecifier : & core.SocketAddress_PortValue {
PortValue : 10000 ,
},
},
},
},
FilterChains : []listener.FilterChain {
{
Filters : []listener.Filter {
{
Name : "envoy.http_connection_manager" ,
ConfigType : & listener.Filter_TypedConfig {
TypedConfig : nil , // Would contain marshaled HTTP connection manager config
},
},
},
},
},
},
}
snapshot := cache .NewSnapshot (
version ,
endpoints ,
clusters ,
routes ,
listeners ,
nil , // secrets
)
return & snapshot
}
🔄 GITOPS AND DECLARATIVE DEPLOYMENTS
# Application with Sync Phases
apiVersion : argoproj.io/v1alpha1
kind : Application
metadata :
name : guestbook
namespace : argocd
spec :
project : default
source :
repoURL : https://github.com/argoproj/argocd-example-apps.git
targetRevision : HEAD
path : guestbook
helm :
values : |
image:
tag: v2.0.0
replicaCount: 3
parameters :
- name : " ingress.enabled"
value : " true"
destination :
server : https://kubernetes.default.svc
namespace : guestbook
syncPolicy :
automated :
prune : true
selfHeal : true
allowEmpty : false
syncOptions :
- CreateNamespace=true
- Validate=false
- PrunePropagationPolicy=foreground
- PruneLast=true
- ApplyOutOfSyncOnly=true
retry :
limit : 5
backoff :
duration : 5s
factor : 2
maxDuration : 3m
---
# ApplicationSet for Multi-Cluster
apiVersion : argoproj.io/v1alpha1
kind : ApplicationSet
metadata :
name : guestbook
namespace : argocd
spec :
generators :
- clusters :
selector :
matchLabels :
environment : prod
values :
env : prod
- clusters :
selector :
matchLabels :
environment : staging
values :
env : staging
- git :
repoURL : https://github.com/argoproj/argocd-example-apps.git
revision : HEAD
directories :
- path : guestbook/*
template :
metadata :
name : ' {{name}}-guestbook'
spec :
project : default
source :
repoURL : https://github.com/argoproj/argocd-example-apps.git
targetRevision : HEAD
path : ' {{path}}'
destination :
server : ' {{server}}'
namespace : ' {{values.env}}-guestbook'
---
# App of Apps Pattern
apiVersion : argoproj.io/v1alpha1
kind : Application
metadata :
name : root-app
namespace : argocd
spec :
project : default
source :
repoURL : https://github.com/company/k8s-config.git
targetRevision : HEAD
path : apps
directory :
recurse : true
jsonnet :
tlas :
- name : environment
value : prod
destination :
server : https://kubernetes.default.svc
namespace : argocd
syncPolicy :
automated :
prune : true
selfHeal : true
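Driving and inspecting these applications with the ArgoCD CLI:
argocd login argocd.example.com
argocd app sync root-app
argocd app list
argocd app get guestbook --refresh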
# GitRepository Source
apiVersion : source.toolkit.fluxcd.io/v1beta2
kind : GitRepository
metadata :
name : flux-system
namespace : flux-system
spec :
interval : 1m0s
ref :
branch : main
secretRef :
name : flux-system
url : https://github.com/company/flux-config
ignore : |
# exclude all
/*
# include deploy dir
!/deploy/
---
# Kustomization with Dependencies
apiVersion : kustomize.toolkit.fluxcd.io/v1beta2
kind : Kustomization
metadata :
name : infrastructure
namespace : flux-system
spec :
interval : 10m0s
path : ./infrastructure
prune : true
sourceRef :
kind : GitRepository
name : flux-system
validation : client
dependsOn :
- name : crds
postBuild :
substitute :
environment : prod
region : us-east-1
substituteFrom :
- kind : ConfigMap
name : cluster-config
---
# HelmRelease with Values From
apiVersion : helm.toolkit.fluxcd.io/v2beta1
kind : HelmRelease
metadata :
name : redis
namespace : default
spec :
interval : 5m
chart :
spec :
chart : redis
sourceRef :
kind : HelmRepository
name : bitnami
namespace : flux-system
interval : 1m
valuesFrom :
- kind : ConfigMap
name : redis-values
valuesKey : values.yaml
optional : false
- kind : Secret
name : redis-secret-values
valuesKey : secret.yaml
install :
remediation :
retries : 3
crds : Create
upgrade :
remediation :
retries : 3
remediateLastFailure : true
crds : CreateReplace
rollback :
enable : true
retries : 3
test :
enable : true
ignoreFailures : false
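Equivalent day-to-day checks with the Flux CLI:
flux get sources git
flux get kustomizations
flux get helmreleases -n default
# Force a reconcile instead of waiting for the interval
flux reconcile kustomization infrastructure --with-source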
Knative Serving Deep Dive
# Service with Autoscaling
apiVersion : serving.knative.dev/v1
kind : Service
metadata :
name : helloworld-go
namespace : default
spec :
template :
metadata :
annotations :
autoscaling.knative.dev/minScale : " 1"
autoscaling.knative.dev/maxScale : " 10"
autoscaling.knative.dev/target : " 100"
autoscaling.knative.dev/window : " 30s"
autoscaling.knative.dev/scaleToZeroPodRetention : " 30s"
spec :
containers :
- image : gcr.io/knative-samples/helloworld-go
env :
- name : TARGET
value : " Knative"
ports :
- containerPort : 8080
resources :
requests :
cpu : 100m
memory : 64Mi
limits :
cpu : 1000m
memory : 512Mi
timeoutSeconds : 300
containerConcurrency : 10
---
# Traffic Splitting
apiVersion : serving.knative.dev/v1
kind : Route
metadata :
name : helloworld-go
namespace : default
spec :
traffic :
- tag : current
revisionName : helloworld-go-00001
percent : 90
- tag : candidate
revisionName : helloworld-go-00002
percent : 10
- tag : latest
latestRevision : true
percent : 0
---
# Domain Mapping
apiVersion : serving.knative.dev/v1alpha1
kind : DomainMapping
metadata :
name : app.example.com
namespace : default
spec :
ref :
name : helloworld-go
kind : Service
apiVersion : serving.knative.dev/v1
tls :
secretName : tls-secret
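Inspecting revisions and scaling behaviour with the kn CLI:
kn service describe helloworld-go
kn revision list -s helloworld-go
# Watch the autoscaler add and remove pods under load
kubectl get pods -l serving.knative.dev/service=helloworld-go -w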
# Function with advanced configuration
apiVersion : openfaas.com/v1
kind : Function
metadata :
name : advanced-function
namespace : openfaas-fn
spec :
name : advanced-function
image : registry/advanced-function:latest
handler : ./advanced-function
labels :
com.openfaas.scale.min : " 2"
com.openfaas.scale.max : " 10"
com.openfaas.scale.target : " 50"
com.openfaas.scale.target-proportion : " 0.7"
com.openfaas.scale.zero : " true"
com.openfaas.scale.zero-duration : " 5m"
annotations :
prometheus.io.scrape : " true"
prometheus.io.port : " 8080"
environment :
write_debug : " true"
combine_output : " false"
environment_files :
- " secrets.yml"
secrets :
- db-password
- api-key
limits :
cpu : " 1000m"
memory : " 512Mi"
requests :
cpu : " 100m"
memory : " 64Mi"
constraints :
- " node.lifecycle == normal"
- " cloud.google.com/gke-nodepool == functions"
---
# Auto-scaling rule
apiVersion : openfaas.com/v1
kind : ScaledFunction
metadata :
name : advanced-function
spec :
functionRef :
name : advanced-function
deployment :
replicas : 1
triggers :
- type : cpu
metadata :
threshold : " 50"
period : " 30"
- type : memory
metadata :
threshold : " 80"
period : " 30"
- type : kafka
metadata :
topic : " requests"
bootstrapServers : " kafka:9092"
consumerGroup : " function-group"
lagThreshold : " 100"
🔧 CONTAINER RUNTIME INTERNALS
// containerd client example
package main
import (
	"context"
	"fmt"
	"log"
	"syscall" // needed for syscall.SIGTERM below
	"time"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/cio"
	"github.com/containerd/containerd/namespaces"
	"github.com/containerd/containerd/oci"
	"github.com/containerd/containerd/remotes/docker"
)
func main () {
// Create containerd client
client , err := containerd .New ("/run/containerd/containerd.sock" )
if err != nil {
log .Fatal (err )
}
defer client .Close ()
ctx := namespaces .WithNamespace (context .Background (), "default" )
// Pull image
image , err := client .Pull (ctx , "docker.io/library/nginx:alpine" ,
containerd .WithPullUnpack ,
containerd .WithResolver (docker .NewResolver (docker.ResolverOptions {})))
if err != nil {
log .Fatal (err )
}
// Create container
container , err := client .NewContainer (
ctx ,
"nginx-advanced" ,
containerd .WithImage (image ),
containerd .WithNewSnapshot ("nginx-advanced-snapshot" , image ),
containerd .WithNewSpec (oci .WithImageConfig (image ),
oci .WithHostname ("nginx-container" ),
oci .WithEnv ([]string {"NGINX_PORT=8080" }),
oci .WithCapabilities ([]string {"CAP_NET_BIND_SERVICE" }),
oci .WithMounts ([]oci.Mount {
{
Source : "/host/path" ,
Destination : "/container/path" ,
Type : "bind" ,
Options : []string {"rw" , "rbind" },
},
}),
),
)
if err != nil {
log .Fatal (err )
}
defer container .Delete (ctx , containerd .WithSnapshotCleanup )
// Create task
task , err := container .NewTask (ctx , cio .NewCreator (cio .WithStdio ))
if err != nil {
log .Fatal (err )
}
defer task .Delete (ctx )
// Start task
err = task .Start (ctx )
if err != nil {
log .Fatal (err )
}
// Wait for exit
exitStatusC , err := task .Wait (ctx )
if err != nil {
log .Fatal (err )
}
// Kill after 10 seconds
time .Sleep (10 * time .Second )
err = task .Kill (ctx , syscall .SIGTERM )
if err != nil {
log .Fatal (err )
}
status := <- exitStatusC
code , _ , err := status .Result ()
if err != nil {
log .Fatal (err )
}
fmt .Printf ("Container exited with status: %d\n " , code )
}
runc Low-Level Operations
#! /bin/bash
# runc advanced operations
# Create root filesystem
mkdir -p /container/rootfs
docker export $( docker create nginx:alpine) | tar -C /container/rootfs -xvf -
# Create runtime spec
runc spec --rootless
# Edit the spec safely (piping back into the file you are reading truncates it mid-pipe)
jq '.process.args = ["nginx", "-g", "daemon off;"]' config.json > config.tmp && mv config.tmp config.json
jq '.linux.namespaces += [{"type": "user"}]' config.json > config.tmp && mv config.tmp config.json
# Run container
runc run mynginx
# List containers
runc list
# Pause container
runc pause mynginx
# Resume container
runc resume mynginx
# Get container state
runc state mynginx
# Execute command in container
runc exec mynginx ps aux
# Update container resources
runc update --memory 256M --cpu-period 100000 --cpu-quota 50000 mynginx
# Checkpoint container (CRIU)
runc checkpoint --image-path /checkpoint mynginx
runc restore --image-path /checkpoint mynginx
# Delete container
runc delete mynginx
CRIU for Checkpoint/Restore
// checkpoint_example.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int main () {
printf ("Container started. PID: %d\n" , getpid ());
// Create checkpoint file
int fd = open ("/tmp/criu_checkpoint" , O_WRONLY | O_CREAT , 0644 );
if (fd == -1 ) {
perror ("open" );
return 1 ;
}
// Write checkpoint information
dprintf (fd , "PID: %d\n" , getpid ());
dprintf (fd , "Time: %ld\n" , time (NULL ));
close (fd );
// Simulate work
int counter = 0 ;
while (1 ) {
printf ("Working... iteration %d\n" , counter ++ );
sleep (5 );
}
return 0 ;
}
# Compile, package the binary into an image, and run it
gcc -o checkpoint_example checkpoint_example.c
docker build -t checkpoint_example .   # assumes a minimal Dockerfile that copies the binary and sets it as CMD
docker run -d --name test-container checkpoint_example
# Create checkpoint
docker checkpoint create --checkpoint-dir=/tmp/checkpoints test-container checkpoint1
# Restore from checkpoint
docker start --checkpoint --checkpoint-dir=/tmp/checkpoints test-container checkpoint1
Volume Plugin Development
// volume-plugin/main.go
package main
import (
	"bytes" // key material piped to cryptsetup
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"os"
	"os/exec" // cryptsetup / mount invocations below
	"path/filepath"
	"strings"

	"github.com/docker/go-plugins-helpers/volume"
)
type EncryptedVolumeDriver struct {
root string
encryptionKey []byte
}
func (d * EncryptedVolumeDriver ) Create (r * volume.CreateRequest ) error {
path := filepath .Join (d .root , r .Name )
// Create encrypted volume
if err := os .MkdirAll (path , 0750 ); err != nil {
return err
}
// Initialize encryption
if err := d .initEncryption (path ); err != nil {
return err
}
// Store metadata
metadata := map [string ]string {
"created" : r .Name ,
"encrypted" : "true" ,
}
d .saveMetadata (path , metadata )
return nil
}
func (d * EncryptedVolumeDriver ) initEncryption (path string ) error {
// Create encrypted filesystem
cmd := exec .Command ("cryptsetup" , "luksFormat" ,
fmt .Sprintf ("%s/volume.img" , path ),
"--key-file" , "/dev/stdin" )
cmd .Stdin = bytes .NewReader (d .encryptionKey )
if err := cmd .Run (); err != nil {
return err
}
// Open encrypted device
cmd = exec .Command ("cryptsetup" , "open" ,
fmt .Sprintf ("%s/volume.img" , path ),
fmt .Sprintf ("crypt-%s" , filepath .Base (path )),
"--key-file" , "/dev/stdin" )
cmd .Stdin = bytes .NewReader (d .encryptionKey )
return cmd .Run ()
}
func (d * EncryptedVolumeDriver ) Mount (r * volume.MountRequest ) (* volume.MountResponse , error ) {
path := filepath .Join (d .root , r .Name )
mountpoint := filepath .Join ("/mnt" , "docker" , "volumes" , r .Name )
// Mount encrypted volume
if err := os .MkdirAll (mountpoint , 0750 ); err != nil {
return nil , err
}
cmd := exec .Command ("mount" ,
fmt .Sprintf ("/dev/mapper/crypt-%s" , r .Name ),
mountpoint )
if err := cmd .Run (); err != nil {
return nil , err
}
return & volume.MountResponse {Mountpoint : mountpoint }, nil
}
func (d * EncryptedVolumeDriver ) Path (r * volume.PathRequest ) (* volume.PathResponse , error ) {
mountpoint := filepath .Join ("/mnt" , "docker" , "volumes" , r .Name )
return & volume.PathResponse {Mountpoint : mountpoint }, nil
}
func (d * EncryptedVolumeDriver ) Unmount (r * volume.UnmountRequest ) error {
mountpoint := filepath .Join ("/mnt" , "docker" , "volumes" , r .Name )
// Unmount volume
cmd := exec .Command ("umount" , mountpoint )
if err := cmd .Run (); err != nil {
return err
}
// Close encrypted device
cmd = exec .Command ("cryptsetup" , "close" ,
fmt .Sprintf ("crypt-%s" , r .Name ))
return cmd .Run ()
}
func main () {
key := []byte (os .Getenv ("ENCRYPTION_KEY" ))
if len (key ) == 0 {
log .Fatal ("ENCRYPTION_KEY not set" )
}
d := & EncryptedVolumeDriver {
root : "/var/lib/docker-volumes" ,
encryptionKey : key ,
}
h := volume .NewHandler (d )
fmt .Println ("Starting encrypted volume plugin..." )
fmt .Println (h .ServeUnix ("encrypted_volume" , 0 ))
}
# Build and install plugin
go build -o encrypted-volume-plugin ./volume-plugin
mkdir -p /usr/lib/docker/plugins/encrypted-volume
cp encrypted-volume-plugin /usr/lib/docker/plugins/encrypted-volume/
# Create plugin config
cat > /etc/docker/plugins/encrypted-volume.spec << EOF
{
"Name": "encrypted-volume",
"Addr": "/run/docker/plugins/encrypted-volume.sock"
}
EOF
# Start plugin
ENCRYPTION_KEY=" my-secret-key" /usr/lib/docker/plugins/encrypted-volume/encrypted-volume-plugin &
# Use plugin
docker volume create -d encrypted-volume --name secure-data
docker run -v secure-data:/data ubuntu ls /data
Network Plugin Development
// network-plugin/main.go
package main
import (
	"encoding/json"
	"fmt"
	"math/rand" // used by generateMAC below
	"net"
	"os/exec"

	"github.com/docker/go-plugins-helpers/network"
)
type OverlayNetworkDriver struct {
networks map [string ]* networkNetwork
}
type networkNetwork struct {
id string
ipamData * network.IPAMData
vxlanID int
bridgeName string
}
func (d * OverlayNetworkDriver ) GetCapabilities () (* network.CapabilitiesResponse , error ) {
return & network.CapabilitiesResponse {
Scope : network .LocalScope ,
}, nil
}
func (d * OverlayNetworkDriver ) CreateNetwork (r * network.CreateNetworkRequest ) error {
// Create VXLAN interface
vxlanID := 100 + len (d .networks )
bridgeName := fmt .Sprintf ("br-%s" , r .NetworkID [:12 ])
// Create bridge
if err := exec .Command ("ip" , "link" , "add" , bridgeName ,
"type" , "bridge" ).Run (); err != nil {
return err
}
// Create VXLAN
vxlanName := fmt .Sprintf ("vxlan-%d" , vxlanID )
if err := exec .Command ("ip" , "link" , "add" , vxlanName ,
"type" , "vxlan" , "id" , fmt .Sprintf ("%d" , vxlanID ),
"dev" , "eth0" , "dstport" , "4789" ).Run (); err != nil {
return err
}
// Connect VXLAN to bridge
if err := exec .Command ("ip" , "link" , "set" , vxlanName ,
"master" , bridgeName ).Run (); err != nil {
return err
}
// Enable interfaces
exec .Command ("ip" , "link" , "set" , bridgeName , "up" ).Run ()
exec .Command ("ip" , "link" , "set" , vxlanName , "up" ).Run ()
// Store network info
d .networks [r .NetworkID ] = & networkNetwork {
id : r .NetworkID ,
vxlanID : vxlanID ,
bridgeName : bridgeName ,
}
return nil
}
func (d * OverlayNetworkDriver ) DeleteNetwork (r * network.DeleteNetworkRequest ) error {
if netInfo , exists := d .networks [r .NetworkID ]; exists {
// Clean up interfaces
exec .Command ("ip" , "link" , "del" , netInfo .bridgeName ).Run ()
exec .Command ("ip" , "link" , "del" , fmt .Sprintf ("vxlan-%d" , netInfo .vxlanID )).Run ()
delete (d .networks , r .NetworkID )
}
return nil
}
func (d * OverlayNetworkDriver ) CreateEndpoint (r * network.CreateEndpointRequest ) (* network.CreateEndpointResponse , error ) {
vethName := fmt .Sprintf ("veth%s" , r .EndpointID [:8 ])
peerName := fmt .Sprintf ("peer%s" , r .EndpointID [:8 ])
// Create veth pair
if err := exec .Command ("ip" , "link" , "add" , vethName ,
"type" , "veth" , "peer" , "name" , peerName ).Run (); err != nil {
return nil , err
}
// Connect one end to bridge
netInfo := d .networks [r .NetworkID ]
if err := exec .Command ("ip" , "link" , "set" , peerName ,
"master" , netInfo .bridgeName ).Run (); err != nil {
return nil , err
}
// Enable interfaces
exec .Command ("ip" , "link" , "set" , vethName , "up" ).Run ()
exec .Command ("ip" , "link" , "set" , peerName , "up" ).Run ()
// Generate interface info
response := & network.CreateEndpointResponse {
Interface : & network.EndpointInterface {
MacAddress : generateMAC (),
Address : r .Interface .Address ,
},
}
return response , nil
}
func main () {
d := & OverlayNetworkDriver {
networks : make (map [string ]* networkNetwork ),
}
h := network .NewHandler (d )
fmt .Println ("Starting overlay network plugin..." )
fmt .Println (h .ServeUnix ("overlay_network" , 0 ))
}
func generateMAC () string {
hw := make (net.HardwareAddr , 6 )
hw [0 ] = 0x02
hw [1 ] = 0x42
hw [2 ] = 0xac
hw [3 ] = byte (rand .Intn (256 ))
hw [4 ] = byte (rand .Intn (256 ))
hw [5 ] = byte (rand .Intn (256 ))
return hw .String ()
}
🔒 ENTERPRISE SECURITY & COMPLIANCE
Advanced Security Policies
# Pod Security Standards (Restricted)
apiVersion : policy/v1beta1
kind : PodSecurityPolicy
metadata :
name : restricted
spec :
privileged : false
allowPrivilegeEscalation : false
requiredDropCapabilities :
- ALL
volumes :
- ' configMap'
- ' emptyDir'
- ' projected'
- ' secret'
- ' downwardAPI'
- ' persistentVolumeClaim'
hostNetwork : false
hostIPC : false
hostPID : false
runAsUser :
rule : ' MustRunAsNonRoot'
seLinux :
rule : ' RunAsAny'
supplementalGroups :
rule : ' MustRunAs'
ranges :
- min : 1
max : 65535
fsGroup :
rule : ' MustRunAs'
ranges :
- min : 1
max : 65535
readOnlyRootFilesystem : true
---
# Image Policy Webhook
apiVersion : imagepolicy.k8s.io/v1alpha1
kind : ImageReview
metadata :
name : image-policy
spec :
containers :
- image : nginx:latest
namespace : default
annotations :
image-policy.kyverno.io/require-digest : " true"
image-policy.kyverno.io/deny-latest-tag : " true"
---
# Kyverno Policy
apiVersion : kyverno.io/v1
kind : ClusterPolicy
metadata :
name : require-pod-requests-limits
spec :
validationFailureAction : enforce
rules :
- name : validate-resources
match :
resources :
kinds :
- Pod
validate :
message : " CPU and memory resource requests and limits are required"
pattern :
spec :
containers :
- resources :
requests :
memory : " ?*"
cpu : " ?*"
limits :
memory : " ?*"
cpu : " ?*"
- name : require-ro-rootfs
match :
resources :
kinds :
- Pod
validate :
message : " Root filesystem must be read-only"
pattern :
spec :
containers :
- securityContext :
readOnlyRootFilesystem : true
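Verifying the policy is actually enforced:
kubectl apply -f require-pod-requests-limits.yaml
# A pod without requests/limits should now be rejected at admission
kubectl run nolimits --image=nginx:alpine
kubectl get clusterpolicy require-pod-requests-limits
kubectl get policyreport -A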
# Docker Content Trust with Notary
export DOCKER_CONTENT_TRUST=1
export DOCKER_CONTENT_TRUST_SERVER=https://notary.docker.io
# Sign and push
docker trust sign myregistry/myapp:v1.0.0
# Manage keys
docker trust key generate alice
docker trust signer add --key alice.pub alice myregistry/myapp
# Verify
docker trust inspect myregistry/myapp:v1.0.0 --pretty
---
# SLSA Provenance
apiVersion : intoto.dev/v1alpha1
kind : Provenance
metadata :
name : myapp-v1.0.0
spec :
builder :
id : https://github.com/actions/runner
buildType : https://github.com/actions/docker-build-push@v1
invocation :
configSource :
uri : git+https://github.com/company/myapp@refs/tags/v1.0.0
entryPoint : .github/workflows/build.yml
materials :
- uri : git+https://github.com/company/myapp@refs/tags/v1.0.0
- uri : docker://node:16-alpine
metadata :
buildStartedOn : " 2024-01-01T10:00:00Z"
buildFinishedOn : " 2024-01-01T10:05:00Z"
completeness :
arguments : true
environment : true
reproducible : false
# Trivy Operator
apiVersion : aquasecurity.github.io/v1alpha1
kind : TrivyReport
metadata :
name : deployment-scan
spec :
scanJob :
tolerations :
- key : " node-role.kubernetes.io/master"
operator : " Exists"
priorityClassName : " system-cluster-critical"
report :
format : " json"
severity : " CRITICAL,HIGH"
ignoreUnfixed : true
---
# Grafeas Integration
{
" name " : " projects/myproject/notes/myapp-v1" ,
" kind " : " VULNERABILITY" ,
" vulnerability " : {
" cvssScore " : 7.5,
" severity " : " HIGH" ,
" details " : " Critical vulnerability in base image" ,
" cve " : " CVE-2024-12345" ,
" packageIssue " : [
{
" affectedLocation " : {
" cpeUri " : " cpe:/o:debian:debian_linux:11" ,
" package " : " openssl"
},
" fixedVersion " : " 1.1.1n-0+deb11u5"
}
]
}
}
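Ad-hoc scans with the Trivy CLI mirror what the operator reports:
trivy image --severity HIGH,CRITICAL nginx:latest
trivy image --ignore-unfixed --format json -o report.json myregistry/myapp:v1.0.0
# Scan IaC / Kubernetes manifests in the current repo for misconfigurations
trivy config .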
🌐 ADVANCED NETWORKING PATTERNS
# Submariner
apiVersion : submariner.io/v1alpha1
kind : Submariner
metadata :
name : submariner
spec :
version : 0.14.0
broker : k8s
brokerK8sApiServer : https://broker-cluster:6443
brokerK8sCA : /etc/broker/ca.crt
brokerK8sRemoteNamespace : submariner-broker
natEnabled : true
cableDriver : libreswan
clusterID : cluster-west
clusterCidr : 10.244.0.0/16
serviceCidr : 10.96.0.0/12
globalCidr : 169.254.0.0/16
globalnetEnabled : true
---
# ServiceExport
apiVersion : multicluster.x-k8s.io/v1alpha1
kind : ServiceExport
metadata :
name : redis
namespace : default
spec :
port :
port : 6379
protocol : TCP
---
# ServiceImport
apiVersion : multicluster.x-k8s.io/v1alpha1
kind : ServiceImport
metadata :
name : redis
namespace : default
spec :
type : ClusterSetIP
ports :
- port : 6379
protocol : TCP
// ebpf_program.c
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h> /* bpf_htons / bpf_ntohs */
SEC ("filter" )
int drop_high_port (struct __sk_buff * skb ) {
void * data = (void * )(long )skb -> data ;
void * data_end = (void * )(long )skb -> data_end ;
struct ethhdr * eth = data ;
if ((void * )eth + sizeof (* eth ) > data_end )
return 0 ;
if (eth -> h_proto != bpf_htons (ETH_P_IP ))
return 0 ;
struct iphdr * ip = data + sizeof (* eth );
if ((void * )ip + sizeof (* ip ) > data_end )
return 0 ;
if (ip -> protocol != IPPROTO_TCP )
return 0 ;
struct tcphdr * tcp = (void * )ip + sizeof (* ip );
if ((void * )tcp + sizeof (* tcp ) > data_end )
return 0 ;
// Block ports > 1024
if (bpf_ntohs (tcp -> dest ) > 1024 ) {
bpf_trace_printk ("Blocked port %d\\n" , bpf_ntohs (tcp -> dest ));
return -1 ; // DROP
}
return 0 ; // PASS
}
char _license [] SEC ("license" ) = "GPL" ;
# Cilium Network Policy
apiVersion : cilium.io/v2
kind : CiliumNetworkPolicy
metadata :
name : secure-app
spec :
endpointSelector :
matchLabels :
app : secure-app
ingress :
- fromEndpoints :
- matchLabels :
app : trusted-client
toPorts :
- ports :
- port : " 443"
protocol : TCP
authentication :
mode : required
- fromCIDR :
- 10.0.0.0/8
except :
- 10.96.0.0/12
egress :
- toFQDNs :
- matchName : " *.example.com"
toPorts :
- ports :
- port : " 443"
protocol : TCP
- toEntities :
- cluster
- toServices :
- k8sService :
namespace : kube-system
serviceName : kube-dns
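Confirming the policy is loaded and observing verdicts with Hubble:
cilium status
kubectl get cnp secure-app -o yaml
# Show dropped flows to spot traffic the policy is rejecting
hubble observe --verdict DROPPED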
⚡ PERFORMANCE ENGINEERING
// /etc/docker/daemon.json - Production tuning
{
"storage-driver" : " overlay2" ,
"storage-opts" : [
" overlay2.override_kernel_check=true" ,
" overlay2.size=20GB"
],
"log-driver" : " json-file" ,
"log-opts" : {
"max-size" : " 50m" ,
"max-file" : " 5" ,
"compress" : " true"
},
"max-concurrent-downloads" : 10 ,
"max-concurrent-uploads" : 10 ,
"default-ulimits" : {
"nofile" : {
"Name" : " nofile" ,
"Hard" : 1000000 ,
"Soft" : 1000000
}
},
"live-restore" : true ,
"userland-proxy" : false ,
"iptables" : true ,
"ip-forward" : true ,
"ip-masq" : true ,
"experimental" : true ,
"metrics-addr" : " 0.0.0.0:9323" ,
"cpu-rt-period" : 1000000 ,
"cpu-rt-runtime" : 950000 ,
"runtimes" : {
"nvidia" : {
"path" : " nvidia-container-runtime" ,
"runtimeArgs" : []
}
}
}
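After editing /etc/docker/daemon.json, restart the daemon and confirm the settings took effect:
sudo systemctl restart docker
docker info --format '{{.Driver}} {{.LoggingDriver}}'
# metrics-addr exposes Prometheus metrics on the configured port
curl -s http://localhost:9323/metrics | head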
// pprof integration
package main
import (
	"context"
	"encoding/json"
	"log"
	"net/http"
	_ "net/http/pprof"
	"runtime"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/client"
)
func monitorContainerPerformance () {
cli , _ := client .NewClientWithOpts (client .FromEnv )
for {
containers , _ := cli .ContainerList (context .Background (), types.ContainerListOptions {})
for _ , container := range containers {
stats , _ := cli .ContainerStats (context .Background (), container .ID , false )
var v * types.StatsJSON
decoder := json .NewDecoder (stats .Body )
decoder .Decode (& v )
stats .Body .Close ()
// CPU analysis
cpuDelta := float64 (v .CPUStats .CPUUsage .TotalUsage - v .PreCPUStats .CPUUsage .TotalUsage )
systemDelta := float64 (v .CPUStats .SystemUsage - v .PreCPUStats .SystemUsage )
cpuPercent := (cpuDelta / systemDelta ) * float64 (len (v .CPUStats .CPUUsage .PercpuUsage )) * 100
// Memory analysis
memoryPercent := float64 (v .MemoryStats .Usage ) / float64 (v .MemoryStats .Limit ) * 100
// Block I/O analysis
var blkRead , blkWrite uint64
for _ , bioEntry := range v .BlkioStats .IoServiceBytesRecursive {
switch bioEntry .Op {
case "Read" :
blkRead = bioEntry .Value
case "Write" :
blkWrite = bioEntry .Value
}
}
// Network analysis
var netRx , netTx uint64
for _ , netEntry := range v .Networks {
netRx += netEntry .RxBytes
netTx += netEntry .TxBytes
}
// Store metrics in time-series DB
storeMetrics (container .ID , cpuPercent , memoryPercent , blkRead , blkWrite , netRx , netTx )
}
time .Sleep (30 * time .Second )
}
}
func main () {
go monitorContainerPerformance ()
// Start pprof server
go func () {
log .Println (http .ListenAndServe ("localhost:6060" , nil ))
}()
select {}
}
# NUMA-aware scheduling
apiVersion : v1
kind : Pod
metadata :
name : numa-optimized
spec :
containers :
- name : app
image : nginx:alpine
resources :
requests :
memory : " 2Gi"
cpu : " 2"
hugepages-2Mi : " 1Gi"
limits :
memory : " 2Gi"
cpu : " 2"
hugepages-2Mi : " 1Gi"
volumeMounts :
- mountPath : /dev/hugepages
name : hugepage
volumes :
- name : hugepage
emptyDir :
medium : HugePages
topologySpreadConstraints :
- maxSkew : 1
topologyKey : topology.kubernetes.io/zone
whenUnsatisfiable : DoNotSchedule
labelSelector :
matchLabels :
app : numa-optimized
affinity :
nodeAffinity :
requiredDuringSchedulingIgnoredDuringExecution :
nodeSelectorTerms :
- matchExpressions :
- key : numa-node
operator : In
values :
- " 0"
🚨 DISASTER RECOVERY & MULTI-REGION
# Global Load Balancing
apiVersion : v1
kind : Service
metadata :
name : global-app
annotations :
external-dns.alpha.kubernetes.io/hostname : app.example.com
service.beta.kubernetes.io/aws-load-balancer-ssl-cert : arn:aws:acm:us-east-1:...
spec :
type : LoadBalancer
ports :
- port : 443
targetPort : 8080
selector :
app : global-app
---
# Route53 DNS Failover
{
" Comment " : " Global load balancing with failover" ,
" Changes " : [
{
" Action " : " UPSERT" ,
" ResourceRecordSet " : {
" Name " : " app.example.com" ,
" Type " : " A" ,
" SetIdentifier " : " us-east-1" ,
" Region " : " us-east-1" ,
" AliasTarget " : {
" HostedZoneId " : " Z..." ,
" DNSName " : " us-east-1-lb.example.com" ,
" EvaluateTargetHealth " : true
},
" Failover " : " PRIMARY" ,
" HealthCheckId " : " abcd-1234"
}
},
{
" Action " : " UPSERT" ,
" ResourceRecordSet " : {
" Name " : " app.example.com" ,
" Type " : " A" ,
" SetIdentifier " : " us-west-2" ,
" Region " : " us-west-2" ,
" AliasTarget " : {
" HostedZoneId " : " Z..." ,
" DNSName " : " us-west-2-lb.example.com" ,
" EvaluateTargetHealth " : true
},
" Failover " : " SECONDARY"
}
}
]
}
Automated Disaster Recovery
# Velero Backup Schedule
apiVersion : velero.io/v1
kind : Schedule
metadata :
name : daily-backup
namespace : velero
spec :
schedule : 0 2 * * *
template :
includedNamespaces :
- default
- production
excludedResources :
- nodes
- events
- events.events.k8s.io
- backups.velero.io
- restores.velero.io
ttl : 720h0m0s
snapshotVolumes : true
storageLocation : default
volumeSnapshotLocations :
- aws-us-east-1
---
# Velero Restore
apiVersion : velero.io/v1
kind : Restore
metadata :
name : disaster-recovery
namespace : velero
spec :
backupName : daily-backup-20240101
includedNamespaces :
- production
restorePVs : true
restoreStatus :
includedResources :
- pods
- services
labelSelector :
matchLabels :
app : critical
---
# Cross-Region Replication
apiVersion : v1
kind : PersistentVolume
metadata :
name : replicated-pv
spec :
capacity :
storage : 100Gi
volumeMode : Filesystem
accessModes :
- ReadWriteMany
persistentVolumeReclaimPolicy : Retain
storageClassName : regional-dr
csi :
driver : efs.csi.aws.com
volumeHandle : fs-12345678::fsap-98765432
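The same backup and restore flows can be driven from the velero CLI:
velero backup create adhoc-backup --include-namespaces production
velero backup get
velero restore create --from-backup daily-backup-20240101 --include-namespaces production
velero restore describe disaster-recovery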
💰 COST OPTIMIZATION STRATEGIES
# Vertical Pod Autoscaler
apiVersion : autoscaling.k8s.io/v1
kind : VerticalPodAutoscaler
metadata :
name : app-vpa
spec :
targetRef :
apiVersion : apps/v1
kind : Deployment
name : app
updatePolicy :
updateMode : Auto
resourcePolicy :
containerPolicies :
- containerName : ' *'
minAllowed :
cpu : 100m
memory : 128Mi
maxAllowed :
cpu : 4
memory : 8Gi
controlledResources : ["cpu", "memory"]
controlledValues : RequestsAndLimits
---
# Cluster Autoscaler
apiVersion : v1
kind : ConfigMap
metadata :
name : cluster-autoscaler-status
namespace : kube-system
data :
scale-down-enabled : " true"
scale-down-delay-after-add : " 10m"
scale-down-delay-after-delete : " 10s"
scale-down-delay-after-failure : " 3m"
scale-down-unneeded-time : " 10m"
scale-down-unready-time : " 20m"
max-node-provision-time : " 15m"
---
# KubeCost Report
apiVersion : kubecost.com/v1
kind : AssetReport
metadata :
name : monthly-cost
spec :
start : " 2024-01-01T00:00:00Z"
end : " 2024-01-31T23:59:59Z"
aggregation : " namespace"
accumulate : true
# Spot Instances with Interruption Handling
apiVersion : apps/v1
kind : Deployment
metadata :
name : spot-workload
spec :
replicas : 10
selector :
matchLabels :
app : spot-workload
template :
metadata :
labels :
app : spot-workload
spec :
nodeSelector :
kubernetes.azure.com/scalesetpriority : spot
tolerations :
- key : kubernetes.azure.com/scalesetpriority
operator : Equal
value : spot
effect : NoSchedule
- key : kubernetes.azure.com/scalesetpriority
operator : Equal
value : spot
effect : NoExecute
containers :
- name : app
image : nginx:alpine
resources :
requests :
cpu : " 1"
memory : " 1Gi"
limits :
cpu : " 2"
memory : " 2Gi"
terminationGracePeriodSeconds : 30
---
# Spot Instance Handler
apiVersion : v1
kind : Pod
metadata :
name : spot-interruption-handler
spec :
serviceAccountName : spot-handler
hostNetwork : true
containers :
- name : handler
image : myregistry/spot-handler:latest
env :
- name : AWS_METADATA_URL
value : http://169.254.169.254/latest/meta-data/spot/termination-time
volumeMounts :
- name : scripts
mountPath : /scripts
volumes :
- name : scripts
configMap :
name : spot-handler-scripts
defaultMode : 0755
# Rancher Fleet
apiVersion : fleet.cattle.io/v1alpha1
kind : GitRepo
metadata :
name : fleet-management
namespace : fleet-local
spec :
repo : https://github.com/company/fleet-config
branch : main
paths :
- ./clusters
targets :
- clusterSelector :
matchLabels :
env : production
---
# Cluster API
apiVersion : cluster.x-k8s.io/v1beta1
kind : Cluster
metadata :
name : production-cluster
spec :
clusterNetwork :
pods :
cidrBlocks : ["10.244.0.0/16"]
services :
cidrBlocks : ["10.96.0.0/12"]
serviceDomain : cluster.local
controlPlaneRef :
apiVersion : controlplane.cluster.x-k8s.io/v1beta1
kind : KubeadmControlPlane
name : production-control-plane
infrastructureRef :
apiVersion : infrastructure.cluster.x-k8s.io/v1beta1
kind : AWSCluster
name : production-cluster
---
# ClusterClass
apiVersion : cluster.x-k8s.io/v1beta1
kind : ClusterClass
metadata :
name : standard-cluster
spec :
controlPlane :
ref :
apiVersion : controlplane.cluster.x-k8s.io/v1beta1
kind : KubeadmControlPlaneTemplate
name : standard-control-plane
machineInfrastructure :
ref :
kind : AWSMachineTemplate
apiVersion : infrastructure.cluster.x-k8s.io/v1beta1
name : standard-control-plane-machine
infrastructure :
ref :
apiVersion : infrastructure.cluster.x-k8s.io/v1beta1
kind : AWSClusterTemplate
name : standard-cluster
workers :
machineDeployments :
- class : standard-worker
template :
bootstrap :
ref :
apiVersion : bootstrap.cluster.x-k8s.io/v1beta1
kind : KubeadmConfigTemplate
name : standard-worker-bootstrap
infrastructure :
ref :
apiVersion : infrastructure.cluster.x-k8s.io/v1beta1
kind : AWSMachineTemplate
name : standard-worker-machine
# Global Istio Mesh
apiVersion : install.istio.io/v1alpha1
kind : IstioOperator
metadata :
name : global-mesh
spec :
profile : default
meshConfig :
enableTracing : true
accessLogFile : /dev/stdout
extensionProviders :
- name : oauth2-proxy
envoyExtAuthzGrpc :
service : oauth2-proxy.istio-system.svc.cluster.local
port : 9001
defaultConfig :
proxyMetadata :
ISTIO_META_DNS_CAPTURE : " true"
ISTIO_META_DNS_AUTO_ALLOCATE : " true"
components :
pilot :
k8s :
replicaCount : 3
env :
- name : PILOT_ENABLE_ANALYSIS
value : " true"
ingressGateways :
- name : istio-ingressgateway
enabled : true
k8s :
service :
type : LoadBalancer
ports :
- port : 80
targetPort : 8080
name : http2
- port : 443
targetPort : 8443
name : https
---
# Multi-Cluster Istio
apiVersion : networking.istio.io/v1beta1
kind : ServiceEntry
metadata :
name : cross-cluster-service
spec :
hosts :
- remote-cluster.local
addresses :
- 240.0.0.0/24
ports :
- number : 80
name : http
protocol : HTTP
resolution : STATIC
endpoints :
- address : 10.244.1.1
labels :
cluster : remote-cluster
ports :
http : 15443
---
# Global Load Balancing
apiVersion : networking.istio.io/v1beta1
kind : DestinationRule
metadata :
name : global-lb
spec :
host : myapp.global
trafficPolicy :
loadBalancer :
localityLbSetting :
enabled : true
failover :
- from : us-east
to : us-west
outlierDetection :
consecutive5xxErrors : 5
interval : 10s
baseEjectionTime : 30s
maxEjectionPercent : 100
🎓 ADVANCED MASTERY CHECKLIST
Kubernetes : Custom controllers, operators, custom schedulers
Service Mesh : Istio/Linkerd advanced policies, custom mesh components
GitOps : Advanced ArgoCD patterns, Flux v2, multi-cluster management
Serverless : Knative autoscaling, OpenFaaS at scale
Runtime Internals : containerd/runc deep knowledge, CRIU
Plugin Development : Custom volume/network/security plugins
Security : Zero-trust, supply chain security, compliance automation
Networking : eBPF, multi-cluster networking, service mesh federation
Performance : NUMA optimization, kernel tuning, profiling
Scale : Multi-region, global load balancing, disaster recovery
Leadership & Architecture
Enterprise Container Platform : Build internal platform with Kubernetes, service mesh, CI/CD
Global Microservices Platform : Multi-region, active-active, disaster recovery
Custom Container Runtime : Extend containerd/runc for specialized workloads
Container Security Framework : Automated compliance, vulnerability management, policy-as-code
Performance Optimization System : Automated resource tuning, cost optimization
Multi-Cloud Container Strategy : Unified management across cloud providers
Container-native Development Platform : Developer experience platform with remote dev environments
🏆 BEYOND DOCKER: CONTAINER ECOSYSTEM
WebAssembly (WASM) : Run WASM in containers
eBPF : Deep observability and security
Confidential Computing : Encrypted containers
Container-optimized OS : Bottlerocket, Flatcar
Edge Containers : k3s, microk8s, Akri
AI/ML Containers : Kubeflow, MLflow
Blockchain Containers : Hyperledger, Ethereum
CKA (Certified Kubernetes Administrator)
CKAD (Certified Kubernetes Application Developer)
CKS (Certified Kubernetes Security Specialist)
DCA (Docker Certified Associate)
AWS EKS , Azure AKS , Google GKE specializations
| Domain | Novice | Intermediate | Advanced | Expert |
| --- | --- | --- | --- | --- |
| Container Basics | ☑️ | ☑️ | ☑️ | ☐ |
| Orchestration | ☑️ | ☑️ | ☐ | ☐ |
| Service Mesh | ☑️ | ☐ | ☐ | ☐ |
| Security | ☑️ | ☑️ | ☐ | ☐ |
| Networking | ☑️ | ☑️ | ☐ | ☐ |
| Storage | ☑️ | ☑️ | ☐ | ☐ |
| CI/CD | ☑️ | ☑️ | ☐ | ☐ |
| GitOps | ☑️ | ☐ | ☐ | ☐ |
| Performance | ☑️ | ☐ | ☐ | ☐ |
| Scale | ☑️ | ☐ | ☐ | ☐ |
| Custom Development | ☑️ | ☐ | ☐ | ☐ |
| Architecture | ☑️ | ☐ | ☐ | ☐ |
🐳 Complete Docker Commands Mastery Guide (A-Z)
📋 Complete Docker CLI Command Reference
Command
Description
Usage Examples
Level
A
docker attach
Attach local standard input, output, and error streams to a running container
docker attach mycontainerdocker attach --detach-keys="ctrl-d" mycontainer
⭐⭐
docker build
Build an image from a Dockerfile
docker build -t myapp:latest .docker build --no-cache -t myapp .docker build --build-arg VERSION=1.0 -t myapp .docker build -f Dockerfile.dev .
⭐
docker buildx
Extended build capabilities with BuildKit
docker buildx create --usedocker buildx build --platform linux/amd64,linux/arm64 -t myapp .docker buildx lsdocker buildx inspect
⭐⭐⭐⭐
docker builder
Manage builds
docker builder prunedocker builder prune -a -fdocker builder lsdocker builder debug
⭐⭐⭐
C
docker checkpoint
Manage checkpoints (experimental)
docker checkpoint create container checkpoint1docker checkpoint ls containerdocker checkpoint rm container checkpoint1
⭐⭐⭐⭐
docker commit
Create a new image from a container's changes
docker commit mycontainer myapp:v2docker commit -m "added nginx" -a "author" mycontainer myapp:v2
⭐⭐
docker config
Manage Swarm configs
docker config create myconfig ./config.confdocker config lsdocker config inspect myconfigdocker config rm myconfig
⭐⭐⭐
docker container
Manage containers
docker container ls -adocker container prune -fdocker container inspect mycontainerdocker container stats
⭐
docker context
Manage Docker contexts
docker context create mycontext --docker "host=tcp://192.168.1.100:2375"docker context use mycontextdocker context lsdocker context inspect
⭐⭐⭐
docker cp
Copy files/folders between container and local filesystem
docker cp mycontainer:/app/logs ./logsdocker cp ./config.json mycontainer:/app/docker cp --archive mycontainer:/data ./backup
⭐⭐
docker create
Create a new container
docker create --name myapp nginxdocker create -p 8080:80 -v ./data:/data nginxdocker create --restart always nginx
⭐
D
docker diff
Inspect changes to files or directories on a container's filesystem
docker diff mycontainerdocker diff --format "{{json .}}" mycontainer
⭐⭐
docker docker
Docker's self-command (deprecated)
docker docker --help
⭐
docker events
Get real-time events from the server
docker events --since '2024-01-01'docker events --filter 'container=myapp'docker events --format '{{json .}}'
⭐⭐⭐
docker exec
Execute a command in a running container
docker exec -it mycontainer bashdocker exec mycontainer ps auxdocker exec -d mycontainer touch /tmp/healthdocker exec --env VAR=value mycontainer app
⭐
docker export
Export a container's filesystem as a tar archive
docker export mycontainer > backup.tardocker export --output="backup.tar" mycontainer `docker export mycontainer
gzip > backup.tar.gz`
H
docker help
Get help for any command
docker help rundocker help builddocker --help
⭐
docker history
Show the history of an image
docker history nginx:latestdocker history --no-trunc myappdocker history --format "table {{.ID}}\t{{.CreatedBy}}" myapp
⭐⭐
I
docker image
Manage images
docker image ls -adocker image prune -a -fdocker image inspect nginxdocker image history myapp
⭐
docker images
List images
docker imagesdocker images -adocker images --filter "dangling=true"docker images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}"
⭐
docker import
Import the contents from a tarball to create a filesystem image
docker import backup.tar myapp:v1docker import http://example.com/image.tgz myapp:v1 `cat backup.tar
docker import - myapp:v1`
docker info
Display system-wide information
docker infodocker info --format '{{.ServerVersion}}'docker info -f '{{json .}}'
⭐
docker inspect
Return low-level information on Docker objects
docker inspect mycontainerdocker inspect --format='{{.NetworkSettings.IPAddress}}' mycontainerdocker inspect -f '{{json .Config}}' nginx
⭐⭐
K
docker kill
Kill one or more running containers
docker kill mycontainerdocker kill $(docker ps -q)docker kill --signal SIGTERM mycontainer
⭐
L
docker load
Load an image from a tar archive or STDIN
docker load < myapp.tardocker load -i myapp.tardocker load --input backup/myapp.tar
⭐⭐
docker login
Log in to a Docker registry
docker logindocker login registry.example.com -u username -p passworddocker login --password-stdin
⭐
docker logout
Log out from a Docker registry
docker logoutdocker logout registry.example.com
⭐
docker logs
Fetch the logs of a container
docker logs mycontainerdocker logs -f --tail 100 mycontainerdocker logs --since 2024-01-01T00:00:00 mycontainerdocker logs --details mycontainer
⭐
M
docker manifest
Manage Docker image manifests and manifest lists
docker manifest inspect nginx:latestdocker manifest create myapp:multi --amend myapp:amd64 --amend myapp:arm64docker manifest push myapp:multi
⭐⭐⭐⭐
docker network
Manage networks
docker network create --subnet=172.20.0.0/16 mynetdocker network lsdocker network connect mynet mycontainerdocker network disconnect mynet mycontainerdocker network prune
⭐⭐
docker node
Manage Swarm nodes
docker node lsdocker node promote node1docker node demote node1docker node update --availability drain node1docker node inspect self
⭐⭐⭐
P
docker pause
Pause all processes within one or more containers
docker pause mycontainerdocker pause $(docker ps -q)
⭐⭐
docker plugin
Manage plugins
docker plugin install vieux/sshfsdocker plugin lsdocker plugin enable myplugindocker plugin disable myplugindocker plugin inspect myplugin
⭐⭐⭐⭐
docker port
List port mappings or a specific mapping for the container
docker port mycontainer
docker port mycontainer 80
docker port mycontainer 80/tcp
⭐
docker ps
List containers
docker ps
docker ps -a
docker ps --filter "status=exited"
docker ps --format "table {{.Names}}\t{{.Status}}"
docker ps -s
⭐
docker pull
Pull an image or a repository from a registry
docker pull nginx:alpine
docker pull registry.example.com/myapp:1.0
docker pull --platform linux/arm64 nginx
docker pull -a myapp
⭐
docker push
Push an image or a repository to a registry
docker push myregistry/myapp:1.0
docker push --all-tags myregistry/myapp
docker push myregistry/myapp:latest
⭐
R
docker rename
Rename a container
docker rename oldname newname
⭐
docker restart
Restart one or more containers
docker restart mycontainer
docker restart -t 30 mycontainer
docker restart $(docker ps -q)
⭐
docker rm
Remove one or more containers
docker rm mycontainer
docker rm -f mycontainer
docker rm $(docker ps -aq)
docker rm -v mycontainer
⭐
docker rmi
Remove one or more images
docker rmi nginx:latest
docker rmi -f myapp
docker rmi $(docker images -q)
docker rmi --no-prune myapp
⭐
docker run
Run a command in a new container
docker run -d --name myapp -p 8080:80 nginx
docker run -it --rm ubuntu bash
docker run --restart always --memory="512m" nginx
docker run -v /host/data:/container/data nginx
docker run --network mynet --ip 172.20.0.10 nginx
⭐
S
docker save
Save one or more images to a tar archive (streamed to STDOUT by default)
docker save -o myapp.tar myapp:latest
docker save myapp:latest | gzip > myapp.tar.gz
docker save nginx alpine > images.tar
⭐⭐
docker search
Search Docker Hub for images
docker search nginx
docker search --filter stars=1000 --limit 10 nginx
docker search --format "table {{.Name}}\t{{.Description}}" nginx
⭐
docker secret
Manage Swarm secrets
docker secret create db_password ./password.txt
docker secret ls
docker secret inspect db_password
docker secret rm db_password
⭐⭐⭐
docker service
Manage Swarm services
docker service create --name web --replicas 3 -p 80:80 nginx
docker service ls
docker service scale web=5
docker service update --image nginx:alpine web
docker service logs web
docker service ps web
docker service rollback web
⭐⭐⭐
docker stack
Manage Swarm stacks
docker stack deploy -c docker-compose.yml myapp
docker stack ls
docker stack services myapp
docker stack ps myapp
docker stack rm myapp
⭐⭐⭐
docker start
Start one or more stopped containers
docker start mycontainer
docker start -a -i mycontainer
docker start $(docker ps -aq -f status=exited)
⭐
docker stats
Display a live stream of container resource usage statistics
docker stats
docker stats --no-stream
docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}"
docker stats $(docker ps --format={{.Names}})
⭐⭐
docker stop
Stop one or more running containers
docker stop mycontainer
docker stop -t 30 mycontainer
docker stop $(docker ps -q)
⭐
docker swarm
Manage Swarm
docker swarm init --advertise-addr 192.168.1.100
docker swarm join --token TOKEN 192.168.1.100:2377
docker swarm leave --force
docker swarm update --autolock=true
docker swarm ca --rotate
⭐⭐⭐
docker system
Manage Docker system
docker system df
docker system prune -a -f --volumes
docker system events
docker system info
docker system dial-stdio
⭐⭐
T
docker tag
Create a tag TARGET_IMAGE that refers to SOURCE_IMAGE
docker tag myapp:latest myregistry/myapp:1.0
docker tag nginx:alpine mynginx:v1
docker tag myapp myapp:staging
⭐
docker top
Display the running processes of a container
docker top mycontainer
docker top mycontainer -ef
docker top mycontainer aux
⭐⭐
docker trust
Manage trust on Docker images
docker trust sign myregistry/myapp:1.0
docker trust inspect --pretty myregistry/myapp
docker trust key generate alice
docker trust signer add --key alice.pub alice myregistry/myapp
docker trust revoke myregistry/myapp:1.0
⭐⭐⭐⭐
U
docker unpause
Unpause all processes within one or more containers
docker unpause mycontainer
docker unpause $(docker ps -q -f status=paused)
⭐⭐
docker update
Update configuration of one or more containers
docker update --cpus 2 --memory 512M mycontainer
docker update --restart always mycontainer
docker update --kernel-memory 1G mycontainer
⭐⭐
V
docker version
Show the Docker version information
docker version
docker version --format '{{.Server.Version}}'
docker version -f '{{json .}}'
⭐
docker volume
Manage volumes
docker volume create myvolume
docker volume ls
docker volume inspect myvolume
docker volume prune -f
docker volume rm myvolume
docker volume create --driver local --opt type=nfs --opt o=addr=192.168.1.100,rw --opt device=:/path nfs-volume
⭐⭐
W
docker wait
Block until one or more containers stop, then print their exit codes
docker wait mycontainer
docker wait $(docker ps -aq)
⭐⭐
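As a closing example of how the commands above combine in scripts, here is a minimal sketch that runs a one-off job container and branches on its exit code with docker wait. The image myapp-migrate and container name migrate-job are placeholders, not commands from the reference itself.
# Run a one-off job, wait for it, and act on its exit status (names are placeholders)
docker run -d --name migrate-job myapp-migrate
status=$(docker wait migrate-job)   # blocks until the container stops, prints its exit code
docker logs migrate-job             # review the job output
docker rm migrate-job               # remove the stopped container
if [ "$status" -ne 0 ]; then echo "job failed with exit code $status"; fi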
📊 Command Categories by Use Case
🔵 Beginner Commands (Level ⭐)
| Command | Primary Use | Frequency |
|---------|-------------|-----------|
| docker run | Create and start containers | Daily |
| docker ps | List containers | Daily |
| docker stop | Stop containers | Daily |
| docker rm | Remove containers | Daily |
| docker rmi | Remove images | Daily |
| docker pull | Download images | Daily |
| docker push | Upload images | Daily |
| docker build | Build images | Daily |
| docker exec | Execute commands in containers | Daily |
| docker logs | View container logs | Daily |
| docker images | List images | Daily |
| docker cp | Copy files | Weekly |
| docker tag | Tag images | Weekly |
| docker login | Registry authentication | Weekly |
| docker logout | Registry logout | Weekly |
| docker version | Version info | Monthly |
| docker info | System info | Monthly |
| docker help | Get help | As needed |
| docker create | Create containers | As needed |
| docker start | Start containers | As needed |
| docker restart | Restart containers | As needed |
| docker port | Check port mappings | As needed |
| docker rename | Rename containers | Rarely |
| docker kill | Force stop containers | Rarely |
🟡 Intermediate Commands (Level ⭐⭐)
| Command | Primary Use | Frequency |
|---------|-------------|-----------|
| docker commit | Create images from containers | Weekly |
| docker network | Manage networks | Weekly |
| docker volume | Manage volumes | Weekly |
| docker inspect | Detailed object inspection | Weekly |
| docker diff | Filesystem changes | Weekly |
| docker history | Image layer history | Weekly |
| docker export | Export containers | Monthly |
| docker import | Import images | Monthly |
| docker save | Save images | Monthly |
| docker load | Load images | Monthly |
| docker stats | Resource monitoring | Weekly |
| docker top | Process monitoring | Weekly |
| docker pause | Pause containers | Monthly |
| docker unpause | Unpause containers | Monthly |
| docker wait | Wait for containers | Monthly |
| docker attach | Attach to containers | Monthly |
| docker update | Update container config | Monthly |
| docker system | System management | Weekly |
| docker builder | Build management | Weekly |
| docker container | Container management | Daily |
| docker image | Image management | Daily |
| docker events | System events | As needed |
🟠 Advanced Commands (Level ⭐⭐⭐)
| Command | Primary Use | Frequency |
|---------|-------------|-----------|
| docker swarm | Swarm orchestration | Weekly |
| docker service | Service management | Weekly |
| docker stack | Stack deployment | Weekly |
| docker node | Node management | Weekly |
| docker secret | Secret management | Weekly |
| docker config | Config management | Weekly |
| docker context | Context management | Weekly |
| docker manifest | Multi-arch images | Monthly |
| docker trust | Image signing | Monthly |
| docker plugin | Plugin management | Monthly |
| docker checkpoint | Container checkpointing | Rarely |
🔴 Expert Commands (Level ⭐⭐⭐⭐)
| Command | Primary Use | Frequency |
|---------|-------------|-----------|
| docker buildx | Multi-platform builds | Weekly |
| docker manifest | Manifest lists | Weekly |
| docker trust | Content trust | Weekly |
| docker plugin | Custom plugins | Monthly |
| docker checkpoint | Live migration | Rarely |
| docker swarm ca | Swarm security | Monthly |
🎯 Command Combinations & Recipes
Container Lifecycle Management
# Complete container lifecycle
docker pull nginx:alpine
docker create --name web -p 8080:80 nginx:alpine
docker start web
docker ps
docker exec -it web bash
docker stop web
docker rm web
# One-liner: run and remove after exit
docker run --rm -it ubuntu bash
# Stop and remove all containers
docker stop $(docker ps -aq) && docker rm $(docker ps -aq)
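# Equivalent one-liner: force-remove running and stopped containers in one step
docker rm -f $(docker ps -aq)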
# Remove all unused resources
docker system prune -a --volumes -f
# Build, tag, push pipeline
docker build -t myapp:1.0 .
docker tag myapp:1.0 myregistry/myapp:1.0
docker push myregistry/myapp:1.0
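# Optional (sketch): authenticate non-interactively before pushing, e.g. from CI.
# REGISTRY_PASSWORD and ci-user are placeholders, not part of the pipeline above.
echo "$REGISTRY_PASSWORD" | docker login -u ci-user --password-stdin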
# Save and load images for offline transfer
docker save -o myapp.tar myapp:1.0
scp myapp.tar user@remote-server:/tmp/
ssh user@remote-server "docker load -i /tmp/myapp.tar"
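# Alternative (sketch): stream the image straight over SSH with no intermediate file.
# Assumes the same placeholder remote host and that Docker is installed there.
docker save myapp:1.0 | ssh user@remote-server "docker load"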
# Multi-architecture build
docker buildx build --platform linux/amd64,linux/arm64 -t myapp:multi --push .
# Complete container diagnostics
docker inspect mycontainer | jq '.[0].State'
docker logs --tail 50 -f mycontainer
docker exec mycontainer ps aux
docker top mycontainer
docker stats --no-stream mycontainer
docker diff mycontainer
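# Optional (sketch): if the image defines a HEALTHCHECK, query its current status
docker inspect --format '{{.State.Health.Status}}' mycontainer
# Exit code and OOM-kill flag live in the same State block
docker inspect --format '{{.State.ExitCode}} {{.State.OOMKilled}}' mycontainer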
# Network diagnostics
docker exec mycontainer ping google.com
docker exec mycontainer curl http://localhost:80
docker port mycontainer
docker network inspect bridge
# Volume backup and restore
docker run --rm -v myvolume:/data -v $(pwd):/backup alpine tar czf /backup/volume-backup.tar.gz -C /data .
docker run --rm -v newvolume:/data -v $(pwd):/backup alpine tar xzf /backup/volume-backup.tar.gz -C /data
# Copy between containers
docker cp container1:/app/data/. container2:/app/data/
docker run --rm --volumes-from container1 -v $(pwd):/backup alpine tar czf /backup/backup.tar.gz /app/data
# Initialize and manage swarm
docker swarm init --advertise-addr eth0
docker swarm join-token worker
docker node ls
docker service create --name web --replicas 3 --publish 80:80 nginx
docker service scale web=5
docker service update --image nginx:alpine web
docker stack deploy -c docker-compose.yml prod
docker stack services prod
docker stack rm prod
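# Maintenance (sketch): drain a worker before host updates, then bring it back.
# "node1" is a placeholder hostname taken from `docker node ls`.
docker node update --availability drain node1
docker node update --availability active node1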
# Aggressive cleanup
docker stop $(docker ps -aq) 2>/dev/null
docker rm $(docker ps -aq) 2>/dev/null
docker rmi -f $(docker images -q) 2>/dev/null
docker volume prune -f
docker network prune -f
docker system prune -a --volumes -f
docker builder prune -a -f
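Before running an aggressive cleanup like the one above, it is worth auditing what is actually consuming disk space. A minimal sketch using only commands covered in this reference:
# Audit disk usage before pruning
docker system df -v                      # per-image, per-container, per-volume breakdown
docker images --filter "dangling=true"   # untagged image layers a prune would remove
docker volume ls -f dangling=true        # volumes not referenced by any container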
📝 Quick Reference by Task
| Task | Command |
|------|---------|
| Pull image | docker pull nginx:alpine |
| List running containers | docker ps |
| List all containers | docker ps -a |
| Run container | docker run -d --name web -p 80:80 nginx |
| Run interactive container | docker run -it --rm ubuntu bash |
| Stop container | docker stop web |
| Start container | docker start web |
| Restart container | docker restart web |
| Remove container | docker rm -f web |
| Remove all containers | docker rm -f $(docker ps -aq) |
| View logs | docker logs -f web |
| Execute command | docker exec -it web bash |
| Copy to container | docker cp file.txt web:/app/ |
| Copy from container | docker cp web:/app/logs.txt ./ |
| Build image | docker build -t myapp:1.0 . |
| Tag image | docker tag myapp:1.0 myregistry/myapp:1.0 |
| Push image | docker push myregistry/myapp:1.0 |
| List images | docker images |
| Remove image | docker rmi myapp:1.0 |
| Remove all images | docker rmi -f $(docker images -q) |
| Create network | docker network create mynet |
| List networks | docker network ls |
| Connect to network | docker network connect mynet web |
| Create volume | docker volume create myvol |
| List volumes | docker volume ls |
| Inspect object | docker inspect web |
| View stats | docker stats |
| System info | docker info |
| Prune system | docker system prune -a |
| Initialize swarm | docker swarm init |
| Create service | docker service create --name web -p 80:80 nginx |
| List services | docker service ls |
| Scale service | docker service scale web=5 |
| Deploy stack | docker stack deploy -c docker-compose.yml app |
| Login to registry | docker login -u username -p password |
| Version info | docker version |
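Most of the commands above also accept --format with Go templates for script-friendly output. Two hedged examples follow; the container name web is a placeholder:
# Print a container's IP address on each attached network
docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}' web
# List running containers as name/image pairs only
docker ps --format '{{.Names}}\t{{.Image}}'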
🎓 Command Progression Path
Week 1-2: Container Basics
docker pull → docker run → docker ps → docker stop → docker rm → docker images → docker rmi
Week 3-4: Images & Registries
docker build → docker tag → docker push → docker exec → docker logs → docker cp → docker inspect
Week 5-6: Networking & Storage
docker network create → docker network connect → docker volume create → docker volume ls → docker run -v
Week 7-8: Advanced Features
docker commit → docker save → docker load → docker export → docker import → docker diff → docker history
Week 9-10: Orchestration (Swarm)
docker swarm init → docker swarm join → docker service create → docker service scale → docker stack deploy
Week 11-12: Expert Tooling
docker buildx → docker manifest → docker trust → docker checkpoint → docker plugin