Skip to content

Instantly share code, notes, and snippets.

@jingwangsg
Created July 16, 2024 03:45
Show Gist options
  • Select an option

  • Save jingwangsg/f6719135ebd68fa730b37fe1f8884f14 to your computer and use it in GitHub Desktop.

Select an option

Save jingwangsg/f6719135ebd68fa730b37fe1f8884f14 to your computer and use it in GitHub Desktop.
fish
# Placeholder for interactive-only setup; the body is currently empty.
if status is-interactive
# Commands to run in interactive sessions can go here
end
# ================== PATH ==================
# set -x PATH
# Export credentials and the timezone to every child process (`set -x`).
# NOTE(review): the AWS key pair below is stored in plain text in this file.
# Consider loading it from a private, untracked file and rotating the pair,
# since anyone who can read this file can read the secret.
set -x AWS_ACCESS_KEY_ID wangjing01
set -x AWS_SECRET_ACCESS_KEY jm2E6gBaFZvD
set -x TZ Asia/Singapore
# ================== colored print ==================
function mount
    # Mount the rclone remotes sail_sg and sail_my under ~/MOUNT/<remote>/,
    # each as a background daemon (--daemon) without waiting (--daemon-wait 0).
    # NOTE(review): this function shares its name with the system `mount`
    # utility, so plain `mount` calls in this shell presumably land here.
    for remote in sail_sg sail_my
        rclone mount $remote:/home/aiops/wangjing/ ~/MOUNT/$remote/ --daemon --daemon-wait 0
    end
end
function red_print
    # Print all arguments, joined by spaces, wrapped in the ANSI red colour
    # code (31). `echo -e` also expands backslash escapes such as \t that
    # appear inside the arguments.
    set -l message "$argv"
    echo -e '\033[31m'$message'\033[0m'
end
function green_print
    # Print all arguments, joined by spaces, wrapped in the ANSI green colour
    # code (32). `echo -e` also expands backslash escapes such as \t that
    # appear inside the arguments.
    set -l message "$argv"
    echo -e '\033[32m'$message'\033[0m'
end
function blue_print
    # Print all arguments, joined by spaces, wrapped in the ANSI blue colour
    # code (34). `echo -e` also expands backslash escapes such as \t that
    # appear inside the arguments.
    set -l message "$argv"
    echo -e '\033[34m'$message'\033[0m'
end
# ================== other functions ==================
# https://github.com/junegunn/fzf/issues/868 implements fuzzy complete in fish
# function clia
# read -l line
# commandline -a $line
# # commandline -a
# end
# function fuzzy_complete
# complete -C | sort -u | fzf --height 40% --multi --reverse -q (commandline -t) | cut --output-delimiter ' ' -f1 | sed s/-//g | clia
# commandline -f end-of-line
# end
# bind -M insert \t fuzzy_complete
function knkill
    # Send SIGKILL to the current user's processes whose `ps` line matches a
    # filter.
    #
    # Usage: knkill [FILTER|ALL]
    #   FILTER  pattern handed to grep; "python" is used when it is omitted
    #           or empty.
    #   ALL     is discarded, after which the default filter "python" applies.
    #
    # Lines matching tmux, bash or fish are excluded before filtering.
    # NOTE(review): "(string echo $PPID)" sits inside double quotes, so it
    # looks like it reaches grep as literal text rather than being executed -
    # confirm the intended exclusion. Also confirm whether ALL was meant to
    # match every process instead of falling back to "python".
    set -l requested "$argv[1]"
    if test "$requested" = ALL
        set requested ""
    end
    set filter python
    if test -n "$requested"
        set filter $requested
    end
    ps -u (whoami) --no-headers -o pid,comm= \
        | grep -v -E "^\$|((string echo $PPID))|tmux|bash|fish" \
        | grep -- $filter \
        | awk '{print $1}' \
        | xargs kill -9
end
function rl
# Shorthand for `readlink -f`, applied to all given arguments.
readlink -f $argv
end
# ================ sail utility functions ================
function select_jobs
    # Ask for a 1-based index into the output lines of `sailctl job list` and
    # print the selected line.
    #
    # Output:  the selected entry, or an error message for invalid input.
    # Returns: 1 when the input is not a number within range.

    # Get a list of all job IDs
    set -l job_ids (sailctl job list)
    set -l total (count $job_ids)
    # Prompt the user to enter an index
    echo "Enter the index of the job you want to select:"
    read -l index
    # Require a plain number before comparing: `test` prints its own error
    # when it is handed an empty or non-numeric operand.
    if string match -qr -- '^[0-9]+$' "$index"; and test $index -ge 1 -a $index -le $total
        echo $job_ids[$index]
    else
        echo "Invalid index. Please enter a number between 1 and $total."
        return 1
    end
end
function scto_my
# Download the sail quickstart script and run it with bash for project
# "generative-model" on cluster "tmkv-1".
# NOTE(review): the downloaded script is piped straight into bash without
# any integrity check.
curl -SsL https://download.sail.insea.io/quickstart.sh | bash -s -- --project generative-model --cluster tmkv-1
end
function scto_sg
# Download the sail quickstart script and run it with bash for project
# "generative-model" on cluster "sail-im-1".
# NOTE(review): the downloaded script is piped straight into bash without
# any integrity check.
curl -SsL https://download.sail.insea.io/quickstart.sh | bash -s -- --project generative-model --cluster sail-im-1
end
function scfwd
    # Port-forward to a pod.
    #
    # Usage: scfwd                              pick a pod via scfwd_auto
    #        scfwd POD [PORT [REMOTE_PORT]]     forwarded to _scfwd
    #
    # $argv is quoted so `test` always receives exactly one operand; unquoted,
    # two or more arguments expand to several words and `test -z` reports a
    # syntax error before the else branch runs.
    if test -z "$argv"
        scfwd_auto
    else
        _scfwd $argv
    end
end
function _scfwd
    # Run `kubectl port-forward` for one pod.
    #
    # Usage: _scfwd POD [PORT [REMOTE_PORT]]
    #   PORT         local port, default 2222 (read only with 2 or 3 arguments)
    #   REMOTE_PORT  pod port, default 22 (read only with exactly 3 arguments)
    # Any other argument count keeps both defaults.
    set -l pod_name $argv[1]
    set -l port 2222
    set -l remote_port 22
    set -l arg_total (count $argv)
    if test $arg_total -eq 2
        set port $argv[2]
    else if test $arg_total -eq 3
        set port $argv[2]
        set remote_port $argv[3]
    end
    green_print "Forwarding port $port to pod $pod_name ..."
    kubectl port-forward pod/$pod_name $port:$remote_port
end
function scfwd_auto
    # Let the user pick one of the pods listed by `knp` with fzf, then forward
    # the first free local port (from get_avail_port) to port 22 of that pod.
    #
    # Returns: 1 when no pod was selected.
    set -l pod_name (knp | fzf | awk '{print $1}')
    # An empty selection would vanish from the argument list below and shift
    # the port number into the pod-name position of _scfwd.
    if test -z "$pod_name"
        red_print "No pod selected."
        return 1
    end
    set -l port (get_avail_port)
    set -l remote_port 22
    _scfwd $pod_name $port $remote_port
end
function _delete_empty_args
    # Print every non-empty argument on its own line, so that a command
    # substitution around this call yields the arguments without the empty
    # ones.
    #
    # printf is used instead of echo: echo treats words such as -n, -e, -s
    # and -E as its own options, so those arguments would be dropped from the
    # output instead of being passed through.
    for arg in $argv
        if test -n "$arg"
            printf '%s\n' $arg
        end
    end
end
function _scssh
    # Open a shell on a pod, or run a command there, through `kubectl exec`.
    #
    # Usage: _scssh POD [-c|--conda ENV] [COMMAND...]
    #   POD          pod to exec into
    #   -c, --conda  prefix the command with `conda run -n ENV`
    #   COMMAND      remaining words, joined by spaces and run via `bash -c`
    # When POD is the only argument an interactive /bin/bash is started in
    # $HOME instead. HOME and PATH are exported on the pod in both modes.
    set -l pod_name $argv[1]
    # Blank the pod name so it is filtered out together with the consumed flags.
    set argv[1] ""
    set -l PATH_VAR "export HOME=/home/aiops/wangjing; export PATH=~/homebrew/bin/:~/miniconda3/bin:\$PATH "
    set -l conda_run ""
    set -l num_args (count $argv)
    for idx in (seq $num_args)
        if contains -- "$argv[$idx]" -c --conda
            set -l value_idx (math $idx + 1)
            set -l env_name $argv[$value_idx]
            set conda_run "conda run --no-capture-output -n $env_name"
            # Blank the flag and its value; they are removed below.
            set argv[$idx] ""
            set argv[$value_idx] ""
        end
    end
    set argv (_delete_empty_args $argv)
    set -l exec_cmd
    if test $num_args -le 1
        green_print "Connecting to $pod_name ..."
        set exec_cmd bash -c "$PATH_VAR; cd \$HOME; exec /bin/bash"
    else
        set -l remote_cmd "$conda_run $argv"
        green_print "ENV: $PATH_VAR"
        green_print "Execute on $pod_name: $remote_cmd"
        set exec_cmd bash -c "$PATH_VAR; $remote_cmd"
    end
    kubectl exec -it $pod_name -- $exec_cmd
end
function scssh_auto
    # Let the user pick one of the pods listed by `knp` with fzf and open a
    # shell on it via _scssh.
    #
    # Returns: 1 when no pod was selected.
    set -l pod_name (knp | fzf | awk '{print $1}')
    # Without this guard a cancelled selection calls _scssh with no pod name.
    if test -z "$pod_name"
        red_print "No pod selected."
        return 1
    end
    _scssh $pod_name
end
function scssh
    # Entry point for pod shells: with arguments they are handed to _scssh,
    # without (or with only empty) arguments a pod is picked via scssh_auto.
    if test -n "$argv"
        _scssh $argv
        return
    end
    scssh_auto
end
function scjc_tunnel
    # Create a job with scjc, look up its pod with scpod and run the tunnel
    # init script on that pod.
    #
    # Arguments: forwarded unchanged to scjc.
    # Returns:   1 when no pod is found for the job.
    #
    # The job name is taken from the last line scjc writes to stdout.
    set -l job_name (scjc $argv | tee /dev/tty | tail -n 1)
    set -l pod_name (scpod $job_name | awk '{print $1}')
    if test -z "$pod_name"
        red_print "No pod found for job '$job_name'."
        return 1
    end
    green_print "Pod created: $pod_name"
    # scssh_tunnel treats a single argument as the tunnel name and then asks
    # for a pod interactively, so pass the pod together with an explicit
    # tunnel name ("debug" is scssh_tunnel's own default). Only the first pod
    # is used when the job has several.
    scssh_tunnel $pod_name[1] debug
end
function _scssh_tunnel
    # Usage: _scssh_tunnel POD TUNNEL_NAME
    # Runs sail_util's init.sh on POD through scssh, passing TUNNEL_NAME as the
    # script's argument.
    scssh $argv[1] "bash /home/aiops/wangjing/WORKSPACE/sail_util/scripts/init.sh $argv[2]"
end
function scssh_tunnel_auto
    # Let the user pick one of the pods listed by `knp` with fzf and start the
    # tunnel on it.
    #
    # Usage:   scssh_tunnel_auto TUNNEL_NAME
    # Returns: 1 when no pod was selected.
    set -l pod_name (knp | fzf | awk '{print $1}')
    # An empty selection would vanish from the argument list below and shift
    # the tunnel name into the pod-name position of _scssh_tunnel.
    if test -z "$pod_name"
        red_print "No pod selected."
        return 1
    end
    _scssh_tunnel $pod_name $argv[1]
end
function scssh_tunnel
    # Usage: scssh_tunnel                     pick a pod, tunnel name "debug"
    #        scssh_tunnel TUNNEL_NAME         pick a pod, given tunnel name
    #        scssh_tunnel POD TUNNEL_NAME...  forwarded as-is to _scssh_tunnel
    set -l num_args (count $argv)
    if test $num_args -ge 2
        _scssh_tunnel $argv
        return
    end
    # Zero or one argument: a lone argument is the tunnel name, not a pod.
    set -l tunnel_name debug
    if test $num_args -eq 1
        set tunnel_name $argv[1]
    end
    scssh_tunnel_auto $tunnel_name
end
function sc
# Shorthand for `sailctl`; all arguments are passed through.
sailctl $argv
end
function scj
# Shorthand for `sailctl job`; all arguments are passed through.
sailctl job $argv
end
function _scjc_parse_args
    # Parse the arguments of scjc and print them in a form scjc can split.
    #
    # Usage: _scjc_parse_args [NAME] [OPTIONS...]
    #   NAME             first argument, used when it does not start with "-"
    #                    (default: gpu)
    #   -g, --gpu N      gpu count (default 0)
    #   -r, --replica N  node count (default 1)
    #   -p, --priority P priority (default low)
    #   -a, --args W...  following words up to the next word starting with "-",
    #                    joined with spaces
    #   -h, --highvram   set high_vram to 1
    #   -t, --time N     time (default 16)
    #   --tmux           set tmux to 1
    #   -e, --email      set email to 1
    #   --config NAME    config name
    #   -w, --wait       set is_wait to 1
    #   --mount-s3       set mount_s3 to 1
    #
    # Output:
    #   line 1: name, gpu, nodes, priority, time, high_vram, email, config,
    #           tmux, is_wait, mount_s3, args - each followed by "<SEP>"
    #   line 2: the remaining, unrecognised arguments
    set -l name gpu
    set -l gpu 0
    set -l nodes 1
    set -l priority low
    set -l time 16
    set -l high_vram 0
    set -l email 0
    set -l tmux 0
    set -l is_wait 0
    set -l mount_s3 0
    set -l config ""
    set -l args ""
    # if first argument does not start with "-", it is the name of the job.
    # The count check keeps the default name when no arguments are given;
    # otherwise name would become an empty list.
    if test (count $argv) -gt 0; and not string match -q -- "-*" "$argv[1]"
        set name $argv[1]
        set --erase argv[1]
    end
    set -l num_argv (count $argv)
    if test $num_argv -gt 0
        # Consumed flags and values are blanked in place and filtered out at
        # the end, so the indices stay stable during the loop.
        for i in (seq $num_argv)
            switch $argv[$i]
                case -g --gpu
                    set gpu $argv[(math $i + 1)]
                    set argv[$i] ""
                    set argv[(math $i + 1)] ""
                case -r --replica
                    set nodes $argv[(math $i + 1)]
                    set argv[$i] ""
                    set argv[(math $i + 1)] ""
                case -p --priority
                    set priority $argv[(math $i + 1)]
                    set argv[$i] ""
                    set argv[(math $i + 1)] ""
                case -a --args
                    # read until next flag
                    set args ""
                    set j (math $i + 1)
                    while test (math $num_argv + 1) -gt $j
                        if string match -q -- "-*" $argv[$j]
                            break
                        end
                        set args "$args $argv[$j]"
                        set argv[$j] ""
                        set j (math $j + 1)
                    end
                    set argv[$i] ""
                case -h --highvram
                    set argv[$i] ""
                    set high_vram 1
                case -t --time
                    set time $argv[(math $i + 1)]
                    set argv[$i] ""
                    set argv[(math $i + 1)] ""
                case --tmux
                    set argv[$i] ""
                    set tmux 1
                case -e --email
                    set argv[$i] ""
                    set email 1
                case --config
                    set config $argv[(math $i + 1)]
                    set argv[$i] ""
                    set argv[(math $i + 1)] ""
                case -w --wait
                    set argv[$i] ""
                    set is_wait 1
                case --mount-s3
                    set argv[$i] ""
                    set mount_s3 1
            end
        end
    end
    set -l other_argv (_delete_empty_args $argv)
    # Build the record inside one double-quoted string: an unquoted empty
    # variable (e.g. a flag given without its value) would make the whole
    # concatenated word expand to nothing and drop every field.
    echo "$name<SEP>$gpu<SEP>$nodes<SEP>$priority<SEP>$time<SEP>$high_vram<SEP>$email<SEP>$config<SEP>$tmux<SEP>$is_wait<SEP>$mount_s3<SEP>$args<SEP>"
    echo $other_argv
end
function scjc
    # Create a sailctl job and print its name as the last line of stdout.
    #
    # Usage: scjc [NAME] [OPTIONS...]   (options as parsed by _scjc_parse_args)
    #
    # Steps:
    #   1. parse the arguments and print a summary of the settings,
    #   2. assemble an `expect` script that spawns `sailctl job create` and
    #      types the environment setup plus the user command at its prompt(s),
    #   3. run the script, echoing its output to the terminal, and derive the
    #      job name from the last output line.
    #
    # Output: the summary lines, then the job name. scjc_tunnel and scjc_fwd
    # read the job name from the last stdout line of this function.
    set -l context (string split " " (_get_context))
    set platform $context[1]
    set cluster $context[2]
    # get args
    set -l outputs (string split "<SEP>" (_scjc_parse_args $argv))
    set -l name $outputs[1]
    set -l gpu $outputs[2]
    set -l nodes $outputs[3]
    set -l priority $outputs[4]
    set -l time $outputs[5]
    set -l high_vram $outputs[6]
    set -l email $outputs[7]
    set -l config $outputs[8]
    set -l tmux $outputs[9]
    set -l is_wait $outputs[10]
    set -l mount_s3 $outputs[11]
    set -l args $outputs[12]
    # NOTE(review): the first output line of _scjc_parse_args ends with
    # "<SEP>", so element 13 is presumably always empty; other_argv is not
    # used below.
    set -l other_argv $outputs[13]
    if test $nodes -gt 1
        red_print "Creating a job with $nodes nodes leads to high priority!"
        set priority high
    end
    set -l job_name $name"g"$gpu"r"$nodes
    if test $high_vram -eq 1
        set job_name $job_name"hv"
    end
    green_print "name\t\t$name"
    green_print "gpu\t\t$gpu"
    green_print "nodes\t\t$nodes"
    green_print "priority\t$priority"
    green_print "job_name\t$job_name"
    green_print "high_vram\t$high_vram"
    green_print "time\t\t$time"
    green_print "email\t\t$email"
    green_print "mount_s3\t$mount_s3"
    green_print "config\t\t$config"
    green_print "tmux\t\t$tmux"
    # warning for high priority jobs
    if test "$priority" = high
        red_print "[Warning] Creating a job with high priority!"
    end
    # Optional sailctl flags, accumulated into one string.
    set sailctl_kwargs ""
    if test $gpu -gt 0
        set sailctl_kwargs "$sailctl_kwargs -g $gpu"
    end
    if test $high_vram -eq 1
        set sailctl_kwargs "$sailctl_kwargs --high-vram"
    end
    if test -n "$config"
        set config_dir "$HOME/WORKSPACE/sail_util/configs/sailctl"
        set sailctl_kwargs "$sailctl_kwargs -f $config_dir/$config.yaml"
    end
    # Stays empty unless --email was given; the expect script then sends a
    # bare carriage return for it.
    set -l email_command ""
    if test $email -eq 1
        set email_command "~/miniconda3/bin/email -m \'Job $job_name ($cluster) is running\'"
    end
    if test $mount_s3 -eq 1
        set sailctl_kwargs "$sailctl_kwargs --mount-s3"
    end
    if test $nodes -gt 1
        set sailctl_kwargs "$sailctl_kwargs --image asia-docker.pkg.dev/sail-tpu-02/images/common/golden-image:12.3"
    end
    # The expect script is built as one single-quoted shell word and run with
    # eval below. The lines inside the strings start at column 0 on purpose:
    # they are string content.
    set cmd "expect -c '
spawn sailctl job create $job_name --debug --mount-dataset -r $nodes -p $priority $sailctl_kwargs --args
"
    set sleep_seconds (math $time x 3600)
    if test -z "$args"
        set args "echo Hi"
    end
    set run_cmd "$args"
    if test $tmux -eq 1
        set run_cmd "tmux new-session -d -s main \\\"source ~/.bashrc; $args; sleep $sleep_seconds\\\" ;sleep $sleep_seconds"
    end
    if test $nodes -eq 1
        set cmd "$cmd
expect \"Please enter your arguments (multi-line format, press Ctrl+D to finish):\"
send \"export HOME=/home/aiops/wangjing/\\r\"
send \"export PATH=~/homebrew/bin/:~/miniconda3/bin:\\\$PATH\\r\"
send \"source \\\$HOME/.bashrc\\r\"
send \"cd \\\$HOME\\r\"
send \"$email_command\\r\"
send \"$run_cmd\\r\"
send \"\004\"
"
    else
        # The email command is quoted exactly as in the single-node branch:
        # it contains spaces, and `send` expects it as one string.
        set cmd "$cmd
expect \"Please enter your master command (multi-line format, press Ctrl+D to finish):\"
send \"export HOME=/home/aiops/wangjing/\\r\"
send \"export PATH=~/homebrew/bin/:~/miniconda3/bin:\\\$PATH\\r\"
send \"source \\\$HOME/.bashrc\\r\"
send \"cd \\\$HOME\\r\"
send \"$email_command\\r\"
send \"$run_cmd\\r\"
send \"\004\"
"
        set cmd "$cmd
expect \"Please enter your worker command (multi-line format, press Ctrl+D to finish):\"
send \"export HOME=/home/aiops/wangjing/\\r\"
send \"export PATH=~/homebrew/bin/:~/miniconda3/bin:\\\$PATH\\r\"
send \"source \\\$HOME/.bashrc\\r\"
send \"cd \\\$HOME\\r\"
send \"$email_command\\r\"
send \"$run_cmd\\r\"
send \"\004\"
"
    end
    set cmd "$cmd
interact
'"
    set -l create_output (eval $cmd | tee /dev/tty | tail -n 1)
    set pod_status NA
    # The job name is the last "/"-separated part of the final output line;
    # string trim removes the trailing \r that gets appended to it.
    set job_name (string trim -- (string split "/" "$create_output")[-1])
    # Emit the job name as the final stdout line: scjc_tunnel and scjc_fwd
    # pick it up with `tail -n 1`. Previously this was only done inside the
    # wait loop that is commented out below.
    echo $job_name
    # ! Deprecated
    # blue_print "Waiting for pod to be Running..."
    # set schedule_info_printed false
    # while true
    # set pod_status (knp | grep $job_name | awk '{print $6}')
    # # when length larger then 0
    # if test (count $pod_status) -gt 0
    # # pod is created
    # set pod_counts (count $pod_status)
    # set pod_status (string split "\n" $pod_status | head -n 1)
    # else
    # if test $schedule_info_printed = false
    # set schedule_info (curl https://scheduler.$cluster.insea.io/scheduler_info.txt -s)
    # set cur_schedule_info (printf "%s\n" $schedule_info | grep $job_name | grep "priority_class" | awk '{print $4,$5,$17,$18,$19}')
    # if [ "$cur_schedule_info" != "" ]
    # red_print $cur_schedule_info
    # set schedule_info_printed true
    # if test $is_wait -eq 0
    # # query only once
    # break
    # end
    # end
    # end
    # sleep 5
    # continue
    # end
    # # master node is running and pod_counts is equal to nodes
    # if test $pod_status = Running; and test $pod_counts -eq $nodes
    # set priority (knp | grep $job_name | awk '{print $4}' | head -n 1)
    # if test $priority = high
    # red_print "[Warning] High priority job is running!"
    # end
    # green_print "Ready!"
    # echo $job_name
    # break
    # else
    # sleep 3
    # end
    # end
end
function aws3
# Run `aws s3` against the endpoint https://pub2.s3g.data-infra.shopee.io;
# all arguments are passed through.
aws s3 --endpoint-url https://pub2.s3g.data-infra.shopee.io $argv
end
function get_avail_port
    # Print the first port, counting up from 2222, on which `nc -z localhost`
    # does not succeed.
    set -l candidate 2222
    while nc -z localhost $candidate >/dev/null 2>&1
        # nc succeeded for this port, so try the next one.
        set candidate (math $candidate + 1)
    end
    echo $candidate
end
function scjc_fwd
    # Create a job with scjc, look up its pod with scpod and forward a free
    # local port to port 22 of that pod.
    #
    # Arguments: forwarded unchanged to scjc.
    # Returns:   1 when no pod is found for the job.
    #
    # The job name is taken from the last line scjc writes to stdout.
    set -l job_name (scjc $argv | tee /dev/tty | tail -n 1)
    set -l pod_name (scpod $job_name | awk '{print $1}')
    # An empty pod list would vanish from the scfwd call below and shift the
    # port into the pod-name position.
    if test -z "$pod_name"
        red_print "No pod found for job '$job_name'."
        return 1
    end
    green_print "Pod created: $pod_name"
    set -l port (get_avail_port)
    # Use the first pod only: several pod names would change the argument
    # count that _scfwd uses to decide which ports to read.
    scfwd $pod_name[1] $port 22
end
function scpods_all
    # Print a pod table (NAME, OWNER, GPU, READY, PRIORITY, STATUS, IP) with
    # the fifth column of the default `kubectl get pods` listing pasted on as
    # an extra column (presumably AGE - confirm against your kubectl output).
    #
    # Arguments: extra `kubectl get pods` options, applied to both listings.
    #
    # Both listings are sorted by creation time. Before, only the second one
    # was always sorted, so without a caller-supplied --sort-by the rows of
    # the two listings could belong to different pods when pasted together.
    set -l pod_table (mktemp)
    set -l extra_column (mktemp)
    kubectl get pods --sort-by=.metadata.creationTimestamp -o 'custom-columns=NAME:.metadata.name,OWNER:.metadata.labels.owner,GPU:.spec.containers[0].resources.requests.nvidia\.com/gpu,READY:.status.conditions[?(@.type=="Ready")].status,PRIORITY:.spec.priorityClassName,STATUS:.status.phase,IP:.status.podIP' $argv >$pod_table
    kubectl get pods --sort-by=.metadata.creationTimestamp $argv | awk '{print $5}' >$extra_column
    paste $pod_table $extra_column
    rm $pod_table $extra_column
end
function scpods_running
# scpods_all restricted to pods in phase Running, sorted by creation time.
scpods_all --sort-by=.metadata.creationTimestamp --field-selector=status.phase=Running
end
function scpods
    # Show the running pods on the terminal and print the sum of their GPU
    # column.
    # for unknown reason, the output of scpods cannot be used for fzf
    # thus, most function ends with _auto will fail
    set -l pod_lines (scpods_running | tee /dev/tty)
    # Emit the captured lines one per row, then add up column 3, skipping the
    # header row ("GPU") and pods without a GPU request ("<none>").
    set -l gpu_total (string split "\n" -- $pod_lines | awk '$3 != "<none>" && $3 != "GPU" {sum += $3} END {print sum}')
    green_print "Total GPUs: $gpu_total"
end
function _scpod_args
# Print the `args` of the first container of the pod named by the first
# argument, using a kubectl jsonpath query.
set pod_name $argv[1]
kubectl get pod $pod_name -o jsonpath='{.spec.containers[0].args}'
end
function scpod_args_auto
    # Let the user pick a running pod with fzf (header row skipped), print the
    # selected row and then the pod's container args via _scpod_args.
    #
    # Returns: 1 when no pod was selected.
    set -l pod_info (scpods_running | tail -n +2 | fzf)
    # Without this guard a cancelled selection queries kubectl with no pod
    # name.
    if test -z "$pod_info"
        red_print "No pod selected."
        return 1
    end
    set -l pod_name (echo $pod_info | awk '{print $1}')
    echo $pod_info
    _scpod_args $pod_name
end
function scpod_args
    # Print a pod's container args: with arguments they go to _scpod_args,
    # without (or with only empty) arguments a pod is picked interactively.
    if test -n "$argv"
        _scpod_args $argv
        return
    end
    scpod_args_auto
end
function knp
# Rows of scpods_running that contain "wangjing".
scpods_running | grep wangjing
end
function knp_all
# Rows of scpods_all (no phase filter) that contain "wangjing".
scpods_all | grep wangjing
end
function scq
# Shorthand for `sailctl get quota`.
sailctl get quota
end
function scpod
# List, without a header row, the pods whose `job-name` label equals the
# first argument.
set job $argv[1]
kubectl get pods -l job-name=$job --no-headers
end
function _get_context
    # Print two space-separated words derived from the current kubectl
    # context: "argo sail" for generative-model@sail-im-1, "margo tmkv-1" for
    # anything else. Callers split the output on the space.
    set -l context (kubectl config current-context)
    # The value is quoted so an empty result (e.g. kubectl failing) falls
    # through to the default branch instead of making `test` report a
    # missing-operand error.
    switch "$context"
        case "generative-model@sail-im-1"
            echo argo sail
        case '*'
            echo margo tmkv-1
    end
end
function scl
    # For every line of `sailctl job list`, print the job, its application
    # URL and either the matching running-pod fields (green) or the matching
    # fields from the scheduler info page (red).
    set -l context (string split " " (_get_context))
    set platform $context[1]
    set cluster $context[2]
    set jobs (sailctl job list)
    set running_info (scpods_running)
    set schedule_info (curl https://scheduler.$cluster.insea.io/scheduler_info.txt -s)
    for job in $jobs
        set -l job_url https://$platform.sail.insea.io/applications/$job
        # Fields 1, 6 and 8 of the first running-pod row that mentions the job.
        set pod_info (string split "\n" $running_info | grep $job | awk '{print $1,$6,$8}' | head -n 1)
        if test -n "$pod_info"
            echo $job $job_url (green_print $pod_info)
        else
            # No running pod: fall back to the scheduler info for this job.
            # https://stackoverflow.com/questions/34166077/multi-line-variables-remove-new-line-character-fish
            set cur_schedule_info (printf "%s\n" $schedule_info | grep $job | grep "priority_class" | awk '{print $4,$5,$17,$18,$19}')
            echo $job $job_url (red_print $cur_schedule_info)
        end
    end
end
function _sckill
    # Delete every job whose name (first column of `scl`) matches a grep
    # pattern, then delete that job's pods as listed by `knp`.
    #
    # Usage:   _sckill JOB_PATTERN
    # Returns: 1 when JOB_PATTERN is missing or empty.
    set -l job_pattern $argv[1]
    # An empty pattern would vanish from the grep call and make grep fail
    # with a usage error.
    if test -z "$job_pattern"
        red_print "No job pattern given."
        return 1
    end
    # ! sailctl job delete not working so far
    # ! supposedly, sailctl should also terminate pods, but it does not
    set -l job_names (scl | awk '{print $1}' | grep -- $job_pattern)
    for job in $job_names
        red_print "sailctl job delete --force $job"
        # Collect the pods before deleting the job.
        # NOTE(review): grep matches substrings, so pods of another job whose
        # name contains this one are selected as well.
        set -l pod_names (knp | grep -- $job | awk '{print $1}')
        sailctl job delete --force $job
        for pod_name in $pod_names
            # --now=true means terminate immediately
            red_print "kubectl delete pod $pod_name -n generative-model --now=true &"
            # Run directly in the background instead of eval-ing a command
            # string.
            kubectl delete pod $pod_name -n generative-model --now=true &
        end
    end
end
function sckill
    # Kill jobs: a non-empty first argument is passed to _sckill as the job
    # pattern; otherwise a job is picked interactively via sckill_auto.
    set job $argv[1]
    if test -n "$job"
        _sckill $job
        return
    end
    sckill_auto
end
function sckill_auto
    # Let the user pick one line of `scl` with fzf and pass its first column
    # to _sckill.
    #
    # Returns: 1 when nothing was selected.
    set -l job (scl | fzf | awk '{print $1}')
    # Without this guard a cancelled selection calls _sckill with no pattern.
    if test -z "$job"
        red_print "No job selected."
        return 1
    end
    _sckill $job
end
function sckillall
    # Run sckill for every line of `sailctl job list`.
    #
    # Usage: sckillall [-f|--force]
    #   -f, --force  skip the confirmation prompt
    # Without the flag the user must answer "y"; any other answer aborts.
    set -l jobs (sailctl job list)
    if not contains -- -f $argv; and not contains -- --force $argv
        read -P (red_print "Do you want to proceed with killing $jobs: ") choice
        if test "$choice" != y
            echo "Aborting killing process."
            return
        end
    end
    for job in $jobs
        sckill $job
    end
end
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
# Interactive session with the conda binary present: evaluate the output of
# `conda shell.fish hook` in this shell.
if status is-interactive; and test -f /Users/SG3736/miniconda3/bin/conda
eval /Users/SG3736/miniconda3/bin/conda "shell.fish" hook $argv | source
else
# Otherwise source conda.fish when it exists, or as a last resort just put
# miniconda's bin directory at the front of PATH.
if test -f "/Users/SG3736/miniconda3/etc/fish/conf.d/conda.fish"
. "/Users/SG3736/miniconda3/etc/fish/conf.d/conda.fish"
else
set -x PATH /Users/SG3736/miniconda3/bin $PATH
end
end
# <<< conda initialize <<<
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment