@Lyken17
Created February 12, 2026 03:05
slime issue
root@batch-block1-3665:/home/ligengz/workspace/slime# bash scripts/run-qwen3-4B.sh
Did not find any active Ray processes.
+ export PYTHONBUFFERED=16
+ PYTHONBUFFERED=16
++ nvidia-smi topo -m
++ grep -o 'NV[0-9][0-9]*'
++ wc -l
+ NVLINK_COUNT=56
+ '[' 56 -gt 0 ']'
+ HAS_NVLINK=1
+ echo 'HAS_NVLINK: 1 (detected 56 NVLink references)'
HAS_NVLINK: 1 (detected 56 NVLink references)
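Note: the HAS_NVLINK probe above simply counts NVxx entries in the nvidia-smi topology matrix. A standalone equivalent, assuming nvidia-smi is on PATH, in case you want to sanity-check the detection outside the script:

  # count NVLink entries (NV1, NV2, ...) in the GPU topology matrix
  NVLINK_COUNT=$(nvidia-smi topo -m | grep -o 'NV[0-9][0-9]*' | wc -l)
  # any match means at least one NVLink-connected GPU pair
  [ "${NVLINK_COUNT}" -gt 0 ] && HAS_NVLINK=1 || HAS_NVLINK=0
  echo "HAS_NVLINK: ${HAS_NVLINK} (detected ${NVLINK_COUNT} NVLink references)"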
+++ dirname -- scripts/run-qwen3-4B.sh
++ cd -- scripts
++ pwd
+ SCRIPT_DIR=/home/ligengz/workspace/slime/scripts
+ source /home/ligengz/workspace/slime/scripts/models/qwen3-4B.sh
++ MODEL_ARGS=(--swiglu --num-layers 36 --hidden-size 2560 --ffn-hidden-size 9728 --num-attention-heads 32 --group-query-attention --num-query-groups 8 --use-rotary-position-embeddings --disable-bias-linear --normalization "RMSNorm" --norm-epsilon 1e-6 --rotary-base "${MODEL_ARGS_ROTARY_BASE:-1000000}" --vocab-size 151936 --kv-channels 128 --qk-layernorm)
+ CKPT_ARGS=(--hf-checkpoint /root/Qwen3-4B --ref-load /root/Qwen3-4B_torch_dist --load /root/Qwen3-4B_slime/ --save /root/Qwen3-4B_slime/ --save-interval 20)
+ ROLLOUT_ARGS=(--prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl --input-key prompt --label-key label --apply-chat-template --rollout-shuffle --rm-type deepscaler --num-rollout 3000 --rollout-batch-size 32 --n-samples-per-prompt 8 --rollout-max-response-len 8192 --rollout-temperature 1 --global-batch-size 256 --balance-data)
+ EVAL_ARGS=(--eval-interval 20 --eval-prompt-data aime /root/aime-2024/aime-2024.jsonl --n-samples-per-eval-prompt 16 --eval-max-response-len 16384 --eval-top-p 1)
+ PERF_ARGS=(--tensor-model-parallel-size 2 --sequence-parallel --pipeline-model-parallel-size 1 --context-parallel-size 1 --expert-model-parallel-size 1 --expert-tensor-parallel-size 1 --recompute-granularity full --recompute-method uniform --recompute-num-layers 1 --use-dynamic-batch-size --max-tokens-per-gpu 9216)
+ GRPO_ARGS=(--advantage-estimator grpo --use-kl-loss --kl-loss-coef 0.00 --kl-loss-type low_var_kl --entropy-coef 0.00 --eps-clip 0.2 --eps-clip-high 0.28)
+ OPTIMIZER_ARGS=(--optimizer adam --lr 1e-6 --lr-decay-style constant --weight-decay 0.1 --adam-beta1 0.9 --adam-beta2 0.98)
+ WANDB_ARGS=()
+ SGLANG_ARGS=(--rollout-num-gpus-per-engine 2 --sglang-mem-fraction-static 0.7)
+ MISC_ARGS=(--attention-dropout 0.0 --hidden-dropout 0.0 --accumulate-allreduce-grads-in-fp32 --attention-softmax-in-fp32 --attention-backend flash)
+ export MASTER_ADDR=127.0.0.1
+ MASTER_ADDR=127.0.0.1
+ ray start --head --node-ip-address 127.0.0.1 --num-gpus 8 --disable-usage-stats --dashboard-host=127.0.0.1 --dashboard-port=8265
Usage stats collection is disabled.
Local node IP: 10.49.135.76
2026-02-11 18:31:25,545 WARNING utils.py:458 -- Detecting docker specified CPUs. In previous versions of Ray, CPU detection in containers was incorrect. Please ensure that Ray has enough CPUs allocated. As a temporary workaround to revert to the prior behavior, set `RAY_USE_MULTIPROCESSING_CPU_COUNT=1` as an env var before starting Ray. Set the env var: `RAY_DISABLE_DOCKER_CPU_WARNING=1` to mute this warning.
--------------------
Ray runtime started.
--------------------

Next steps
  To add another node to this Ray cluster, run
    ray start --address='10.49.135.76:6379'

  To connect to this Ray cluster:
    import ray
    ray.init(_node_ip_address='10.49.135.76')

  To submit a Ray job using the Ray Jobs CLI:
    RAY_API_SERVER_ADDRESS='http://127.0.0.1:8265' ray job submit --working-dir . -- python my_script.py

  See https://docs.ray.io/en/latest/cluster/running-applications/job-submission/index.html
  for more information on submitting Ray jobs to the Ray cluster.

  To terminate the Ray runtime, run
    ray stop

  To view the status of the cluster, use
    ray status

  To monitor and debug Ray, view the dashboard at
    127.0.0.1:8265

  If connection to the dashboard fails, check your firewall settings and network configuration.
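Note: the submit step below talks to the dashboard at 127.0.0.1:8265, so it is worth verifying that endpoint is actually serving before submitting. A minimal probe, assuming curl is available in the container (/api/version is the endpoint the Jobs SDK itself uses for its version check):

  # prints JSON with the Ray version when the job submission server is up
  curl -sf http://127.0.0.1:8265/api/version && echo "job server reachable"
  # confirms the head node and its resources are registered
  ray status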
+ RUNTIME_ENV_JSON='{
"env_vars": {
"PYTHONPATH": "/root/Megatron-LM/",
"CUDA_DEVICE_MAX_CONNECTIONS": "1",
"NCCL_NVLS_ENABLE": "1"
}
}'
+ ray job submit --address=http://127.0.0.1:8265 '--runtime-env-json={
"env_vars": {
"PYTHONPATH": "/root/Megatron-LM/",
"CUDA_DEVICE_MAX_CONNECTIONS": "1",
"NCCL_NVLS_ENABLE": "1"
}
}' -- python3 train.py --actor-num-nodes 1 --actor-num-gpus-per-node 8 --colocate --swiglu --num-layers 36 --hidden-size 2560 --ffn-hidden-size 9728 --num-attention-heads 32 --group-query-attention --num-query-groups 8 --use-rotary-position-embeddings --disable-bias-linear --normalization RMSNorm --norm-epsilon 1e-6 --rotary-base 1000000 --vocab-size 151936 --kv-channels 128 --qk-layernorm --hf-checkpoint /root/Qwen3-4B --ref-load /root/Qwen3-4B_torch_dist --load /root/Qwen3-4B_slime/ --save /root/Qwen3-4B_slime/ --save-interval 20 --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl --input-key prompt --label-key label --apply-chat-template --rollout-shuffle --rm-type deepscaler --num-rollout 3000 --rollout-batch-size 32 --n-samples-per-prompt 8 --rollout-max-response-len 8192 --rollout-temperature 1 --global-batch-size 256 --balance-data --optimizer adam --lr 1e-6 --lr-decay-style constant --weight-decay 0.1 --adam-beta1 0.9 --adam-beta2 0.98 --advantage-estimator grpo --use-kl-loss --kl-loss-coef 0.00 --kl-loss-type low_var_kl --entropy-coef 0.00 --eps-clip 0.2 --eps-clip-high 0.28 --tensor-model-parallel-size 2 --sequence-parallel --pipeline-model-parallel-size 1 --context-parallel-size 1 --expert-model-parallel-size 1 --expert-tensor-parallel-size 1 --recompute-granularity full --recompute-method uniform --recompute-num-layers 1 --use-dynamic-batch-size --max-tokens-per-gpu 9216 --eval-interval 20 --eval-prompt-data aime /root/aime-2024/aime-2024.jsonl --n-samples-per-eval-prompt 16 --eval-max-response-len 16384 --eval-top-p 1 --rollout-num-gpus-per-engine 2 --sglang-mem-fraction-static 0.7 --attention-dropout 0.0 --hidden-dropout 0.0 --accumulate-allreduce-grads-in-fp32 --attention-softmax-in-fp32 --attention-backend flash
Job submission server address: http://127.0.0.1:8265
Traceback (most recent call last):
  File "/usr/local/bin/ray", line 7, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/scripts/scripts.py", line 2758, in main
    return cli()
           ^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1485, in __call__
    return self.main(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1406, in main
    rv = self.invoke(ctx)
         ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1873, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1873, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 1269, in invoke
    return ctx.invoke(self.callback, **ctx.params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/click/core.py", line 824, in invoke
    return callback(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/dashboard/modules/job/cli_utils.py", line 54, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/autoscaler/_private/cli_logger.py", line 823, in wrapper
    return f(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/dashboard/modules/job/cli.py", line 278, in submit
    job_id = client.submit_job(
             ^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ray/dashboard/modules/job/sdk.py", line 253, in submit_job
    self._raise_error(r)
  File "/usr/local/lib/python3.12/dist-packages/ray/dashboard/modules/dashboard_sdk.py", line 290, in _raise_error
    raise RuntimeError(
RuntimeError: Request failed with status code 504: .
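A 504 from the job submission server typically means the dashboard accepted the HTTP request, but the component that actually starts jobs (the dashboard/job agent) did not respond before the gateway timeout. That points at head-node health rather than the training arguments, which Ray never got to parse. A reasonable first triage, assuming the default Ray session directory:

  # dashboard-side errors around the time of the submit
  tail -n 50 /tmp/ray/session_latest/logs/dashboard.log
  # job agent startup failures (the usual culprit for submit 504s)
  tail -n 50 /tmp/ray/session_latest/logs/dashboard_agent.log
  # verify the head node is still alive and has its 8 GPUs registered
  ray status

If the agent was merely slow to come up (not uncommon right after ray start inside a container), waiting a few seconds and rerunning the same ray job submit command is often enough.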