Skip to content

Commit 33d9449

Browse files
Attempt at fixing setup.
1 parent 7689eb2 commit 33d9449

File tree

4 files changed

+7
-7
lines changed

4 files changed

+7
-7
lines changed

open_instruct/vllm_utils3.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -361,9 +361,9 @@ def create_vllm_engines(
361361
use_hybrid_engine = pg is not None
362362
num_gpus = int(tensor_parallel_size == 1)
363363
if use_hybrid_engine and tensor_parallel_size == 1 and single_gpu_mode:
364-
# every worker will use 0.5 GPU, so that we can schedule
365-
# 2 instances on the same GPUs.
366-
num_gpus = 0.5
364+
# every worker will use 0.5/num_engines GPU, so that we can schedule
365+
# multiple instances on the same GPU while leaving 0.5 for the learner.
366+
num_gpus = 0.5 / num_engines
367367

368368
print(f"num_gpus: {num_gpus}")
369369

@@ -381,7 +381,7 @@ def create_vllm_engines(
381381
scheduling_strategy = PlacementGroupSchedulingStrategy(
382382
placement_group=pg,
383383
placement_group_capture_child_tasks=True,
384-
placement_group_bundle_index=i * tensor_parallel_size,
384+
placement_group_bundle_index=0 if single_gpu_mode else i * tensor_parallel_size,
385385
)
386386

387387
additional_kwargs = {}

scripts/train/build_image_and_launch.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ fi
3030

3131
# Install Python dependencies
3232
echo "Installing dependencies with uv..."
33-
uv sync --only-group dev
33+
uv sync
3434

3535
# Run the provided script
3636
bash $1 "$beaker_user/$image_name"

scripts/train/debug/single_gpu_integration_test.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ uv run python mason.py \
3434
--per_device_train_batch_size 1 \
3535
--num_unique_prompts_rollout 8 \
3636
--num_samples_per_prompt_rollout 4 \
37-
--model_name_or_path Qwen/Qwen3-1.7B \
37+
--model_name_or_path EleutherAI/pythia-14m \
3838
--stop_strings "</answer>" \
3939
--apply_r1_style_format_reward \
4040
--apply_verifiable_reward true \
@@ -46,6 +46,7 @@ uv run python mason.py \
4646
--deepspeed_stage 2 \
4747
--num_epochs 1 \
4848
--num_learners_per_node 1 \
49+
--vllm_num_engines 2 \
4950
--vllm_tensor_parallel_size 1 \
5051
--beta 0.01 \
5152
--seed 3 \

scripts/train/debug/single_gpu_on_beaker.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ uv run python mason.py \
4747
--with_tracking \
4848
--num_epochs 1 \
4949
--num_learners_per_node 1 \
50-
--vllm_num_engines 2 \
5150
--vllm_tensor_parallel_size 1 \
5251
--beta 0.01 \
5352
--seed 3 \

0 commit comments

Comments (0)