Skip to content

Commit 1792a41

Browse files
committed
feat: allow uv-less execution and fingerprint the environment
Signed-off-by: Terry Kong <[email protected]> fix Signed-off-by: Terry Kong <[email protected]> seed Signed-off-by: Terry Kong <[email protected]> init Signed-off-by: Terry Kong <[email protected]> fix Signed-off-by: Terry Kong <[email protected]> fix Signed-off-by: Terry Kong <[email protected]> fix symlink creation to use exec as opposed to symlink which doesn't work Signed-off-by: Terry Kong <[email protected]> frozen grpo Signed-off-by: Terry Kong <[email protected]> use a json fingerprint Signed-off-by: Terry Kong <[email protected]> logging level Signed-off-by: Terry Kong <[email protected]> docs update Signed-off-by: Terry Kong <[email protected]> fix up Signed-off-by: Terry Kong <[email protected]> more functional test Signed-off-by: Terry Kong <[email protected]> mermaid Signed-off-by: Terry Kong <[email protected]> nrl-force rebuild and build-v will skip fingerprint check Signed-off-by: Terry Kong <[email protected]> fix tests Signed-off-by: Terry Kong <[email protected]> explain the local development Signed-off-by: Terry Kong <[email protected]> fix ci container Signed-off-by: Terry Kong <[email protected]> safe.directory fix Signed-off-by: Terry Kong <[email protected]>
1 parent c32778d commit 1792a41

File tree

16 files changed

+1659
-2
lines changed

16 files changed

+1659
-2
lines changed

.github/workflows/cicd-main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ jobs:
204204
image-name: nemo_rl_container
205205
dockerfile: docker/Dockerfile
206206
image-label: nemo-rl
207-
target: hermetic
207+
target: release
208208
build-contexts: |
209209
nemo-rl=${{ github.run_id }}/
210210
build-args: |

docker/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ ARG NRL_GIT_REF=main
1010
ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NRL_GIT_REF} /
1111

1212
FROM ${BASE_IMAGE} AS base
13+
# An environment variable to indicate that we are in a container.
14+
ENV NRL_CONTAINER=1
1315

1416
# It is more convenient for users to run as root
1517
USER root
@@ -76,10 +78,13 @@ ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
7678

7779
# First copy only the dependency files
7880
COPY --from=nemo-rl pyproject.toml uv.lock ./
81+
# Copy in the top level __init__.py/package_info.py since build-custom-vllm.sh needs the nemo_rl package to exist.
82+
COPY --from=nemo-rl nemo_rl/__init__.py nemo_rl/package_info.py ./nemo_rl/
7983
COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh
8084
COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/
8185

8286
RUN <<"EOF" bash -exu
87+
uv venv --seed
8388
if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then
8489
bash tools/build-custom-vllm.sh
8590
source 3rdparty/vllm/nemo-rl.env
@@ -124,3 +129,7 @@ COPY --from=nemo-rl . /opt/nemo-rl
124129
RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true
125130
RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py
126131

132+
# Generate container fingerprint for frozen environment support
133+
# Store outside /opt/nemo-rl to avoid being overwritten by user mounts
134+
RUN python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint
135+

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
"tasklist", # Adds support for GitHub-style task lists with [ ] and [x]
6767
]
6868
myst_heading_anchors = 5 # Generates anchor links for headings up to level 5
69+
myst_fence_as_directive = ["mermaid"] # Treat ```mermaid blocks as directives
6970

7071
# -- Options for Autodoc2 ---------------------------------------------------
7172
sys.path.insert(0, os.path.abspath(".."))

docs/design-docs/dependency-management.md

Lines changed: 343 additions & 0 deletions
Large diffs are not rendered by default.

docs/index.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ design-docs/design-and-philosophy.md
242242
design-docs/padding.md
243243
design-docs/logger.md
244244
design-docs/uv.md
245+
design-docs/dependency-management.md
245246
design-docs/chat-datasets.md
246247
design-docs/generation.md
247248
design-docs/checkpointing.md

nemo_rl/__init__.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
import sys
1717
from pathlib import Path
1818

19+
# Configure logging to show file location for warnings
20+
logging.basicConfig(
21+
format="%(levelname)s:%(name)s:%(filename)s:%(lineno)d: %(message)s",
22+
level=logging.WARNING,
23+
)
24+
1925
"""
2026
This is a work around to ensure whenever NeMo RL is imported, that we
2127
add Megatron-LM to the python path. This is because the only sub-package
@@ -49,6 +55,159 @@
4955
os.environ["RAY_ENABLE_UV_RUN_RUNTIME_ENV"] = "0"
5056

5157

58+
def _is_build_isolation() -> bool:
    """Detect whether we are running inside a uv build isolation environment.

    When running ``uv lock``/``uv sync``, uv creates a temporary isolated
    environment in ``~/.cache/uv/builds-v*/`` to build packages and introspect
    metadata. The fingerprint check is skipped in this context since the user
    is updating dependencies.

    Returns:
        True if ``sys.prefix`` contains ``/builds-v``, False otherwise.
    """
    # uv always uses paths like: /root/.cache/uv/builds-v0/.tmp*/
    return "/builds-v" in sys.prefix
70+
71+
72+
def _check_container_fingerprint() -> None:
    """Check that the container's dependencies match the current code (container-only).

    The check only runs when ``NRL_CONTAINER`` is set (inside containers). It
    compares the fingerprint stored in ``/opt/nemo_rl_container_fingerprint``
    (computed at build time) with the fingerprint produced by running
    ``tools/generate_fingerprint.py`` against the current checkout.

    The check is skipped when:
      * ``NRL_CONTAINER`` is unset or empty,
      * ``NRL_FORCE_REBUILD_VENVS=true`` (venvs will be rebuilt anyway), or
      * we are inside a uv build isolation environment.

    Raises:
        RuntimeError: If the fingerprints differ and
            ``NRL_IGNORE_VERSION_MISMATCH`` is not set. Any ``RuntimeError``
            raised while computing the fingerprint is propagated as well.
    """
    # Skip check if not in container or if we're going to force venv rebuild anyway
    if not os.environ.get("NRL_CONTAINER"):
        return
    if os.environ.get("NRL_FORCE_REBUILD_VENVS", "").lower() == "true":
        logging.info(
            "Skipping container fingerprint check because NRL_FORCE_REBUILD_VENVS=true (venvs will be rebuilt anyway)"
        )
        return

    # Skip check if we're in a build isolation environment (e.g., during uv lock/sync)
    if _is_build_isolation():
        logging.debug(
            "Skipping container fingerprint check because we're in a build isolation environment"
        )
        return

    try:
        # Imported lazily so the common (non-container) import path stays cheap.
        import contextlib
        import io
        import json
        import runpy

        # Get repo root (relative to this module)
        repo_root = Path(__file__).parent.parent
        fingerprint_script = repo_root / "tools" / "generate_fingerprint.py"

        if not fingerprint_script.exists():
            logging.warning(
                f"Fingerprint script not found at {fingerprint_script}, skipping version check"
            )
            return

        # Run the script in-process and capture what it prints. redirect_stdout
        # restores sys.stdout even if the script raises.
        # NOTE(review): a sys.exit() inside the script raises SystemExit, which
        # the `except Exception` below does not catch -- confirm the script
        # never exits explicitly.
        captured_output = io.StringIO()
        with contextlib.redirect_stdout(captured_output):
            runpy.run_path(str(fingerprint_script), run_name="__main__")
        current_fingerprint_json = captured_output.getvalue().strip()

        if not current_fingerprint_json:
            logging.warning("Failed to compute code fingerprint: empty output")
            return

        current_fingerprint = json.loads(current_fingerprint_json)

        # Read container fingerprint
        container_fingerprint_file = Path("/opt/nemo_rl_container_fingerprint")
        if not container_fingerprint_file.exists():
            logging.warning(
                "Container fingerprint file not found, skipping version check"
            )
            return

        container_fingerprint = json.loads(
            container_fingerprint_file.read_text().strip()
        )

        differences = _diff_fingerprints(current_fingerprint, container_fingerprint)
        if not differences:
            logging.debug("Container fingerprint matches code fingerprint")
            return

        warning_msg = _format_mismatch_message(differences)

        # Check if user wants to ignore the mismatch
        if os.environ.get("NRL_IGNORE_VERSION_MISMATCH"):
            logging.warning(
                warning_msg
                + "Proceeding anyway (NRL_IGNORE_VERSION_MISMATCH is set)..."
            )
        else:
            raise RuntimeError(
                warning_msg
                + "To proceed anyway, set: export NRL_IGNORE_VERSION_MISMATCH=1"
            )

    except RuntimeError:
        # Re-raise RuntimeError for version mismatches (user should see this)
        raise
    except Exception as e:
        # Best-effort check: log other errors but don't crash on version check failures
        logging.debug(f"Version check failed (non-fatal): {e}")


def _diff_fingerprints(current_fingerprint: dict, container_fingerprint: dict) -> list[str]:
    """Return report lines for every key whose value differs between the fingerprints.

    Keys present in only one fingerprint are reported with the other side shown
    as ``"missing"``. Keys are visited in sorted order.
    """
    differences = []
    for key in sorted(set(current_fingerprint) | set(container_fingerprint)):
        current_val = current_fingerprint.get(key, "missing")
        container_val = container_fingerprint.get(key, "missing")
        if current_val != container_val:
            differences.extend(
                (
                    f" - {key}:",
                    f" Container: {container_val}",
                    f" Current: {current_val}",
                )
            )
    return differences


def _format_mismatch_message(differences: list[str]) -> str:
    """Build the multi-line mismatch banner shown to the user."""
    diff_text = "\n".join(differences)
    sep_line = "\n" + ("-" * 80)
    return (
        f"{sep_line}\n"
        "WARNING: Container/Code Version Mismatch Detected!\n"
        f"{sep_line}\n"
        "Your container's dependencies do not match your current code.\n"
        "\n"
        "Differences found:\n"
        f"{diff_text}\n"
        "\n"
        "This can lead to unexpected behavior or errors.\n"
        "\n"
        "Solutions:\n"
        " 1. Rebuild the container to match your code\n"
        " 2. Set NRL_FORCE_REBUILD_VENVS=true to rebuild virtual environments\n"
        " (This forces Ray workers to recreate their venvs with updated dependencies)\n"
        " 3. Update the container fingerprint to match your current code (for local dev):\n"
        " python tools/generate_fingerprint.py > /opt/nemo_rl_container_fingerprint\n"
        " 4. Set NRL_IGNORE_VERSION_MISMATCH=1 to bypass this check (not recommended)\n"
        "\n"
        "Learn more about dependency management:\n"
        " https://github.com/NVIDIA-NeMo/RL/blob/main/docs/design-docs/dependency-management.md\n"
        f"{sep_line}\n"
    )
205+
206+
207+
# Perform container version check
208+
_check_container_fingerprint()
209+
210+
52211
def _patch_nsight_file():
53212
"""Patch the nsight.py file to fix the context.py_executable assignment.
54213

nemo_rl/utils/prefetch_venvs.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
import os
1415
import sys
16+
from pathlib import Path
1517

1618
from nemo_rl.distributed.ray_actor_environment_registry import (
1719
ACTOR_ENVIRONMENT_REGISTRY,
@@ -52,6 +54,100 @@ def prefetch_venvs():
5254

5355
print("\nVenv prefetching complete!")
5456

57+
# Create convenience python wrapper scripts for frozen environment support (container-only)
58+
create_frozen_environment_symlinks(venv_configs)
59+
60+
61+
def create_frozen_environment_symlinks(venv_configs):
    """Create python-{ClassName} wrapper scripts in /usr/local/bin for frozen environment support.

    Only runs in container (when NRL_CONTAINER=1 is set). Despite the name,
    this writes small bash wrapper scripts that ``exec`` the venv's python
    rather than creating symlinks.

    Args:
        venv_configs: Dictionary mapping py_executable to list of actor FQNs

    Raises:
        RuntimeError: If two actors with the same class name resolve to
            different venv python paths.
    """
    # Only create wrapper scripts in container
    if not os.environ.get("NRL_CONTAINER"):
        print(
            "\nSkipping frozen environment wrapper script creation (not in container)"
        )
        return

    print("\nCreating frozen environment wrapper scripts...")

    # Collect all wrapper mappings: class_name -> venv python path
    wrapper_mappings = {}

    for py_executable, actor_fqns in venv_configs.items():
        for actor_fqn in actor_fqns:
            # Extract class name from FQN (last part)
            # e.g., "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker" -> "MegatronPolicyWorker"
            class_name = actor_fqn.split(".")[-1]

            # Only the venv lookup is best-effort. The collision check below
            # must stay outside this try so its RuntimeError is not swallowed
            # by the broad handler and misreported as a lookup failure.
            try:
                python_path = create_local_venv(py_executable, actor_fqn)
            except Exception as e:
                print(f" Warning: Could not get venv path for {actor_fqn}: {e}")
                continue

            _record_wrapper_mapping(wrapper_mappings, class_name, python_path)

    # Create wrapper scripts
    wrapper_dir = Path("/usr/local/bin")
    created_wrappers = []

    for class_name, python_path in sorted(wrapper_mappings.items()):
        wrapper_name = f"python-{class_name}"
        wrapper_path = wrapper_dir / wrapper_name

        # Get the venv directory path (parent of bin/python)
        venv_path = Path(python_path).parent.parent

        # Create wrapper script content
        wrapper_content = f"""#!/bin/bash
VENV_PATH="{venv_path}"
export VIRTUAL_ENV="$VENV_PATH"
export PATH="$VENV_PATH/bin:$PATH"
exec "$VENV_PATH/bin/python" "$@"
"""

        try:
            # Remove any existing wrapper (regular file or dangling symlink)
            wrapper_path.unlink(missing_ok=True)

            wrapper_path.write_text(wrapper_content)

            # Make executable
            wrapper_path.chmod(0o755)

            created_wrappers.append(wrapper_name)
            print(f" Created: {wrapper_name} -> {python_path}")
        except Exception as e:
            print(f" Warning: Could not create wrapper script {wrapper_name}: {e}")
            continue

    if created_wrappers:
        print(f"\nCreated {len(created_wrappers)} frozen environment wrapper scripts")
        print("Users can now use these python executables directly:")
        for name in created_wrappers:
            print(f" - {name}")
    else:
        print("\nNo frozen environment wrapper scripts were created")


def _record_wrapper_mapping(wrapper_mappings, class_name, python_path):
    """Register class_name -> python_path, raising RuntimeError on a conflicting entry."""
    existing_path = wrapper_mappings.setdefault(class_name, python_path)
    if existing_path != python_path:
        raise RuntimeError(
            f"Collision detected: Multiple venvs want to use name '{class_name}'\n"
            f" Existing: {existing_path}\n"
            f" New: {python_path}\n"
            f"This indicates two different worker classes have the same name."
        )
150+
55151

56152
if __name__ == "__main__":
57153
prefetch_venvs()

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ requires-python = ">=3.12"
1717
license = { text = "Apache 2.0" }
1818
dependencies = [
1919
"setuptools",
20+
"pip", # Required for frozen environments; uv venv --seed may not reliably install pip
2021
"ninja", # for flash-attn parallel build
2122
"torch==2.8.0",
2223
"triton; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')",

tests/functional/L1_Functional_Tests_GPU.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
1919
PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..)
2020

2121
cd ${PROJECT_ROOT}
22+
# This test is intentionally not run with uv run --no-sync to verify that the frozen environment is working correctly.
23+
time bash ./tests/functional/grpo_frozen_env.sh
24+
time bash ./tests/functional/test_frozen_env.sh
25+
2226
time uv run --no-sync bash ./tests/functional/sft.sh
2327
time uv run --no-sync bash ./tests/functional/grpo.sh
2428
time uv run --no-sync bash ./tests/functional/grpo_async.sh

0 commit comments

Comments
 (0)