Skip to content

Commit 7c21b5c

Browse files
cuichenx, yaoyu-33, and liding-nv
authored and committed
Add Nemotron nano v2 vl (NVIDIA-NeMo#1136)
Signed-off-by: yaoyu-33 <[email protected]>
Signed-off-by: Chen Cui <[email protected]>
Co-authored-by: yaoyu-33 <[email protected]>
Co-authored-by: Li Ding <[email protected]>
1 parent bd43592 commit 7c21b5c

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

44 files changed

+4331
-37
lines changed

.github/workflows/cicd-main.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ jobs:
222222
- script: L2_Launch_models_mistral
223223
- script: L2_Launch_models_nemotron
224224
- script: L2_Launch_models_nemotronh
225+
- script: L2_Launch_models_nemotron_vl
225226
- script: L2_Launch_models_olmoe
226227
- script: L2_Launch_models_qwen
227228
- script: L2_Launch_models_qwen_vl

examples/conversion/convert_checkpoints.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ def export_megatron_to_hf(
141141
megatron_path: str,
142142
hf_path: str,
143143
show_progress: bool = True,
144+
strict: bool = True,
144145
) -> None:
145146
"""
146147
Export a Megatron checkpoint to HuggingFace format.
@@ -175,14 +176,15 @@ def export_megatron_to_hf(
175176

176177
# For demonstration, we'll create a bridge from a known config
177178
# This would typically be extracted from the checkpoint metadata
178-
bridge = AutoBridge.from_hf_pretrained(hf_model)
179+
bridge = AutoBridge.from_hf_pretrained(hf_model, trust_remote_code=True)
179180

180181
# Export using the convenience method
181182
print("📤 Exporting to HuggingFace format...")
182183
bridge.export_ckpt(
183184
megatron_path=megatron_path,
184185
hf_path=hf_path,
185186
show_progress=show_progress,
187+
strict=strict,
186188
)
187189

188190
print(f"✅ Successfully exported model to: {hf_path}")
@@ -232,6 +234,9 @@ def main():
232234
"--hf-path", required=True, help="Directory path where the HuggingFace model will be saved"
233235
)
234236
export_parser.add_argument("--no-progress", action="store_true", help="Disable progress bar during export")
237+
export_parser.add_argument(
238+
"--not-strict", action="store_true", help="Allow source and target checkpoint to have different keys"
239+
)
235240

236241
args = parser.parse_args()
237242

@@ -254,6 +259,7 @@ def main():
254259
megatron_path=args.megatron_path,
255260
hf_path=args.hf_path,
256261
show_progress=not args.no_progress,
262+
strict=not args.not_strict,
257263
)
258264
else:
259265
raise RuntimeError(f"Unknown command: {args.command}")

examples/conversion/hf_megatron_roundtrip_multi_gpu.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def main(
6363
etp: int = 1,
6464
megatron_save_path: str | None = None,
6565
megatron_load_path: str | None = None,
66+
strict: bool = False,
6667
) -> None:
6768
"""Perform round-trip conversion between HuggingFace and Megatron-LM models on multiple GPUs."""
6869
if os.environ.get("WORLD_SIZE") is None:
@@ -151,7 +152,7 @@ def main(
151152
console.print(table)
152153
console.print(f"Saving HF-ckpt in {save_path}...")
153154

154-
bridge.save_hf_pretrained(megatron_model, save_path)
155+
bridge.save_hf_pretrained(megatron_model, save_path, strict=strict)
155156

156157
# Save in Megatron format if path is provided
157158
if megatron_save_path:
@@ -188,6 +189,7 @@ def main(
188189
default=None,
189190
help="Path to load the model in Megatron checkpoint format. If provided, model will not start from HF checkpoint.",
190191
)
192+
parser.add_argument("--not-strict", action="store_true", help="Perform loose validation during weight export")
191193
args = parser.parse_args()
192194
main(
193195
args.hf_model_id,

0 commit comments

Comments (0)