You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
help="Extra flags to pass to 'docker build' when validating evaluator (quoted string, e.g. \"--no-cache --pull --progress=plain\")",
395
+
metavar="",
396
+
help="Extra flags to pass to 'docker build' when validating evaluator",
467
397
)
468
398
rft_parser.add_argument(
469
399
"--docker-run-extra",
470
400
default="",
471
-
help="Extra flags to pass to 'docker run' when validating evaluator (quoted string, e.g. \"--env-file .env --memory=8g\")",
401
+
metavar="",
402
+
help="Extra flags to pass to 'docker run' when validating evaluator",
403
+
)
404
+
405
+
# Everything below has to manually be maintained, can't be auto-generated
406
+
rft_parser.add_argument(
407
+
"--source-job",
408
+
metavar="",
409
+
help="The source reinforcement fine-tuning job to copy configuration from. If other flags are set, they will override the source job's configuration.",
410
+
)
411
+
rft_parser.add_argument(
412
+
"--quiet",
413
+
action="store_true",
414
+
help="If set, only errors will be printed.",
415
+
)
416
+
skip_fields= {
417
+
"__top_level__": {
418
+
"extra_headers",
419
+
"extra_query",
420
+
"extra_body",
421
+
"timeout",
422
+
"node_count",
423
+
"display_name",
424
+
"account_id",
425
+
},
426
+
"loss_config": {"kl_beta", "method"},
427
+
"training_config": {"region", "jinja_template"},
428
+
"wandb_config": {"run_id"},
429
+
}
430
+
aliases= {
431
+
"wandb_config.api_key": ["--wandb-api-key"],
432
+
"wandb_config.project": ["--wandb-project"],
433
+
"wandb_config.entity": ["--wandb-entity"],
434
+
"wandb_config.enabled": ["--wandb"],
435
+
"reinforcement_fine_tuning_job_id": ["--job-id"],
436
+
}
437
+
help_overrides= {
438
+
"training_config.gradient_accumulation_steps": "The number of batches to accumulate gradients before updating the model parameters. The effective batch size will be batch-size multiplied by this value.",
439
+
"training_config.learning_rate_warmup_steps": "The number of learning rate warmup steps for the reinforcement fine-tuning job.",
440
+
"mcp_server": "The MCP server resource name to use for the reinforcement fine-tuning job. (Optional)",
0 commit comments