@@ -363,18 +363,21 @@ def _resolve_evaluator(
     return evaluator_id, evaluator_resource_name, selected_test_file_path, selected_test_func_name
 
 
-def _resolve_and_prepare_dataset(
+def _resolve_dataset(
     project_root: str,
     account_id: str,
-    api_key: str,
-    api_base: str,
     evaluator_id: str,
     args: argparse.Namespace,
     selected_test_file_path: Optional[str],
     selected_test_func_name: Optional[str],
-    dry_run: bool,
 ) -> tuple[Optional[str], Optional[str], Optional[str]]:
-    """Resolve dataset id/resource and ensure dataset exists if using JSONL."""
+    """Resolve dataset source without performing any uploads.
+
+    Returns a tuple of:
+    - dataset_id: existing dataset id when using --dataset or a fully-qualified dataset resource
+    - dataset_resource: fully-qualified dataset resource for existing datasets; None for JSONL sources
+    - dataset_jsonl: local JSONL path when using --dataset-jsonl or inferred sources; None for id-only datasets
+    """
     dataset_id = getattr(args, "dataset", None)
     dataset_jsonl = getattr(args, "dataset_jsonl", None)
    dataset_display_name = getattr(args, "dataset_display_name", None)
@@ -432,40 +435,72 @@ def _resolve_and_prepare_dataset(
         )
         return None, None, None
 
-    inferred_dataset_id = _build_trimmed_dataset_id(evaluator_id)
-    if dry_run:
-        print("--dry-run: would create dataset and upload JSONL")
-        dataset_id = inferred_dataset_id
-    else:
-        try:
-            # Resolve dataset_jsonl path relative to CWD if needed
-            jsonl_path_for_upload = (
-                dataset_jsonl
-                if os.path.isabs(dataset_jsonl)
-                else os.path.abspath(os.path.join(project_root, dataset_jsonl))
-            )
-            dataset_id, _ = create_dataset_from_jsonl(
-                account_id=account_id,
-                api_key=api_key,
-                api_base=api_base,
-                dataset_id=inferred_dataset_id,
-                display_name=dataset_display_name or inferred_dataset_id,
-                jsonl_path=jsonl_path_for_upload,
-            )
-            print(f"✓ Created and uploaded dataset: {dataset_id}")
-        except Exception as e:
-            print(f"Error creating/uploading dataset: {e}")
-            return None, None, None
-
-    if not dataset_id:
-        return None, None, None
+    # Build dataset resource for existing datasets; JSONL-based datasets will be uploaded later.
+    dataset_resource = None
+    if dataset_id:
+        dataset_resource = dataset_resource_override or f"accounts/{account_id}/datasets/{dataset_id}"
 
-    # Build dataset resource (prefer override when provided)
-    dataset_resource = dataset_resource_override or f"accounts/{account_id}/datasets/{dataset_id}"
     return dataset_id, dataset_resource, dataset_jsonl
 
 
-def _ensure_evaluator_active(
+def _upload_dataset(
+    project_root: str,
+    account_id: str,
+    api_key: str,
+    api_base: str,
+    evaluator_id: str,
+    dataset_id: Optional[str],
+    dataset_resource: Optional[str],
+    dataset_jsonl: Optional[str],
+    args: argparse.Namespace,
+    dry_run: bool,
+) -> tuple[Optional[str], Optional[str]]:
+    """Create/upload the dataset when using a local JSONL source.
+
+    For existing datasets (--dataset or fully-qualified ids), this is a no-op that
+    simply ensures dataset_id and dataset_resource are populated.
+    """
+    # Existing dataset case: nothing to upload
+    if not dataset_jsonl:
+        if not dataset_id:
+            return None, None
+        if not dataset_resource:
+            dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
+        return dataset_id, dataset_resource
+
+    # JSONL-based dataset: upload or simulate upload
+    inferred_dataset_id = _build_trimmed_dataset_id(evaluator_id)
+    dataset_display_name = getattr(args, "dataset_display_name", None) or inferred_dataset_id
+
+    # Resolve the dataset_jsonl path relative to the project root if needed
+    jsonl_path_for_upload = (
+        dataset_jsonl if os.path.isabs(dataset_jsonl) else os.path.abspath(os.path.join(project_root, dataset_jsonl))
+    )
+
+    if dry_run:
+        print("--dry-run: would create dataset and upload JSONL")
+        dataset_id = inferred_dataset_id
+        dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
+        return dataset_id, dataset_resource
+
+    try:
+        dataset_id, _ = create_dataset_from_jsonl(
+            account_id=account_id,
+            api_key=api_key,
+            api_base=api_base,
+            dataset_id=inferred_dataset_id,
+            display_name=dataset_display_name,
+            jsonl_path=jsonl_path_for_upload,
+        )
+        print(f"✓ Created and uploaded dataset: {dataset_id}")
+        dataset_resource = f"accounts/{account_id}/datasets/{dataset_id}"
+        return dataset_id, dataset_resource
+    except Exception as e:
+        print(f"Error creating/uploading dataset: {e}")
+        return None, None
+
+
+def _upload_and_ensure_evaluator(
     project_root: str,
     evaluator_id: str,
     evaluator_resource_name: str,
@@ -726,19 +761,17 @@ def create_rft_command(args) -> int:
     if not evaluator_id or not evaluator_resource_name:
         return 1
 
-    # 2) Resolve dataset (id/resource) and underlying JSONL (if any)
-    dataset_id, dataset_resource, dataset_jsonl = _resolve_and_prepare_dataset(
+    # 2) Resolve dataset source (id or JSONL path)
+    dataset_id, dataset_resource, dataset_jsonl = _resolve_dataset(
         project_root=project_root,
         account_id=account_id,
-        api_key=api_key,
-        api_base=api_base,
         evaluator_id=evaluator_id,
         args=args,
         selected_test_file_path=selected_test_file_path,
         selected_test_func_name=selected_test_func_name,
-        dry_run=dry_run,
     )
-    if not dataset_id or not dataset_resource:
+    # Require either an existing dataset id or a JSONL source to materialize from
+    if dataset_jsonl is None and not dataset_id:
         return 1
 
     # 3) Optional local validation
@@ -758,8 +791,24 @@ def create_rft_command(args) -> int:
     ):
         return 1
 
-    # 4) Ensure evaluator exists and is ACTIVE (upload + poll if needed)
-    if not _ensure_evaluator_active(
+    # 4) Upload dataset when using JSONL sources (no-op for existing datasets)
+    dataset_id, dataset_resource = _upload_dataset(
+        project_root=project_root,
+        account_id=account_id,
+        api_key=api_key,
+        api_base=api_base,
+        evaluator_id=evaluator_id,
+        dataset_id=dataset_id,
+        dataset_resource=dataset_resource,
+        dataset_jsonl=dataset_jsonl,
+        args=args,
+        dry_run=dry_run,
+    )
+    if not dataset_id or not dataset_resource:
+        return 1
+
+    # 5) Ensure evaluator exists and is ACTIVE (upload + poll if needed)
+    if not _upload_and_ensure_evaluator(
         project_root=project_root,
         evaluator_id=evaluator_id,
         evaluator_resource_name=evaluator_resource_name,
@@ -769,7 +818,7 @@ def create_rft_command(args) -> int:
     ):
         return 1
 
-    # 5) Create the RFT job
+    # 6) Create the RFT job
     return _create_rft_job(
         account_id=account_id,
         api_key=api_key,
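Below is a minimal usage sketch of how the two phases introduced in this diff compose, assuming the helper signatures exactly as shown above. The import path, account id, API key/base, and file paths are placeholders for illustration, not values from this repo.

```python
import argparse

# Hypothetical import path; the diff does not name the module these helpers live in.
from rft_cli import _resolve_dataset, _upload_dataset

args = argparse.Namespace(
    dataset=None,                      # no pre-existing dataset id...
    dataset_jsonl="data/train.jsonl",  # ...materialize from a local JSONL instead
    dataset_display_name=None,
)

# Phase 1: pure resolution. No credentials are passed, so no upload can happen here.
dataset_id, dataset_resource, dataset_jsonl = _resolve_dataset(
    project_root="/path/to/project",
    account_id="my-account",
    evaluator_id="my-evaluator",
    args=args,
    selected_test_file_path=None,
    selected_test_func_name=None,
)
# Per the docstring, an explicit JSONL source should come back as
# (None, None, "data/train.jsonl"), so local validation can run before any upload.

# Phase 2: side effects. Creates/uploads the dataset for JSONL sources, or simply
# echoes the existing id/resource back when --dataset was given.
dataset_id, dataset_resource = _upload_dataset(
    project_root="/path/to/project",
    account_id="my-account",
    api_key="<API_KEY>",                 # placeholder credential
    api_base="https://api.example.com",  # placeholder endpoint
    evaluator_id="my-evaluator",
    dataset_id=dataset_id,
    dataset_resource=dataset_resource,
    dataset_jsonl=dataset_jsonl,
    args=args,
    dry_run=True,  # prints the dry-run notice and returns the inferred id/resource
)
# Under dry_run the diff builds the resource from the inferred id:
assert dataset_resource == f"accounts/my-account/datasets/{dataset_id}"
```

Splitting resolution from upload this way keeps the cheap, pure step first, lets local validation sit between the two, and means --dry-run never touches the network; the trade-off is that callers now thread dataset_id/dataset_resource/dataset_jsonl through to the upload step themselves.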