@@ -222,10 +222,10 @@ def _validate_data(self, data: List[Dict[str, Union[str, Dict[str, Any]]]]) -> N
                 f"got {sorted(new_keys)}"
             )
 
-        required_keys = {'input', 'expected_output'}
+        required_keys = {'input'}
         if not required_keys.issubset(first_row_keys):
             missing = required_keys - first_row_keys
-            raise ValueError(f"Records must contain 'input' and 'expected_output' fields. Missing: {missing}")
+            raise ValueError(f"Records must contain the 'input' field. Missing: {missing}")
 
         # Validate consistency within new data
         for row in data:
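After this relaxation only `input` is required; `expected_output` may be omitted. A minimal sketch of the record shapes the validator now accepts (the records themselves are invented for illustration):

```python
# Hypothetical records: 'expected_output' is optional after this change.
records = [
    {"input": "What is 2 + 2?", "expected_output": "4"},  # still valid
    {"input": "Summarize the attached text."},            # now also valid
]

required_keys = {'input'}
for row in records:
    assert required_keys.issubset(row.keys())  # passes for both rows
```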
@@ -363,7 +363,10 @@ def _prepare_batch_payload(self, overwrite: bool) -> Dict[str, Any]:
         if self._changes['added']:
             insert_records = []
             for record in self._changes['added']:
-                new_record = {"input": record["input"], "expected_output": record["expected_output"]}
+                new_record = {"input": record["input"]}
+                if record.get("expected_output"):
+                    new_record["expected_output"] = record["expected_output"]
+
                 metadata = {k: v for k, v in record.items() if k not in ["input", "expected_output", "record_id"]}
                 if metadata:
                     new_record["metadata"] = metadata
@@ -539,8 +542,10 @@ def _build_insert_record(record: Dict[str, Any]) -> Dict[str, Any]:
     """Convert an internal record representation into the *insert_records* payload format."""
     new_rec = {
         "input": record["input"],
-        "expected_output": record["expected_output"],
+
     }
+    if record.get("expected_output"):
+        new_rec["expected_output"] = record["expected_output"]
     metadata = {k: v for k, v in record.items() if k not in ["input", "expected_output", "record_id"]}
     if metadata:
         new_rec["metadata"] = metadata
@@ -556,7 +561,7 @@ def _build_update_record(old: Dict[str, Any], new: Dict[str, Any]) -> Dict[str,
     if old.get("input") != new.get("input"):
         upd["input"] = new["input"]
     if old.get("expected_output") != new.get("expected_output"):
-        upd["expected_output"] = new["expected_output"]
+        upd["expected_output"] = new.get("expected_output")
     # Diff metadata.
     old_meta = {k: v for k, v in old.items() if k not in ["input", "expected_output", "record_id"]}
     new_meta = {k: v for k, v in new.items() if k not in ["input", "expected_output", "record_id"]}
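Switching to `new.get(...)` here means that a record whose `expected_output` was removed produces an update carrying `None` instead of raising `KeyError`. A small illustration with invented records:

```python
old = {"input": "2 + 2?", "expected_output": "4"}
new = {"input": "2 + 2?"}  # expected_output was removed from the record

# old.get("expected_output") != new.get("expected_output") is True, so the
# update payload gets {"expected_output": None} rather than crashing on
# new["expected_output"].
```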
@@ -605,7 +610,7 @@ def _send_batch_updates(
             attrs["update_records"] = update_records
         if delete_records:
             attrs["delete_records"] = delete_records
-
+
         # Use create_new_version for first chunk, then overwrite=True for subsequent chunks
         # to append to the version established by the first chunk
         if idx == 0:
@@ -657,12 +662,11 @@ def from_csv(
             Dataset: A new Dataset instance containing the CSV data, structured for LLM experiments.
 
         Raises:
-            ValueError: If input_columns or expected_output_columns are not provided,
-                or if the CSV is missing those columns, or if the file is empty.
+            ValueError: If input_columns is not provided, or if the CSV is missing those columns, or if the file is empty.
             DatasetFileError: If there are issues reading the CSV file (e.g., file not found, permission error, malformed).
         """
-        if input_columns is None or expected_output_columns is None:
-            raise ValueError("`input_columns` and `expected_output_columns` must be provided.")
+        if input_columns is None:
+            raise ValueError("`input_columns` must be provided.")
 
         data = []
         try:
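With the relaxed check, `expected_output_columns` can be omitted entirely. Assuming the first argument is the CSV path (the full signature is not shown in this hunk), a call would look like:

```python
# File name and column names are illustrative.
dataset = Dataset.from_csv("questions.csv", input_columns=["question"])
```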
@@ -683,7 +687,9 @@ def from_csv(
 
             header_columns = reader.fieldnames
             missing_input_columns = [col for col in input_columns if col not in header_columns]
-            missing_output_columns = [col for col in expected_output_columns if col not in header_columns]
+            missing_output_columns = []
+            if expected_output_columns is not None:
+                missing_output_columns = [col for col in expected_output_columns if col not in header_columns]
 
             if missing_input_columns:
                 raise ValueError(f"Input columns not found in CSV header: {missing_input_columns}")
@@ -698,7 +704,7 @@ def from_csv(
 
             # Determine metadata columns (all columns not used for input or expected output)
             metadata_columns = [
-                col for col in header_columns if col not in input_columns and col not in expected_output_columns
+                col for col in header_columns if col not in input_columns and (expected_output_columns is None or col not in expected_output_columns)
             ]
 
             for row in rows:
@@ -713,7 +719,9 @@ def from_csv(
 
                 try:
                     input_data = row[input_columns[0]] if len(input_columns) == 1 else {col: row[col] for col in input_columns}
-                    expected_output_data = row[expected_output_columns[0]] if len(expected_output_columns) == 1 else {col: row[col] for col in expected_output_columns}
+                    expected_output_data = None
+                    if expected_output_columns is not None and len(expected_output_columns) > 0:
+                        expected_output_data = row[expected_output_columns[0]] if len(expected_output_columns) == 1 else {col: row[col] for col in expected_output_columns}
 
                     metadata = {}
                     for col in metadata_columns:
@@ -726,13 +734,14 @@ def from_csv(
                     # Other errors during row processing also indicate CSV issues
                     raise DatasetFileError(f"Error parsing CSV file (row processing): {e}")
 
-                data.append(
-                    {
-                        "input": input_data,
-                        "expected_output": expected_output_data,
-                        **metadata,
-                    }
-                )
+                to_append = {
+                    "input": input_data,
+                    **metadata,
+                }
+
+                if expected_output_data:
+                    to_append["expected_output"] = expected_output_data
+                data.append(to_append)
         except csv.Error as e:
             # Catch CSV-specific parsing errors
             raise DatasetFileError(f"Error parsing CSV file: {e}")
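Note that the truthiness check means an empty expected-output cell (`""`) yields a row with no `expected_output` key at all. A sketch of the mapping for two invented CSV rows, assuming `input_columns=["question"]`, `expected_output_columns=["answer"]`, and a `topic` metadata column:

```python
# Invented rows as csv.DictReader would yield them.
row1 = {"question": "2 + 2?", "answer": "4", "topic": "math"}
row2 = {"question": "Name a color.", "answer": "", "topic": "misc"}

# row1 -> {"input": "2 + 2?", "topic": "math", "expected_output": "4"}
# row2 -> {"input": "Name a color.", "topic": "misc"}  # empty answer dropped
```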