File tree Expand file tree Collapse file tree 2 files changed +36
-0
lines changed
Expand file tree Collapse file tree 2 files changed +36
-0
lines changed Original file line number Diff line number Diff line change @@ -496,6 +496,8 @@ class EvaluationRow(BaseModel):
496496 supporting both row-wise batch evaluation and trajectory-based RL evaluation.
497497 """
498498
499+ model_config = ConfigDict (extra = "allow" )
500+
499501 # Core OpenAI ChatCompletion compatible conversation data
500502 messages : List [Message ] = Field (description = "List of messages in the conversation. Also known as a trajectory." )
501503
Original file line number Diff line number Diff line change 11import json
2+ import logging
23from typing import Dict
34
45import pytest
@@ -660,3 +661,36 @@ def test_stable_hash_across_subprocess():
660661
661662 assert isinstance (child_hash , int )
662663 assert parent_hash == child_hash
664+
665+
def test_evaluation_row_extra_fields():
    """Round-trip an ``EvaluationRow`` through JSON and check key survival.

    The row is constructed from a plain dict, serialized with
    ``model_dump_json()``, and parsed back with ``json.loads``. The test then
    checks that the ``eval`` key, the nested ``accuracy`` metric under
    ``eval_details``, and the ``test`` entry under ``extra_fields`` are all
    present in the parsed output.
    """
    # NOTE(review): which of these keys are declared on EvaluationRow and which
    # are only accepted as extras is not visible from this test -- confirm
    # against the model definition.
    conversation = [
        {"role": "user", "content": "What is the capital of France?"},
        {"role": "assistant", "content": "The capital of France is Paris."},
    ]
    accuracy_metric = {
        "score": 1.0,
        "reason": "Correct",
        "is_score_valid": True,
    }
    details = {
        "score": 0.5,
        "reason": "Correct",
        "is_score_valid": True,
        "metrics": {"accuracy": accuracy_metric},
    }
    payload = {
        "messages": conversation,
        "ground_truth": "Paris",
        "evaluation_result": {"score": 1.0, "reason": "Correct"},
        "input_metadata": {"model": "gpt-4"},
        "eval": {"score": 0.5},
        "eval_details": details,
        "extra_fields": {"test": "test"},
    }

    serialized = EvaluationRow(**payload).model_dump_json()
    round_tripped = json.loads(serialized)

    assert "eval" in round_tripped
    assert "accuracy" in round_tripped["eval_details"]["metrics"]
    assert "test" in round_tripped["extra_fields"]
You can’t perform that action at this time.
0 commit comments