Skip to content

Commit 07fda02

Browse files
committed
add json schema example
1 parent 96fcc09 commit 07fda02

File tree

2 files changed

+49
-0
lines changed

2 files changed

+49
-0
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"user\": {\n \"name\": \"string\",\n \"age\": \"number\",\n \"isActive\": \"boolean\"\n },\n \"products\": [\n {\n \"id\": \"string\",\n \"price\": \"number\"\n }\n ]\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"user\": {\n \"name\": \"John Doe\",\n \"age\": 30,\n \"isActive\": true\n },\n \"products\": [\n {\n \"id\": \"prod-1\",\n \"price\": 29.99\n }\n ]\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 1.0, "reason": "Perfect schema match", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "exact_match", "description": "JSON content that exactly matches the expected schema", "expected_schema": {"type": "object", "properties": {"user": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}, "isActive": {"type": "boolean"}}}, "products": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "price": {"type": "number"}}}}}}}}}
2+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"user\": {\n \"name\": \"string\",\n \"age\": \"number\",\n \"isActive\": \"boolean\",\n \"address\": {\n \"street\": \"string\",\n \"city\": \"string\"\n }\n }\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"user\": {\n \"name\": \"John Doe\",\n \"age\": 30,\n \"email\": \"[email protected]\"\n }\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.5, "reason": "Partial schema match with missing and extra properties", "metrics": {"schema_similarity": {"score": 0.5, "reason": "Schema similarity: 0.50", "is_score_valid": false}}}, "input_metadata": {"dataset_info": {"test_case": "partial_match", "description": "JSON content with missing and extra properties compared to expected schema", "expected_schema": {"type": "object", "properties": {"user": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}, "isActive": {"type": "boolean"}, "address": {"type": "object", "properties": {"street": {"type": "string"}, "city": {"type": "string"}}}}}}}}}}
3+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\"\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": 30\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 1.0, "reason": "Perfect schema match for simple object", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "extract_from_message", "description": "JSON extracted from message content with code block formatting", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}}}}
4+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\",\n \"tags\": [\"string\"]\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": \"30\",\n \"tags\": {\n \"tag1\": \"value1\"\n }\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.33, "reason": "Type mismatches in schema validation", "metrics": {"schema_similarity": {"score": 0.33, "reason": "Schema similarity: 0.33", "is_score_valid": false}}}, "input_metadata": {"dataset_info": {"test_case": "mismatched_types", "description": "JSON content with type mismatches (string instead of number, object instead of array)", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}, "tags": {"type": "array", "items": {"type": "string"}}}}}}}
5+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\"\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": 30\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 1.0, "reason": "Perfect schema match using JSON string input", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "json_string_input", "description": "JSON schema validation using JSON string instead of object", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}}}}
6+
{"messages": [{"role": "user", "content": "Give me an empty object in the following JSON format:\n```json\n{}\n```"}, {"role": "assistant", "content": "Here's an empty object:\n```json\n{}\n```"}], "ground_truth": null, "evaluation_result": {"score": 1.0, "reason": "Perfect match for empty object schema", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "empty_properties", "description": "JSON schema validation with empty object properties", "expected_schema": {"type": "object", "properties": {}}}}}
7+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\"\n}\n```"}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": \n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.0, "reason": "Invalid JSON content", "metrics": {"error": {"score": 0.0, "reason": "Invalid JSON content: Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": \n}\n```", "is_score_valid": false}}}, "input_metadata": {"dataset_info": {"test_case": "invalid_json", "description": "JSON schema validation with invalid JSON syntax", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}}}}
8+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\"\n}\n```\n\nPlease provide a user object with name and age."}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": 30\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.925, "reason": "Combined schema and LLM evaluation", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00\nMatching properties (2):\n - age: number\n - name: string", "is_score_valid": true}, "llm_judge": {"score": 0.85, "reason": "This is a good JSON structure that matches the expected schema. The data is well-formatted and contains the required fields with appropriate types.", "is_score_valid": false}, "schema_score": {"score": 1.0, "reason": "Schema validation score: 1.00", "is_score_valid": true}, "llm_score": {"score": 0.85, "reason": "LLM judge score: 0.85", "is_score_valid": false}, "weights": {"score": 0.0, "reason": "Weights used - Schema: 0.7, LLM: 0.3", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "llm_judge_integration", "description": "Combined schema validation and LLM judgment evaluation", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}, "expected_behavior": "Provide a user object with name and age"}}}
9+
{"messages": [{"role": "user", "content": "Give me user information in the following JSON format:\n```json\n{\n \"name\": \"string\",\n \"age\": \"number\"\n}\n```\n\nPlease provide a user object with name and age."}, {"role": "assistant", "content": "Here's the user information:\n```json\n{\n \"name\": \"John Doe\",\n \"age\": 30\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.72, "reason": "Combined evaluation with custom weights", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00\nMatching properties (2):\n - age: number\n - name: string", "is_score_valid": true}, "llm_judge": {"score": 0.6, "reason": "This JSON is acceptable but has some issues. The structure is correct but could be more comprehensive.", "is_score_valid": false}, "schema_score": {"score": 1.0, "reason": "Schema validation score: 1.00", "is_score_valid": true}, "llm_score": {"score": 0.6, "reason": "LLM judge score: 0.6", "is_score_valid": false}, "weights": {"score": 0.0, "reason": "Weights used - Schema: 0.3, LLM: 0.7", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "custom_weights", "description": "Combined evaluation with custom weights (schema=0.3, llm=0.7)", "expected_schema": {"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}, "expected_behavior": "Provide a user object with name and age"}}}
10+
{"messages": [{"role": "user", "content": "Create a complex nested object in the following JSON format:\n```json\n{\n \"company\": {\n \"name\": \"string\",\n \"employees\": [\n {\n \"id\": \"string\",\n \"name\": \"string\",\n \"department\": {\n \"name\": \"string\",\n \"location\": \"string\",\n \"code\": \"string\"\n },\n \"salary\": \"number\",\n \"skills\": [\"string\"]\n }\n ],\n \"metadata\": {\n \"founded\": \"number\",\n \"isPublic\": \"boolean\",\n \"tags\": [\"string\"]\n }\n }\n}\n```"}, {"role": "assistant", "content": "Here's a complex nested object:\n```json\n{\n \"company\": {\n \"name\": \"Tech Corp\",\n \"employees\": [\n {\n \"id\": \"emp-001\",\n \"name\": \"Alice Johnson\",\n \"department\": {\n \"name\": \"Engineering\",\n \"location\": \"Building A\",\n \"extra_field\": \"some value\"\n },\n \"skills\": [\"Python\", \"JavaScript\", \"SQL\"]\n },\n {\n \"id\": \"emp-002\",\n \"name\": \"Bob Smith\",\n \"department\": {\n \"name\": \"Marketing\",\n \"location\": \"Building B\"\n },\n \"skills\": [\"Content Writing\", \"SEO\"]\n }\n ],\n \"metadata\": {\n \"founded\": 2010,\n \"isPublic\": false,\n \"tags\": [\"technology\", \"startup\"]\n }\n }\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.8, "reason": "Complex nested schema with some mismatches", "metrics": {"schema_similarity": {"score": 0.8, "reason": "Schema similarity: 0.80", "is_score_valid": false}}}, "input_metadata": {"dataset_info": {"test_case": "complex_nested", "description": "Complex nested JSON object with arrays and objects", "expected_schema": {"type": "object", "properties": {"company": {"type": "object", "properties": {"name": {"type": "string"}, "employees": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "name": {"type": "string"}, "department": {"type": "object", "properties": {"name": {"type": "string"}, "location": {"type": "string"}, "code": {"type": "string"}}}, "salary": {"type": "number"}, "skills": {"type": "array", "items": {"type": "string"}}}}}}}, "metadata": {"type": "object", "properties": {"founded": {"type": "number"}, "isPublic": {"type": "boolean"}, "tags": {"type": "array", "items": {"type": "string"}}}}}}}}}
11+
{"messages": [{"role": "user", "content": "Provide an array of simple objects in the following JSON format:\n```json\n[\n {\n \"id\": \"number\",\n \"name\": \"string\",\n \"active\": \"boolean\"\n }\n]\n```"}, {"role": "assistant", "content": "Here's an array of simple objects:\n```json\n[\n {\n \"id\": 1,\n \"name\": \"Item One\",\n \"active\": true\n },\n {\n \"id\": 2,\n \"name\": \"Item Two\",\n \"active\": false\n },\n {\n \"id\": 3,\n \"name\": \"Item Three\",\n \"active\": true\n }\n]\n```"}], "ground_truth": null, "evaluation_result": {"score": 1.0, "reason": "Perfect array schema match", "metrics": {"schema_similarity": {"score": 1.0, "reason": "Schema similarity: 1.00", "is_score_valid": true}}}, "input_metadata": {"dataset_info": {"test_case": "array_of_objects", "description": "JSON array containing simple objects with consistent structure", "expected_schema": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "number"}, "name": {"type": "string"}, "active": {"type": "boolean"}}}}}}}
12+
{"messages": [{"role": "user", "content": "Give me a mixed data structure in the following JSON format:\n```json\n{\n \"string_field\": \"string\",\n \"number_field\": \"number\",\n \"boolean_field\": \"boolean\",\n \"null_field\": \"null\",\n \"array_field\": [\"number\"],\n \"object_field\": {\n \"nested_string\": \"string\",\n \"nested_number\": \"number\"\n },\n \"date_field\": \"string\",\n \"enum_field\": \"string\"\n}\n```"}, {"role": "assistant", "content": "Here's a mixed data structure:\n```json\n{\n \"string_field\": \"Hello World\",\n \"number_field\": 42,\n \"boolean_field\": true,\n \"null_field\": null,\n \"array_field\": [1, 2, 3, 4, 5],\n \"object_field\": {\n \"nested_string\": \"Nested Value\",\n \"nested_number\": 123.45\n },\n \"extra_field\": \"unexpected field\"\n}\n```"}], "ground_truth": null, "evaluation_result": {"score": 0.75, "reason": "Mixed data types with some schema mismatches", "metrics": {"schema_similarity": {"score": 0.75, "reason": "Schema similarity: 0.75", "is_score_valid": false}}}, "input_metadata": {"dataset_info": {"test_case": "mixed_data_types", "description": "JSON object with various data types including strings, numbers, booleans, null, arrays, and objects", "expected_schema": {"type": "object", "properties": {"string_field": {"type": "string"}, "number_field": {"type": "number"}, "boolean_field": {"type": "boolean"}, "null_field": {"type": "null"}, "array_field": {"type": "array", "items": {"type": "number"}}, "object_field": {"type": "object", "properties": {"nested_string": {"type": "string"}, "nested_number": {"type": "number"}}}, "date_field": {"type": "string"}, "enum_field": {"type": "string"}}}}}}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import json
2+
from typing import Any, Dict, List
3+
from eval_protocol.models import EvaluationRow
4+
from eval_protocol.pytest import default_single_turn_rollout_processor, evaluation_test
5+
from eval_protocol.rewards.json_schema import json_schema_reward
6+
7+
8+
def json_schema_to_evaluation_row(rows: List[Dict[str, Any]]) -> List[EvaluationRow]:
9+
"""
10+
Convert a json schema row to an evaluation row.
11+
"""
12+
dataset: List[EvaluationRow] = []
13+
for row in rows:
14+
dataset.append(
15+
EvaluationRow(
16+
messages=row["messages"][:1],
17+
ground_truth=row["ground_truth"],
18+
input_metadata=row["input_metadata"],
19+
)
20+
)
21+
return dataset
22+
23+
24+
@evaluation_test(
25+
input_dataset=["tests/pytest/data/json_schema.jsonl"],
26+
model=["accounts/fireworks/models/kimi-k2-instruct"],
27+
mode="pointwise",
28+
rollout_processor=default_single_turn_rollout_processor,
29+
dataset_adapter=json_schema_to_evaluation_row,
30+
)
31+
async def test_pytest_function_calling(row: EvaluationRow) -> EvaluationRow:
32+
"""Run pointwise evaluation on sample dataset using pytest interface."""
33+
expected_schema = row.input_metadata.dataset_info["expected_schema"]
34+
result = json_schema_reward(row.messages, expected_schema=expected_schema)
35+
row.evaluation_result = result
36+
print(row.evaluation_result)
37+
return row

0 commit comments

Comments
 (0)