Skip to content

Commit bd57f76

Browse files
google-genai-bot authored and copybara-github committed
feat: Implement GCS eval set results manager to store eval history in GCS
Eval results will be stored as JSON files under `gs://{bucket_name}/{app_name}/evals/eval_history/`.
PiperOrigin-RevId: 766808381
1 parent 54367dc commit bd57f76

11 files changed

+972
-1
lines changed

src/google/adk/evaluation/eval_result.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class EvalSetResult(BaseModel):
8080
populate_by_name=True,
8181
)
8282
eval_set_result_id: str
83-
eval_set_result_name: str
83+
eval_set_result_name: Optional[str] = None
8484
eval_set_id: str
8585
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
8686
creation_timestamp: float = 0.0

src/google/adk/evaluation/eval_set_results_manager.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,34 @@
1616

1717
from abc import ABC
1818
from abc import abstractmethod
19+
from typing import Optional
1920

2021
from .eval_result import EvalCaseResult
2122
from .eval_result import EvalSetResult
2223

2324

25+
class EvalSetResultsStorageManager(ABC):
  """An interface to manage storage of eval set results.

  Implementations translate an (app_name, eval_set_result_id) pair into a
  backend-specific path and read/write EvalSetResult objects at such paths.
  """

  @abstractmethod
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id.

    Args:
      app_name: The name of the app the eval set result belongs to.
      eval_set_result_id: The id of the eval set result.

    Returns:
      The backend-specific path, suitable for passing to
      `save_eval_set_result` and `load_eval_set_result`.
    """

  @abstractmethod
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of all EvalSetResults stored for the given app_name.

    Args:
      app_name: The name of the app whose eval set results are listed.

    Returns:
      The eval set result ids (not paths) found for the app.
    """

  @abstractmethod
  def save_eval_set_result(
      self, path: str, eval_set_result: EvalSetResult
  ) -> None:
    """Writes the EvalSetResult to the given path.

    Args:
      path: A path as returned by `get_eval_set_result_path`.
      eval_set_result: The eval set result to persist.
    """

  @abstractmethod
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: A path as returned by `get_eval_set_result_path`.

    Returns:
      The loaded EvalSetResult, or None. The return type allows a missing
      value; presumably None means nothing is stored at `path` (confirm
      against the concrete implementation in use).
    """
45+
46+
2447
class EvalSetResultsManager(ABC):
2548
"""An interface to manage Eval Set Results."""
2649

src/google/adk/evaluation/eval_sets_manager.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,26 @@
2323
from .eval_set import EvalSet
2424

2525

26+
class EvalSetStorageManager(ABC):
  """Storage backend interface for eval sets.

  Implementations translate an (app_name, eval_set_id) pair into a
  backend-specific path and read/write EvalSet objects at such paths.
  """

  @abstractmethod
  def get_eval_set_path(self, app_name: str, eval_set_id: str) -> str:
    """Returns the storage path of the EvalSet for app_name and eval_set_id.

    Args:
      app_name: The name of the app the eval set belongs to.
      eval_set_id: The id of the eval set.

    Returns:
      The backend-specific path, suitable for passing to `save_eval_set` and
      `load_eval_set`.
    """

  @abstractmethod
  def list_eval_sets(self, app_name: str) -> list[str]:
    """Returns all eval sets that exist for the given app_name.

    Args:
      app_name: The name of the app whose eval sets are listed.
    """

  @abstractmethod
  def save_eval_set(self, path: str, eval_set: EvalSet):
    """Persists the EvalSet at the given path.

    Args:
      path: A path as returned by `get_eval_set_path`.
      eval_set: The eval set to persist.
    """

  @abstractmethod
  def load_eval_set(self, path: str) -> Optional[EvalSet]:
    """Reads the EvalSet stored at the given path.

    Args:
      path: A path as returned by `get_eval_set_path`.

    Returns:
      The loaded EvalSet, or None. The return type allows a missing value;
      presumably None means nothing is stored at `path` (confirm against the
      concrete implementation in use).
    """
44+
45+
2646
class EvalSetsManager(ABC):
2747
"""An interface to manage an Eval Sets."""
2848

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
import os
19+
from typing import Optional
20+
21+
from google.cloud import exceptions as cloud_exceptions
22+
from google.cloud import storage
23+
from typing_extensions import override
24+
25+
from .eval_result import EvalSetResult
26+
from .eval_set_results_manager import EvalSetResultsStorageManager
27+
28+
logger = logging.getLogger("google_adk." + __name__)
29+
30+
_EVAL_HISTORY_DIR = "evals/eval_history"
31+
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
32+
33+
34+
class GcsEvalSetResultsStorageManager(EvalSetResultsStorageManager):
  """An EvalSetResultsStorageManager that stores eval set results in a GCS bucket.

  Each result is stored as a JSON blob named
  `{app_name}/evals/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetResultsStorageManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Check if the bucket exists.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetResultsStorageManager."
      )

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the blob-name prefix (no trailing slash) for an app's results."""
    return f"{app_name}/{_EVAL_HISTORY_DIR}"

  @override
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id.

    The returned value is a blob name inside the bucket, not a `gs://` URI.
    """
    eval_history_dir = self._get_eval_history_dir(app_name)
    return f"{eval_history_dir}/{eval_set_result_id}{_EVAL_SET_RESULT_FILE_EXTENSION}"

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of all EvalSetResults stored for the given app_name.

    Only blobs under the app's eval history prefix whose name ends with the
    eval set result file extension are considered; anything else (directory
    placeholder objects, unrelated files) is ignored.

    Args:
      app_name: The name of the app whose eval set results are listed.

    Returns:
      The sorted eval set result ids.

    Raises:
      ValueError: If GCS reports NotFound while listing.
    """
    # The trailing slash keeps sibling prefixes such as
    # `{app_name}/evals/eval_history_old/` from being matched.
    prefix = f"{self._get_eval_history_dir(app_name)}/"
    try:
      return sorted(
          blob.name.rsplit("/", 1)[-1].removesuffix(
              _EVAL_SET_RESULT_FILE_EXTENSION
          )
          for blob in self.bucket.list_blobs(prefix=prefix)
          if blob.name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
      )
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e

  @override
  def save_eval_set_result(self, path: str, eval_set_result: EvalSetResult):
    """Writes the EvalSetResult to the given path.

    Args:
      path: The blob name to write to, as returned by
        `get_eval_set_result_path`.
      eval_set_result: The eval set result, uploaded as indented JSON.
    """
    logger.info("Saving EvalSetResult to gs://%s/%s", self.bucket_name, path)
    blob = self.bucket.blob(path)
    blob.upload_from_string(
        eval_set_result.model_dump_json(indent=2),
        content_type="application/json",
    )

  @override
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: The blob name to read from, as returned by
        `get_eval_set_result_path`.

    Returns:
      The parsed EvalSetResult, or None if GCS reports the blob as not found.
    """
    try:
      blob = self.bucket.blob(path)
      eval_set_result_data = blob.download_as_text()
      return EvalSetResult.model_validate_json(eval_set_result_data)
    except cloud_exceptions.NotFound:
      return None
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
import os
19+
import re
20+
21+
from google.cloud import exceptions as cloud_exceptions
22+
from google.cloud import storage
23+
24+
from .eval_set import EvalSet
25+
from .eval_sets_manager import EvalSetStorageManager
26+
27+
logger = logging.getLogger("google_adk." + __name__)
28+
29+
_EVAL_SETS_DIR = "evals/eval_sets"
30+
_EVAL_SET_FILE_EXTENSION = ".evalset.json"
31+
32+
33+
class GcsEvalSetStorageManager(EvalSetStorageManager):
  """An EvalSetStorageManager that stores eval sets in a GCS bucket.

  Each eval set is stored as a JSON blob named
  `{app_name}/evals/eval_sets/{eval_set_id}.evalset.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetStorageManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Check if the bucket exists.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetStorageManager."
      )

  def _get_eval_sets_dir(self, app_name: str) -> str:
    """Returns the blob-name prefix (no trailing slash) for an app's eval sets."""
    return f"{app_name}/{_EVAL_SETS_DIR}"

  def _validate_id(self, id_name: str, id_value: str):
    """Raises ValueError unless id_value is made of letters, digits and `_`.

    Args:
      id_name: Human-readable name of the id, used in the error message.
      id_value: The id to validate.

    Raises:
      ValueError: If id_value is empty or contains any other character.
    """
    # NOTE(review): no method of this class calls this helper; it is kept
    # for callers outside this block.
    pattern = r"^[a-zA-Z0-9_]+$"
    if re.fullmatch(pattern, id_value) is None:
      raise ValueError(
          f"Invalid {id_name}. {id_name} should have the `{pattern}` format",
      )

  def get_eval_set_path(self, app_name: str, eval_set_id: str) -> str:
    """Gets the path to the EvalSet identified by app_name and eval_set_id.

    The returned value is a blob name inside the bucket, not a `gs://` URI.
    """
    eval_sets_dir = self._get_eval_sets_dir(app_name)
    return f"{eval_sets_dir}/{eval_set_id}{_EVAL_SET_FILE_EXTENSION}"

  def list_eval_sets(self, app_name: str) -> list[str]:
    """Lists the ids of all EvalSets stored for the given app_name.

    Only blobs under the app's eval sets prefix whose name ends with the eval
    set file extension are considered; anything else (directory placeholder
    objects, unrelated files) is ignored.

    Args:
      app_name: The name of the app whose eval sets are listed.

    Returns:
      The sorted eval set ids.

    Raises:
      ValueError: If GCS reports NotFound while listing.
    """
    # The trailing slash keeps sibling prefixes such as
    # `{app_name}/evals/eval_sets_old/` from being matched.
    prefix = f"{self._get_eval_sets_dir(app_name)}/"
    try:
      return sorted(
          blob.name.rsplit("/", 1)[-1].removesuffix(_EVAL_SET_FILE_EXTENSION)
          for blob in self.bucket.list_blobs(prefix=prefix)
          if blob.name.endswith(_EVAL_SET_FILE_EXTENSION)
      )
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e

  def save_eval_set(self, path: str, eval_set: EvalSet):
    """Writes the EvalSet to the given path.

    Args:
      path: The blob name to write to, as returned by `get_eval_set_path`.
      eval_set: The eval set, uploaded as indented JSON.
    """
    blob = self.bucket.blob(path)
    blob.upload_from_string(
        eval_set.model_dump_json(indent=2),
        content_type="application/json",
    )

  def load_eval_set(self, path: str) -> EvalSet | None:
    """Loads the EvalSet from the given path.

    Args:
      path: The blob name to read from, as returned by `get_eval_set_path`.

    Returns:
      The parsed EvalSet, or None if GCS reports the blob as not found.
    """
    try:
      blob = self.bucket.blob(path)
      eval_set_data = blob.download_as_text()
      return EvalSet.model_validate_json(eval_set_data)
    except cloud_exceptions.NotFound:
      return None
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import logging
18+
import os
19+
from typing import Optional
20+
21+
from typing_extensions import override
22+
23+
from .eval_result import EvalSetResult
24+
from .eval_set_results_manager import EvalSetResultsStorageManager
25+
26+
logger = logging.getLogger("google_adk." + __name__)
27+
28+
_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
29+
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
30+
31+
32+
class LocalEvalSetResultsStorageManager(EvalSetResultsStorageManager):
  """An EvalSetResultsStorageManager that stores eval set results locally on disk.

  Each result is stored as a JSON file at
  `{agents_dir}/{app_name}/.adk/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, agents_dir: str):
    """Initializes the LocalEvalSetResultsStorageManager.

    Args:
      agents_dir: The directory under which the per-app directories live.
    """
    self._agents_dir = agents_dir

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the directory holding the eval history of the given app."""
    return os.path.join(self._agents_dir, app_name, _ADK_EVAL_HISTORY_DIR)

  @override
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id."""
    return os.path.join(
        self._get_eval_history_dir(app_name),
        eval_set_result_id + _EVAL_SET_RESULT_FILE_EXTENSION,
    )

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of all EvalSetResults stored for the given app_name.

    Args:
      app_name: The name of the app whose eval set results are listed.

    Returns:
      The sorted eval set result ids; an empty list when the app has no eval
      history directory.
    """
    app_eval_history_directory = self._get_eval_history_dir(app_name)

    # isdir rather than exists: a stray regular file at this path must not
    # reach os.listdir, which would raise NotADirectoryError.
    if not os.path.isdir(app_eval_history_directory):
      return []

    return sorted(
        file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
        for file in os.listdir(app_eval_history_directory)
        if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
    )

  @override
  def save_eval_set_result(self, path: str, eval_set_result: EvalSetResult):
    """Writes the EvalSetResult to the given path.

    Missing parent directories are created. The file is written as UTF-8 so
    that it round-trips with `load_eval_set_result` on every platform.

    Args:
      path: The file path to write to.
      eval_set_result: The eval set result, written as indented JSON.
    """
    parent_dir = os.path.dirname(path)
    # A bare file name has no directory component and os.makedirs("") raises.
    if parent_dir:
      os.makedirs(parent_dir, exist_ok=True)
    logger.info("Saving EvalSetResult to %s", path)
    with open(path, "w", encoding="utf-8") as f:
      f.write(eval_set_result.model_dump_json(indent=2))

  @override
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: The file path to read from.

    Returns:
      The parsed EvalSetResult, or None if the file does not exist.
    """
    try:
      with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    except FileNotFoundError:
      return None
    return EvalSetResult.model_validate_json(content)

0 commit comments

Comments
 (0)