Skip to content

feat: Implement GCS eval set results manager to store eval history in GCS #1200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/google/adk/evaluation/eval_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class EvalSetResult(BaseModel):
populate_by_name=True,
)
eval_set_result_id: str
eval_set_result_name: str
eval_set_result_name: Optional[str] = None
eval_set_id: str
eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
creation_timestamp: float = 0.0
23 changes: 23 additions & 0 deletions src/google/adk/evaluation/eval_set_results_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,34 @@

from abc import ABC
from abc import abstractmethod
from typing import Optional

from .eval_result import EvalCaseResult
from .eval_result import EvalSetResult


class EvalSetResultsStorageManager(ABC):
  """An interface to manage storage of eval set results.

  The interface separates *where* a result lives (`get_eval_set_result_path`,
  `list_eval_set_results`) from reading and writing it
  (`load_eval_set_result`, `save_eval_set_result`). The format of a path is
  left to each implementation.
  """

  @abstractmethod
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id.

    Args:
      app_name: The name of the app the result belongs to.
      eval_set_result_id: The id of the eval set result.

    Returns:
      An implementation-specific path string.
    """

  @abstractmethod
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of the EvalSetResults stored for the given app_name.

    Args:
      app_name: The name of the app whose results should be listed.

    Returns:
      A list of eval set result ids.
    """

  @abstractmethod
  def save_eval_set_result(self, path: str, eval_set_result: EvalSetResult):
    """Writes the EvalSetResult to the given path.

    Args:
      path: The destination path.
      eval_set_result: The result to persist.
    """

  @abstractmethod
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: The path to read from.

    Returns:
      The loaded EvalSetResult, or None. NOTE(review): presumably None means
      nothing is stored at `path` -- confirm against the implementations.
    """


class EvalSetResultsManager(ABC):
"""An interface to manage Eval Set Results."""

Expand Down
20 changes: 20 additions & 0 deletions src/google/adk/evaluation/eval_sets_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@
from .eval_set import EvalSet


class EvalSetStorageManager(ABC):
  """An interface to manage storage of eval sets.

  The interface separates *where* an eval set lives (`get_eval_set_path`,
  `list_eval_sets`) from reading and writing it (`load_eval_set`,
  `save_eval_set`). The format of a path is left to each implementation.
  """

  @abstractmethod
  def get_eval_set_path(self, app_name: str, eval_set_id: str) -> str:
    """Gets the path to the EvalSet identified by app_name and eval_set_id.

    Args:
      app_name: The name of the app the eval set belongs to.
      eval_set_id: The id of the eval set.

    Returns:
      An implementation-specific path string.
    """

  @abstractmethod
  def list_eval_sets(self, app_name: str) -> list[str]:
    """Lists all eval sets for the given app_name.

    Args:
      app_name: The name of the app whose eval sets should be listed.

    Returns:
      A list of eval set ids.
    """

  @abstractmethod
  def save_eval_set(self, path: str, eval_set: EvalSet):
    """Writes the EvalSet to the given path.

    Args:
      path: The destination path.
      eval_set: The eval set to persist.
    """

  @abstractmethod
  def load_eval_set(self, path: str) -> Optional[EvalSet]:
    """Loads the EvalSet from the given path.

    Args:
      path: The path to read from.

    Returns:
      The loaded EvalSet, or None. NOTE(review): presumably None means nothing
      is stored at `path` -- confirm against the implementations.
    """


class EvalSetsManager(ABC):
"""An interface to manage an Eval Sets."""

Expand Down
100 changes: 100 additions & 0 deletions src/google/adk/evaluation/gcs_eval_set_results_storage_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import logging
import os
from typing import Optional

from google.cloud import exceptions as cloud_exceptions
from google.cloud import storage
from typing_extensions import override

from .eval_result import EvalSetResult
from .eval_set_results_manager import EvalSetResultsStorageManager

# Module logger; the name is prefixed with "google_adk." ahead of __name__.
logger = logging.getLogger("google_adk." + __name__)

# Object-name segment placed after "{app_name}/" for eval set result blobs.
_EVAL_HISTORY_DIR = "evals/eval_history"
# Suffix appended to an eval set result id to form the blob's file name.
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"


class GcsEvalSetResultsStorageManager(EvalSetResultsStorageManager):
  """An EvalSetResultsStorageManager that stores eval set results in a GCS bucket.

  Each result is stored as a JSON object named
  `{app_name}/evals/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetResultsStorageManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Check if the bucket exists.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetResultsStorageManager."
      )

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the object-name prefix (no trailing slash) for app_name."""
    return f"{app_name}/{_EVAL_HISTORY_DIR}"

  @override
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id.

    The returned value is an object name inside the bucket, not a `gs://` URI.
    """
    eval_history_dir = self._get_eval_history_dir(app_name)
    return f"{eval_history_dir}/{eval_set_result_id}{_EVAL_SET_RESULT_FILE_EXTENSION}"

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of the EvalSetResults stored for the given app_name.

    Only objects whose name ends with the eval set result file extension are
    reported, mirroring the local storage manager.

    Args:
      app_name: The name of the app whose results should be listed.

    Returns:
      The eval set result ids, sorted.

    Raises:
      ValueError: If the listing request reports NotFound.
    """
    # The trailing slash keeps sibling prefixes such as
    # "evals/eval_history_old/" from matching the listing.
    prefix = f"{self._get_eval_history_dir(app_name)}/"
    try:
      # list_blobs is lazy, so the iteration itself must stay inside the try.
      eval_set_result_ids = [
          blob.name.rsplit("/", 1)[-1].removesuffix(
              _EVAL_SET_RESULT_FILE_EXTENSION
          )
          for blob in self.bucket.list_blobs(prefix=prefix)
          # Skips folder placeholder objects and unrelated files.
          if blob.name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
      ]
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e
    return sorted(eval_set_result_ids)

  @override
  def save_eval_set_result(self, path: str, eval_set_result: EvalSetResult):
    """Writes the EvalSetResult to the given path.

    Args:
      path: The object name inside the bucket.
      eval_set_result: The result to upload as indented JSON.
    """
    logger.info("Saving EvalSetResult to gs://%s/%s", self.bucket_name, path)
    blob = self.bucket.blob(path)
    blob.upload_from_string(
        eval_set_result.model_dump_json(indent=2),
        content_type="application/json",
    )

  @override
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: The object name inside the bucket.

    Returns:
      The parsed EvalSetResult, or None if the download reports NotFound.
    """
    try:
      blob = self.bucket.blob(path)
      eval_set_result_data = blob.download_as_text()
      return EvalSetResult.model_validate_json(eval_set_result_data)
    except cloud_exceptions.NotFound:
      return None
100 changes: 100 additions & 0 deletions src/google/adk/evaluation/gcs_eval_set_storage_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import logging
import os
import re

from google.cloud import exceptions as cloud_exceptions
from google.cloud import storage

from .eval_set import EvalSet
from .eval_sets_manager import EvalSetStorageManager

# Module logger; the name is prefixed with "google_adk." ahead of __name__.
logger = logging.getLogger("google_adk." + __name__)

# Object-name segment placed after "{app_name}/" for eval set blobs.
_EVAL_SETS_DIR = "evals/eval_sets"
# Suffix appended to an eval set id to form the blob's file name.
_EVAL_SET_FILE_EXTENSION = ".evalset.json"


class GcsEvalSetStorageManager(EvalSetStorageManager):
  """An EvalSetStorageManager that stores eval sets in a GCS bucket.

  Each eval set is stored as a JSON object named
  `{app_name}/evals/eval_sets/{eval_set_id}.evalset.json`.
  """

  def __init__(self, bucket_name: str, **kwargs):
    """Initializes the GcsEvalSetStorageManager.

    Args:
      bucket_name: The name of the bucket to use.
      **kwargs: Keyword arguments to pass to the Google Cloud Storage client.

    Raises:
      ValueError: If the bucket does not exist.
    """
    self.bucket_name = bucket_name
    self.storage_client = storage.Client(**kwargs)
    self.bucket = self.storage_client.bucket(self.bucket_name)
    # Check if the bucket exists.
    if not self.bucket.exists():
      raise ValueError(
          f"Bucket `{self.bucket_name}` does not exist. Please create it before"
          " using the GcsEvalSetStorageManager."
      )

  def _get_eval_sets_dir(self, app_name: str) -> str:
    """Returns the object-name prefix (no trailing slash) for app_name."""
    return f"{app_name}/{_EVAL_SETS_DIR}"

  def _validate_id(self, id_name: str, id_value: str):
    """Raises ValueError unless id_value is letters, digits and underscores.

    NOTE(review): nothing in this class calls this helper; presumably callers
    are expected to validate ids before building paths -- confirm.

    Args:
      id_name: Human-readable name of the id, used in the error message.
      id_value: The id to validate.

    Raises:
      ValueError: If id_value is empty or contains any other character.
    """
    pattern = r"^[a-zA-Z0-9_]+$"
    if not re.fullmatch(pattern, id_value):
      raise ValueError(
          f"Invalid {id_name}. {id_name} should have the `{pattern}` format",
      )

  def get_eval_set_path(self, app_name: str, eval_set_id: str) -> str:
    """Gets the path to the EvalSet identified by app_name and eval_set_id.

    The returned value is an object name inside the bucket, not a `gs://` URI.
    """
    eval_sets_dir = self._get_eval_sets_dir(app_name)
    return f"{eval_sets_dir}/{eval_set_id}{_EVAL_SET_FILE_EXTENSION}"

  def list_eval_sets(self, app_name: str) -> list[str]:
    """Lists the ids of all eval sets stored for the given app_name.

    Only objects whose name ends with the eval set file extension are
    reported.

    Args:
      app_name: The name of the app whose eval sets should be listed.

    Returns:
      The eval set ids, sorted.

    Raises:
      ValueError: If the listing request reports NotFound.
    """
    # The trailing slash keeps sibling prefixes such as "evals/eval_sets_old/"
    # from matching the listing.
    prefix = f"{self._get_eval_sets_dir(app_name)}/"
    try:
      # list_blobs is lazy, so the iteration itself must stay inside the try.
      eval_set_ids = [
          blob.name.rsplit("/", 1)[-1].removesuffix(_EVAL_SET_FILE_EXTENSION)
          for blob in self.bucket.list_blobs(prefix=prefix)
          # Skips folder placeholder objects and unrelated files.
          if blob.name.endswith(_EVAL_SET_FILE_EXTENSION)
      ]
    except cloud_exceptions.NotFound as e:
      raise ValueError(
          f"App `{app_name}` not found in GCS bucket `{self.bucket_name}`."
      ) from e
    return sorted(eval_set_ids)

  def save_eval_set(self, path: str, eval_set: EvalSet):
    """Writes the EvalSet to the given path.

    Args:
      path: The object name inside the bucket.
      eval_set: The eval set to upload as indented JSON.
    """
    blob = self.bucket.blob(path)
    blob.upload_from_string(
        eval_set.model_dump_json(indent=2),
        content_type="application/json",
    )

  def load_eval_set(self, path: str) -> EvalSet | None:
    """Loads the EvalSet from the given path.

    Args:
      path: The object name inside the bucket.

    Returns:
      The parsed EvalSet, or None if the download reports NotFound.
    """
    try:
      blob = self.bucket.blob(path)
      eval_set_data = blob.download_as_text()
      return EvalSet.model_validate_json(eval_set_data)
    except cloud_exceptions.NotFound:
      return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import logging
import os
from typing import Optional

from typing_extensions import override

from .eval_result import EvalSetResult
from .eval_set_results_manager import EvalSetResultsStorageManager

# Module logger; the name is prefixed with "google_adk." ahead of __name__.
logger = logging.getLogger("google_adk." + __name__)

# Directory, relative to "{agents_dir}/{app_name}", holding eval set results.
_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
# Suffix appended to an eval set result id to form the file name.
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"


class LocalEvalSetResultsStorageManager(EvalSetResultsStorageManager):
  """An EvalSetResultsStorageManager that stores eval set results locally on disk.

  Each result is stored as a JSON file at
  `{agents_dir}/{app_name}/.adk/eval_history/{eval_set_result_id}.evalset_result.json`.
  """

  def __init__(self, agents_dir: str):
    """Initializes the LocalEvalSetResultsStorageManager.

    Args:
      agents_dir: The directory under which per-app folders are located.
    """
    self._agents_dir = agents_dir

  def _get_eval_history_dir(self, app_name: str) -> str:
    """Returns the directory holding the eval set results of app_name."""
    return os.path.join(self._agents_dir, app_name, _ADK_EVAL_HISTORY_DIR)

  @override
  def get_eval_set_result_path(
      self, app_name: str, eval_set_result_id: str
  ) -> str:
    """Gets the path to the EvalSetResult identified by app_name and eval_set_result_id."""
    return os.path.join(
        self._get_eval_history_dir(app_name),
        eval_set_result_id + _EVAL_SET_RESULT_FILE_EXTENSION,
    )

  @override
  def list_eval_set_results(self, app_name: str) -> list[str]:
    """Lists the ids of the EvalSetResults stored for the given app_name.

    Args:
      app_name: The name of the app whose results should be listed.

    Returns:
      The eval set result ids, sorted; empty if the app has no eval history
      directory.
    """
    app_eval_history_directory = self._get_eval_history_dir(app_name)

    # isdir (rather than exists) also covers a stray file at that location.
    if not os.path.isdir(app_eval_history_directory):
      return []

    eval_result_files = [
        file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
        for file in os.listdir(app_eval_history_directory)
        if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
    ]
    return sorted(eval_result_files)

  @override
  def save_eval_set_result(self, path: str, eval_set_result: EvalSetResult):
    """Writes the EvalSetResult to the given path.

    Missing parent directories are created. The file is written as UTF-8 so
    that it round-trips with `load_eval_set_result` on every platform.

    Args:
      path: The destination file path.
      eval_set_result: The result to write as indented JSON.
    """
    parent_dir = os.path.dirname(path)
    # A bare file name has no directory part; os.makedirs("") would raise.
    if parent_dir:
      os.makedirs(parent_dir, exist_ok=True)
    logger.info("Saving EvalSetResult to %s", path)
    with open(path, "w", encoding="utf-8") as f:
      f.write(eval_set_result.model_dump_json(indent=2))

  @override
  def load_eval_set_result(self, path: str) -> Optional[EvalSetResult]:
    """Loads the EvalSetResult from the given path.

    Args:
      path: The file path to read.

    Returns:
      The parsed EvalSetResult, or None if the file does not exist.
    """
    try:
      with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    except FileNotFoundError:
      return None
    return EvalSetResult.model_validate_json(content)
Loading