Open
Description
Describe the bug
Evaluations run with `pytest` seem to be broken and throw `TypeError: bad argument type for built-in operation`.
To Reproduce
Steps to reproduce the behavior:
(Important: my `root_agent` is a coroutine, as it uses MCP servers.)
My `example.test.json`:
[
{
"query": "hi",
"expected_tool_use": [],
"expected_intermediate_agent_responses": [],
"reference": "Hello! What can I do for you?\n"
},
{
"query": "Search google for the top 10 news articles about AI",
"expected_tool_use": [],
"expected_intermediate_agent_responses": []
}
]
I have the following in `test_subagent.py`:
def test_with_single_test_file():
"""Test the agent's basic ability via a session file."""
AgentEvaluator.evaluate(
agent_module="my_module",
eval_dataset_file_path_or_dir="tests/example.test.json",
)
When running pytest I get the following error:
../.venv/lib/python3.12/site-packages/google/adk/evaluation/agent_evaluator.py:132: in evaluate
AgentEvaluator._evaluate_response_scores(
../.venv/lib/python3.12/site-packages/google/adk/evaluation/agent_evaluator.py:287: in _evaluate_response_scores
metrics = ResponseEvaluator.evaluate(
../.venv/lib/python3.12/site-packages/google/adk/evaluation/response_evaluator.py:110: in evaluate
eval_result = ResponseEvaluator._perform_eval(
../.venv/lib/python3.12/site-packages/google/adk/evaluation/response_evaluator.py:142: in _perform_eval
return eval_task.evaluate()
../.venv/lib/python3.12/site-packages/vertexai/preview/evaluation/eval_task.py:514: in evaluate
eval_result = _evaluation.evaluate(
../.venv/lib/python3.12/site-packages/vertexai/preview/evaluation/_evaluation.py:1174: in evaluate
evaluation_result = _compute_metrics(evaluation_run_config)
../.venv/lib/python3.12/site-packages/vertexai/preview/evaluation/_evaluation.py:902: in _compute_metrics
request=_instance_evaluation.build_request(
../.venv/lib/python3.12/site-packages/vertexai/preview/evaluation/metrics/_instance_evaluation.py:316: in build_request
gapic_eval_service_types.RougeInstance(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <[AttributeError('Unknown field for RougeInstance: _pb') raised in repr()] RougeInstance object at 0xffff6dd121b0>, mapping = {'prediction': None, 'reference': nan}
ignore_unknown_fields = False, kwargs = {'prediction': None, 'reference': nan}, params = {'reference': nan}, marshal = <proto.marshal.marshal.Marshal object at 0xffffb63afa70>
key = 'reference', value = nan, pb_type = <ProtoType.STRING: 9>, pb_value = nan
def __init__(
self,
mapping=None,
*,
ignore_unknown_fields=False,
**kwargs,
):
# We accept several things for `mapping`:
# * An instance of this class.
# * An instance of the underlying protobuf descriptor class.
# * A dict
# * Nothing (keyword arguments only).
if mapping is None:
if not kwargs:
# Special fast path for empty construction.
super().__setattr__("_pb", self._meta.pb())
return
mapping = kwargs
elif isinstance(mapping, self._meta.pb):
# Make a copy of the mapping.
# This is a constructor for a new object, so users will assume
# that it will not have side effects on the arguments being
# passed in.
#
# The `wrap` method on the metaclass is the public API for taking
# ownership of the passed in protobuf object.
mapping = copy.deepcopy(mapping)
if kwargs:
mapping.MergeFrom(self._meta.pb(**kwargs))
super().__setattr__("_pb", mapping)
return
elif isinstance(mapping, type(self)):
# Just use the above logic on mapping's underlying pb.
self.__init__(mapping=mapping._pb, **kwargs)
return
elif isinstance(mapping, collections.abc.Mapping):
# Can't have side effects on mapping.
mapping = copy.copy(mapping)
# kwargs entries take priority for duplicate keys.
mapping.update(kwargs)
else:
# Sanity check: Did we get something not a map? Error if so.
raise TypeError(
"Invalid constructor input for %s: %r"
% (
self.__class__.__name__,
mapping,
)
)
params = {}
# Update the mapping to address any values that need to be
# coerced.
marshal = self._meta.marshal
for key, value in mapping.items():
(key, pb_type) = self._get_pb_type_from_key(key)
if pb_type is None:
if ignore_unknown_fields:
continue
raise ValueError(
"Unknown field for {}: {}".format(self.__class__.__name__, key)
)
pb_value = marshal.to_proto(pb_type, value)
if pb_value is not None:
params[key] = pb_value
# Create the internal protocol buffer.
> super().__setattr__("_pb", self._meta.pb(**params))
E TypeError: bad argument type for built-in operation
../.venv/lib/python3.12/site-packages/proto/message.py:734: TypeError
Expected behavior
I should be able to run Evals with pytest as described by the docs.
Desktop (please complete the following information):
- OS: Linux
- Python version(python -V): python3-3.12
- ADK version(pip show google-adk): 0.4.0