Skip to content
58 changes: 58 additions & 0 deletions alembic/versions/2026_05_06_update_evaluations_hydrated_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Update evaluations hydrated view

Revision ID: 234ed0606f2a
Revises: 34e1f2c7a9bd
Create Date: 2026-05-06 11:03:17.836098

"""

from typing import Sequence, Union

from alembic import op

# revision identifiers, used by Alembic.
revision: str = "234ed0606f2a"
down_revision: Union[str, Sequence[str], None] = "34e1f2c7a9bd"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Redefine the ``evaluations_hydrated`` view, adding ``avg_running_secs``.

    The view exposes every ``evaluations`` column plus three aggregates over
    the matching ``evaluation_runs_hydrated`` rows:

    * ``status`` -- ``'success'`` when every run is finished or errored with
      an error code in 1000-1999, ``'failure'`` when every run is finished or
      errored, otherwise ``'running'``.
    * ``score`` -- fraction of runs flagged ``solved``.
    * ``avg_running_secs`` -- mean seconds between
      ``started_running_agent_at`` and ``finished_or_errored_at``, taken over
      solved runs only.
    """
    # avg_running_secs is the last item of the select list; the columns that
    # precede it are the same ones the downgrade definition produces.
    create_view_sql = """
CREATE OR REPLACE VIEW evaluations_hydrated AS
SELECT
evaluations.*,
(CASE
WHEN EVERY(erh.status = 'finished' OR (erh.status = 'error' AND erh.error_code BETWEEN 1000 AND 1999)) THEN 'success'
WHEN EVERY(erh.status IN ('finished', 'error')) THEN 'failure'
ELSE 'running'
END)::evaluationstatus AS status,
COUNT(*) FILTER (WHERE erh.solved)::float / COUNT(*) AS score,
AVG(
EXTRACT(EPOCH FROM (erh.finished_or_errored_at - erh.started_running_agent_at))
) FILTER (WHERE erh.solved) AS avg_running_secs
FROM evaluations
INNER JOIN evaluation_runs_hydrated erh USING (evaluation_id)
GROUP BY evaluations.evaluation_id;
"""
    op.execute(create_view_sql)


def downgrade() -> None:
    """Restore the ``evaluations_hydrated`` view without ``avg_running_secs``.

    The restored view exposes every ``evaluations`` column plus the derived
    ``status`` and ``score`` aggregates over ``evaluation_runs_hydrated``.

    The view is dropped and recreated rather than replaced in place:
    PostgreSQL's ``CREATE OR REPLACE VIEW`` can only append columns to an
    existing view, so using it alone to remove the trailing
    ``avg_running_secs`` column fails with "cannot drop columns from view".
    """
    # Plain DROP VIEW (default RESTRICT) is deliberate: if another object
    # depends on this view the downgrade aborts with an error instead of
    # silently cascading the drop to that object.
    # NOTE(review): if such dependents exist they must be dropped and
    # recreated around this step -- confirm against the schema.
    op.execute("DROP VIEW IF EXISTS evaluations_hydrated;")
    op.execute(
        """
CREATE OR REPLACE VIEW evaluations_hydrated AS
SELECT
evaluations.*,
(CASE
WHEN EVERY(erh.status = 'finished' OR (erh.status = 'error' AND erh.error_code BETWEEN 1000 AND 1999)) THEN 'success'
WHEN EVERY(erh.status IN ('finished', 'error')) THEN 'failure'
ELSE 'running'
END)::evaluationstatus AS status,
COUNT(*) FILTER (WHERE erh.solved)::float / COUNT(*) AS score
FROM evaluations
INNER JOIN evaluation_runs_hydrated erh USING (evaluation_id)
GROUP BY evaluations.evaluation_id;
"""
    )
37 changes: 33 additions & 4 deletions queries/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,16 +289,45 @@ async def record_upload_attempt(conn: DatabaseConnection, upload_type: str, succ

@db_operation
async def get_top_agents(conn: DatabaseConnection, number_of_agents: int = 10, page: int = 1) -> list[AgentScored]:
"""Retrieve the top agents.

Agents are ordered by the score they got on "Validator" runs, then by their average running time on "Validator" runs, then by their creation time.

You can specify the number of results to return and the page number (for pagination).

Parameters
----------
conn : DatabaseConnection
Database connection to use for the query
number_of_agents : int, optional
Number of agents to return, by default 10
page : int, optional
Page number for pagination, by default 1

Returns
-------
list[AgentScored]
List of top agents with their scores.
"""
# TODO ADAM: this query was supposed to be fixed to remove the pagination concept
# TODO ADAM: maybe edge case bugs here if pagenum is 0,negative,or too high etc
offset = (page - 1) * number_of_agents

results = await conn.fetch(
"""
select * from agent_scores
where set_id = (select max(set_id) from evaluation_sets)
and agent_id not in (select agent_id from benchmark_agent_ids)
order by round(final_score::numeric, 6) desc, created_at asc
select ass.*
from agent_scores ass
left join lateral (
select avg(eh.avg_running_secs) as avg_running_secs
from evaluations_hydrated eh
where eh.agent_id = ass.agent_id
and eh.set_id = ass.set_id
and eh.evaluation_set_group = 'validator'::EvaluationSetGroup
and eh.status = 'success'::EvaluationStatus
) rt on true
where ass.set_id = (select max(set_id) from evaluation_sets)
and ass.agent_id not in (select agent_id from benchmark_agent_ids)
order by round(ass.final_score::numeric, 6) desc, rt.avg_running_secs asc nulls last, ass.created_at asc
limit $1 offset $2
""",
number_of_agents,
Expand Down
20 changes: 18 additions & 2 deletions queries/scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,21 @@ async def get_weight_receiving_agent_hotkey(conn: DatabaseConnection) -> Optiona
SELECT
ass.miner_hotkey AS miner_hotkey
FROM agent_scores ass
LEFT JOIN LATERAL (
SELECT AVG(eh.avg_running_secs) AS avg_running_secs
FROM evaluations_hydrated eh
WHERE eh.agent_id = ass.agent_id
AND eh.set_id = ass.set_id
AND eh.evaluation_set_group = 'validator'::EvaluationSetGroup
AND eh.status = 'success'::EvaluationStatus
) rt ON true
WHERE
ass.approved
AND ass.approved_at <= NOW()
AND ass.approved_at >= NOW() - INTERVAL '12 hours'
AND ass.set_id = (SELECT MAX(set_id) FROM evaluation_sets)
AND ass.agent_id NOT IN (SELECT agent_id FROM benchmark_agent_ids)
ORDER BY ass.final_score DESC, ass.created_at ASC
ORDER BY ass.final_score DESC, rt.avg_running_secs ASC NULLS LAST, ass.created_at ASC
LIMIT 1
"""
)
Expand All @@ -34,13 +42,21 @@ async def get_weight_receiving_agent_info(conn: DatabaseConnection) -> Optional[
ass.miner_hotkey AS miner_hotkey,
ass.agent_id AS agent_id
FROM agent_scores ass
LEFT JOIN LATERAL (
SELECT AVG(eh.avg_running_secs) AS avg_running_secs
FROM evaluations_hydrated eh
WHERE eh.agent_id = ass.agent_id
AND eh.set_id = ass.set_id
AND eh.evaluation_set_group = 'validator'::EvaluationSetGroup
AND eh.status = 'success'::EvaluationStatus
) rt ON true
WHERE
ass.approved
AND ass.approved_at <= NOW()
AND ass.approved_at >= NOW() - INTERVAL '12 hours'
AND ass.set_id = (SELECT MAX(set_id) FROM evaluation_sets)
AND ass.agent_id NOT IN (SELECT agent_id FROM benchmark_agent_ids)
ORDER BY ass.final_score DESC, ass.created_at ASC
ORDER BY ass.final_score DESC, rt.avg_running_secs ASC NULLS LAST, ass.created_at ASC
LIMIT 1
"""
)
Expand Down
Loading