From 893696fb6afbaab86b7ef4e56487ef35310f9c62 Mon Sep 17 00:00:00 2001 From: Andrew Lai Date: Wed, 22 Apr 2026 10:13:17 -0700 Subject: [PATCH 1/6] Add RO-Crate entity filters for targeted lookups --- api/openapi.codegen.yaml | 15 + api/openapi.yaml | 15 + .../src/api/apis/api_RoCrateEntitiesApi.jl | 14 +- .../julia_client/docs/RoCrateEntitiesApi.md | 7 +- .../api/ro_crate_entities_api.py | 35 +- reviews/ro-crate-filter-staged-review.md | 112 +++++++ src/client/apis/ro_crate_api.rs | 90 ++++++ src/client/apis/ro_crate_entities_api.rs | 10 + .../commands/pagination/ro_crate_entities.rs | 2 + src/client/commands/workflows.rs | 2 + src/client/ro_crate_utils.rs | 16 +- src/server/api/ro_crate.rs | 92 +++++- src/server/api_contract.rs | 2 + src/server/api_responses.rs | 2 + src/server/http_server.rs | 4 + src/server/http_server/ro_crate_transport.rs | 13 +- src/server/http_transport/response_mapping.rs | 2 +- src/server/live_router.rs | 6 + tests/test_auto_ro_crate.rs | 112 +++++-- tests/test_ro_crate.rs | 302 +++++++++++++++++- tests/test_workflow_export.rs | 14 +- ..._crate_entity_workflow_file_index.down.sql | 3 + ...ro_crate_entity_workflow_file_index.up.sql | 13 + 23 files changed, 800 insertions(+), 83 deletions(-) create mode 100644 reviews/ro-crate-filter-staged-review.md create mode 100644 torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql create mode 100644 torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql diff --git a/api/openapi.codegen.yaml b/api/openapi.codegen.yaml index ee9913966..ecb562f21 100644 --- a/api/openapi.codegen.yaml +++ b/api/openapi.codegen.yaml @@ -4815,6 +4815,21 @@ paths: - integer - 'null' format: int64 + - name: file_id + in: query + required: false + schema: + type: + - integer + - 'null' + format: int64 + - name: entity_id + in: query + required: false + schema: + type: + - string + - 'null' - name: sort_by in: query required: false diff --git a/api/openapi.yaml b/api/openapi.yaml index ee9913966..ecb562f21 100644 --- a/api/openapi.yaml +++ b/api/openapi.yaml @@ -4815,6 +4815,21 @@ paths: - integer - 'null' format: int64 + - name: file_id + in: query + required: false + schema: + type: + - integer + - 'null' + format: int64 + - name: entity_id + in: query + required: false + schema: + type: + - string + - 'null' - name: sort_by in: query required: false diff --git a/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl b/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl index e02167eed..da11a7645 100644 --- a/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl +++ b/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl @@ -138,11 +138,13 @@ const _returntypes_list_ro_crate_entities_RoCrateEntitiesApi = Dict{Regex,Type}( Regex("^" * replace("500", "x"=>".") * "\$") => ErrorResponse, ) -function _oacinternal_list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) +function _oacinternal_list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, file_id=nothing, entity_id=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) _ctx = OpenAPI.Clients.Ctx(_api.client, "GET", _returntypes_list_ro_crate_entities_RoCrateEntitiesApi, "/workflows/{id}/ro_crate_entities", []) OpenAPI.Clients.set_param(_ctx.path, "id", id) # type Int64 OpenAPI.Clients.set_param(_ctx.query, "offset", offset; style="form", is_explode=true) # type Int64 OpenAPI.Clients.set_param(_ctx.query, "limit", limit; style="form", is_explode=true) # type Int64 + OpenAPI.Clients.set_param(_ctx.query, "file_id", file_id; style="form", is_explode=true) # type Int64 + OpenAPI.Clients.set_param(_ctx.query, "entity_id", entity_id; style="form", is_explode=true) # type String OpenAPI.Clients.set_param(_ctx.query, "sort_by", sort_by; style="form", is_explode=true) # type String OpenAPI.Clients.set_param(_ctx.query, "reverse_sort", reverse_sort; style="form", is_explode=true) # type Bool OpenAPI.Clients.set_header_accept(_ctx, ["application/json", ]) @@ -154,18 +156,20 @@ end - id::Int64 (required) - offset::Int64 - limit::Int64 +- file_id::Int64 +- entity_id::String - sort_by::String - reverse_sort::Bool Return: ListRoCrateEntitiesResponse, OpenAPI.Clients.ApiResponse """ -function list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) - _ctx = _oacinternal_list_ro_crate_entities(_api, id; offset=offset, limit=limit, sort_by=sort_by, reverse_sort=reverse_sort, _mediaType=_mediaType) +function list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, file_id=nothing, entity_id=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) + _ctx = _oacinternal_list_ro_crate_entities(_api, id; offset=offset, limit=limit, file_id=file_id, entity_id=entity_id, sort_by=sort_by, reverse_sort=reverse_sort, _mediaType=_mediaType) return OpenAPI.Clients.exec(_ctx) end -function list_ro_crate_entities(_api::RoCrateEntitiesApi, response_stream::Channel, id::Int64; offset=nothing, limit=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) - _ctx = _oacinternal_list_ro_crate_entities(_api, id; offset=offset, limit=limit, sort_by=sort_by, reverse_sort=reverse_sort, _mediaType=_mediaType) +function list_ro_crate_entities(_api::RoCrateEntitiesApi, response_stream::Channel, id::Int64; offset=nothing, limit=nothing, file_id=nothing, entity_id=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) + _ctx = _oacinternal_list_ro_crate_entities(_api, id; offset=offset, limit=limit, file_id=file_id, entity_id=entity_id, sort_by=sort_by, reverse_sort=reverse_sort, _mediaType=_mediaType) return OpenAPI.Clients.exec(_ctx, response_stream) end diff --git a/julia_client/julia_client/docs/RoCrateEntitiesApi.md b/julia_client/julia_client/docs/RoCrateEntitiesApi.md index c4ebb5295..52e181e61 100644 --- a/julia_client/julia_client/docs/RoCrateEntitiesApi.md +++ b/julia_client/julia_client/docs/RoCrateEntitiesApi.md @@ -125,8 +125,8 @@ No authorization required [[Back to top]](#) [[Back to API list]](../README.md#api-endpoints) [[Back to Model list]](../README.md#models) [[Back to README]](../README.md) # **list_ro_crate_entities** -> list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) -> ListRoCrateEntitiesResponse, OpenAPI.Clients.ApiResponse
-> list_ro_crate_entities(_api::RoCrateEntitiesApi, response_stream::Channel, id::Int64; offset=nothing, limit=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) -> Channel{ ListRoCrateEntitiesResponse }, OpenAPI.Clients.ApiResponse +> list_ro_crate_entities(_api::RoCrateEntitiesApi, id::Int64; offset=nothing, limit=nothing, file_id=nothing, entity_id=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) -> ListRoCrateEntitiesResponse, OpenAPI.Clients.ApiResponse
+> list_ro_crate_entities(_api::RoCrateEntitiesApi, response_stream::Channel, id::Int64; offset=nothing, limit=nothing, file_id=nothing, entity_id=nothing, sort_by=nothing, reverse_sort=nothing, _mediaType=nothing) -> Channel{ ListRoCrateEntitiesResponse }, OpenAPI.Clients.ApiResponse @@ -143,6 +143,8 @@ Name | Type | Description | Notes ------------- | ------------- | ------------- | ------------- **offset** | **Int64** | | [default to nothing] **limit** | **Int64** | | [default to nothing] + **file_id** | **Int64** | | [default to nothing] + **entity_id** | **String** | | [default to nothing] **sort_by** | **String** | | [default to nothing] **reverse_sort** | **Bool** | | [default to nothing] @@ -189,4 +191,3 @@ No authorization required - **Accept**: application/json [[Back to top]](#) [[Back to API list]](../README.md#api-endpoints) [[Back to Model list]](../README.md#models) [[Back to README]](../README.md) - diff --git a/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py b/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py index aaed26d06..c25a3e5b2 100644 --- a/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py +++ b/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py @@ -1128,6 +1128,8 @@ def list_ro_crate_entities( id: Annotated[StrictInt, Field(description="Workflow ID")], offset: Optional[StrictInt] = None, limit: Optional[StrictInt] = None, + file_id: Optional[StrictInt] = None, + entity_id: Optional[StrictStr] = None, sort_by: Optional[StrictStr] = None, reverse_sort: Optional[StrictBool] = None, _request_timeout: Union[ @@ -1152,6 +1154,10 @@ def list_ro_crate_entities( :type offset: int :param limit: :type limit: int + :param file_id: + :type file_id: int + :param entity_id: + :type entity_id: str :param sort_by: :type sort_by: str :param reverse_sort: @@ -1182,6 +1188,8 @@ def list_ro_crate_entities( id=id, offset=offset, limit=limit, + file_id=file_id, + entity_id=entity_id, sort_by=sort_by, reverse_sort=reverse_sort, _request_auth=_request_auth, @@ -1213,6 +1221,8 @@ def list_ro_crate_entities_with_http_info( id: Annotated[StrictInt, Field(description="Workflow ID")], offset: Optional[StrictInt] = None, limit: Optional[StrictInt] = None, + file_id: Optional[StrictInt] = None, + entity_id: Optional[StrictStr] = None, sort_by: Optional[StrictStr] = None, reverse_sort: Optional[StrictBool] = None, _request_timeout: Union[ @@ -1237,6 +1247,10 @@ def list_ro_crate_entities_with_http_info( :type offset: int :param limit: :type limit: int + :param file_id: + :type file_id: int + :param entity_id: + :type entity_id: str :param sort_by: :type sort_by: str :param reverse_sort: @@ -1267,6 +1281,8 @@ def list_ro_crate_entities_with_http_info( id=id, offset=offset, limit=limit, + file_id=file_id, + entity_id=entity_id, sort_by=sort_by, reverse_sort=reverse_sort, _request_auth=_request_auth, @@ -1298,6 +1314,8 @@ def list_ro_crate_entities_without_preload_content( id: Annotated[StrictInt, Field(description="Workflow ID")], offset: Optional[StrictInt] = None, limit: Optional[StrictInt] = None, + file_id: Optional[StrictInt] = None, + entity_id: Optional[StrictStr] = None, sort_by: Optional[StrictStr] = None, reverse_sort: Optional[StrictBool] = None, _request_timeout: Union[ @@ -1322,6 +1340,10 @@ def list_ro_crate_entities_without_preload_content( :type offset: int :param limit: :type limit: int + :param file_id: + :type file_id: int + :param entity_id: + :type entity_id: str :param sort_by: :type sort_by: str :param reverse_sort: @@ -1352,6 +1374,8 @@ def list_ro_crate_entities_without_preload_content( id=id, offset=offset, limit=limit, + file_id=file_id, + entity_id=entity_id, sort_by=sort_by, reverse_sort=reverse_sort, _request_auth=_request_auth, @@ -1378,6 +1402,8 @@ def _list_ro_crate_entities_serialize( id, offset, limit, + file_id, + entity_id, sort_by, reverse_sort, _request_auth, @@ -1412,6 +1438,14 @@ def _list_ro_crate_entities_serialize( _query_params.append(('limit', limit)) + if file_id is not None: + + _query_params.append(('file_id', file_id)) + + if entity_id is not None: + + _query_params.append(('entity_id', entity_id)) + if sort_by is not None: _query_params.append(('sort_by', sort_by)) @@ -1750,4 +1784,3 @@ def _update_ro_crate_entity_serialize( _request_auth=_request_auth ) - diff --git a/reviews/ro-crate-filter-staged-review.md b/reviews/ro-crate-filter-staged-review.md new file mode 100644 index 000000000..6703555b9 --- /dev/null +++ b/reviews/ro-crate-filter-staged-review.md @@ -0,0 +1,112 @@ +# RO-Crate Filter Staged Review + +## Scope + +Review covers the currently staged changes only. + +Agent assignments: + +- Server/API contract/spec: `api/openapi.codegen.yaml`, `api/openapi.yaml`, `src/server/api/ro_crate.rs`, `src/server/api_contract.rs`, `src/server/http_server.rs`, `src/server/http_server/ro_crate_transport.rs`, `src/server/live_router.rs` +- Client/generated clients: `src/client/apis/ro_crate_api.rs`, `src/client/apis/ro_crate_entities_api.rs`, `src/client/commands/pagination/ro_crate_entities.rs`, `src/client/commands/workflows.rs`, `src/client/ro_crate_utils.rs`, `python_client/src/torc/openapi_client/api/ro_crate_entities_api.py`, `julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl`, `julia_client/julia_client/docs/RoCrateEntitiesApi.md` +- Tests/behavioral coverage: `tests/test_ro_crate.rs`, `tests/test_auto_ro_crate.rs`, `tests/test_workflow_export.rs` + +## Synthesis + +No high-severity correctness defects were identified in the staged diff. The reviewers converged on the main server/client path being internally consistent: the new `file_id` and `entity_id` filters are threaded through the HTTP layer, the server query/bind order looks correct, and the client call sites that should preserve old behavior now pass explicit `None` values. + +The main concerns are around staged-diff completeness, implicit uniqueness assumptions, and test strength. + +## Findings + +### Medium + +1. OpenAPI source-of-truth risk + +Files: + +- [api/openapi.yaml](/home/lai25/torc/api/openapi.yaml) +- [api/openapi.codegen.yaml](/home/lai25/torc/api/openapi.codegen.yaml) +- [src/openapi_spec.rs](/home/lai25/torc/src/openapi_spec.rs) + +The staged diff updates the checked-in OpenAPI YAML to add `file_id` and `entity_id`, but there is no staged change in the Rust-owned spec validation/emission source. This may be fine if the YAML was produced from local Rust changes already present elsewhere, but as staged it creates a drift risk and should be verified with the project’s OpenAPI sync/check flow before merge. + +2. New Rust `find_*` helpers return the first match without enforcing uniqueness + +File: + +- [src/client/apis/ro_crate_api.rs](/home/lai25/torc/src/client/apis/ro_crate_api.rs) + +`find_ro_crate_entity_by_file_id` and `find_ro_crate_entity_by_entity_id` fetch `limit = 1` and return the first item. That matches prior `.find(...)` behavior, but it codifies a silent "first match wins" policy. If the server ever allows duplicate matches for either key, callers will get an arbitrary row with no signal. + +3. Duplicate-file-link test is too weak to prove the intended failure mode + +File: + +- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) + +`test_ro_crate_rejects_duplicate_workflow_file_link` only asserts `result.is_err()`. Any server failure would satisfy the test, so it does not specifically prove that the duplicate rejection comes from the intended uniqueness behavior or that the API surfaces the right class of error. + +4. Transport trait signature change may break out-of-tree implementors + +File: + +- [src/server/api_contract.rs](/home/lai25/torc/src/server/api_contract.rs) + +`TransportApiCore::list_ro_crate_entities` now takes two additional parameters. In-repo implementations were updated, but any external implementor of that trait will need a coordinated update. + +### Low + +1. Coverage gaps around filter semantics + +File: + +- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) + +The new filter test covers positive lookups only. It does not cover: + +- empty/unknown `file_id` or `entity_id` +- combined `file_id` + `entity_id` filtering +- multiple-match behavior +- pagination metadata such as `has_more`, nonzero `offset`, or truncated `limit` + +2. Per-workflow uniqueness semantics are not directly tested + +File: + +- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) + +There is no companion test proving that the same `file_id` may still appear in a different workflow. The staged tests therefore do not directly pin the intended scope of uniqueness as "per workflow" rather than global. + +3. Cross-language convenience API parity is incomplete + +Files: + +- [src/client/apis/ro_crate_api.rs](/home/lai25/torc/src/client/apis/ro_crate_api.rs) +- [python_client/src/torc/openapi_client/api/ro_crate_entities_api.py](/home/lai25/torc/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py) +- [julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl](/home/lai25/torc/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl) + +Rust adds `find_*` convenience helpers, while Python and Julia expose only the raw filtered list API. Endpoint parity is present; helper-level parity is not. + +4. Count query lost SQLx macro-time checking + +File: + +- [src/server/api/ro_crate.rs](/home/lai25/torc/src/server/api/ro_crate.rs) + +The count path moved from `query!` to dynamic `sqlx::query(...)` so it can share the same optional filters. That is reasonable, but it removes compile-time SQL validation for that query. + +## Clean Checks + +The reviewers found these parts of the staged change to be sound: + +- Server filter plumbing is consistent from router to transport to query execution. +- Query parameter binding order matches the generated SQL placeholders. +- Existing call sites that should preserve old behavior now pass explicit `None` values for the new filters. +- Python and Julia staged client changes appear structurally consistent with the new endpoint parameters. + +## Suggested Follow-Up Before Merge + +1. Verify the OpenAPI source-of-truth flow on the staged tree so the YAML changes are not drifting from Rust-owned spec generation. +2. Strengthen `test_ro_crate_rejects_duplicate_workflow_file_link` to assert the specific failure semantics you want. +3. Add at least one negative/empty filter test and one pagination-metadata test for the new filtered list path. +4. Decide whether the silent "first match wins" behavior in the new Rust helpers is an acceptable long-term contract. diff --git a/src/client/apis/ro_crate_api.rs b/src/client/apis/ro_crate_api.rs index 93872132a..5cedfba03 100644 --- a/src/client/apis/ro_crate_api.rs +++ b/src/client/apis/ro_crate_api.rs @@ -1,3 +1,5 @@ +use log::warn; + pub use super::ro_crate_entities_api::{ CreateRoCrateEntityError, DeleteRoCrateEntitiesError, DeleteRoCrateEntityError, GetRoCrateEntityError, ListRoCrateEntitiesError, UpdateRoCrateEntityError, @@ -7,6 +9,94 @@ pub use super::ro_crate_entities_api::{ use super::{Error, configuration, ro_crate_entities_api}; use crate::models; +#[allow(clippy::too_many_arguments)] +pub fn list_ro_crate_entities_with_filters( + configuration: &configuration::Configuration, + id: i64, + offset: Option, + limit: Option, + file_id: Option, + entity_id: Option<&str>, + sort_by: Option<&str>, + reverse_sort: Option, +) -> Result> { + ro_crate_entities_api::list_ro_crate_entities( + configuration, + id, + offset, + limit, + file_id, + entity_id, + sort_by, + reverse_sort, + ) +} + +fn first_entity_from_filtered_response( + response: models::ListRoCrateEntitiesResponse, + filter_name: &str, + filter_value: impl std::fmt::Display, +) -> Option { + if response.total_count > 1 { + warn!( + "Expected at most one RO-Crate entity for {}={}, found {}. Returning the first match.", + filter_name, filter_value, response.total_count + ); + } + + response.items.into_iter().next() +} + +/// Find the first RO-Crate entity linked to a file. +/// +/// If multiple entities match, logs a warning and returns the first item. +pub fn find_ro_crate_entity_by_file_id( + configuration: &configuration::Configuration, + workflow_id: i64, + file_id: i64, +) -> Result, Error> { + let response = list_ro_crate_entities_with_filters( + configuration, + workflow_id, + Some(0), + Some(2), + Some(file_id), + None, + None, + None, + )?; + + Ok(first_entity_from_filtered_response( + response, "file_id", file_id, + )) +} + +/// Find the first RO-Crate entity with a matching entity ID. +/// +/// If multiple entities match, logs a warning and returns the first item. +pub fn find_ro_crate_entity_by_entity_id( + configuration: &configuration::Configuration, + workflow_id: i64, + entity_id: &str, +) -> Result, Error> { + let response = list_ro_crate_entities_with_filters( + configuration, + workflow_id, + Some(0), + Some(2), + None, + Some(entity_id), + None, + None, + )?; + + Ok(first_entity_from_filtered_response( + response, + "entity_id", + entity_id, + )) +} + pub fn delete_ro_crate_entities( configuration: &configuration::Configuration, id: i64, diff --git a/src/client/apis/ro_crate_entities_api.rs b/src/client/apis/ro_crate_entities_api.rs index fe9ef0346..4596c7d3c 100644 --- a/src/client/apis/ro_crate_entities_api.rs +++ b/src/client/apis/ro_crate_entities_api.rs @@ -308,6 +308,8 @@ pub fn list_ro_crate_entities( id: i64, offset: Option, limit: Option, + file_id: Option, + entity_id: Option<&str>, sort_by: Option<&str>, reverse_sort: Option, ) -> Result> { @@ -315,6 +317,8 @@ pub fn list_ro_crate_entities( let p_path_id = id; let p_query_offset = offset; let p_query_limit = limit; + let p_query_file_id = file_id; + let p_query_entity_id = entity_id; let p_query_sort_by = sort_by; let p_query_reverse_sort = reverse_sort; @@ -331,6 +335,12 @@ pub fn list_ro_crate_entities( if let Some(ref param_value) = p_query_limit { req_builder = req_builder.query(&[("limit", ¶m_value.to_string())]); } + if let Some(ref param_value) = p_query_file_id { + req_builder = req_builder.query(&[("file_id", ¶m_value.to_string())]); + } + if let Some(ref param_value) = p_query_entity_id { + req_builder = req_builder.query(&[("entity_id", ¶m_value.to_string())]); + } if let Some(ref param_value) = p_query_sort_by { req_builder = req_builder.query(&[("sort_by", ¶m_value.to_string())]); } diff --git a/src/client/commands/pagination/ro_crate_entities.rs b/src/client/commands/pagination/ro_crate_entities.rs index b0f997e17..82459b255 100644 --- a/src/client/commands/pagination/ro_crate_entities.rs +++ b/src/client/commands/pagination/ro_crate_entities.rs @@ -72,6 +72,8 @@ impl Paginatable for RoCrateEntityModel { params.workflow_id, Some(params.offset), Some(limit), + None, + None, params.sort_by(), params.reverse_sort(), )?; diff --git a/src/client/commands/workflows.rs b/src/client/commands/workflows.rs index 1dde6f958..fbe86555a 100644 --- a/src/client/commands/workflows.rs +++ b/src/client/commands/workflows.rs @@ -3090,6 +3090,8 @@ fn handle_export( None, None, None, + None, + None, ) { Ok(response) => response.items, Err(e) => { diff --git a/src/client/ro_crate_utils.rs b/src/client/ro_crate_utils.rs index dc18fc22a..f978831e9 100644 --- a/src/client/ro_crate_utils.rs +++ b/src/client/ro_crate_utils.rs @@ -223,18 +223,8 @@ pub fn find_entity_for_file( workflow_id: i64, file_id: i64, ) -> Option { - match apis::ro_crate_entities_api::list_ro_crate_entities( - config, - workflow_id, - None, - None, - None, - None, - ) { - Ok(response) => response - .items - .into_iter() - .find(|e| e.file_id == Some(file_id)), + match apis::ro_crate_api::find_ro_crate_entity_by_file_id(config, workflow_id, file_id) { + Ok(entity) => entity, Err(e) => { warn!("Failed to check for existing RO-Crate entities: {}", e); None @@ -555,6 +545,8 @@ pub fn create_software_entities(config: &Configuration, workflow_id: i64, run_id None, None, None, + None, + None, ) { Ok(response) => response.items.into_iter().map(|e| e.entity_id).collect(), Err(e) => { diff --git a/src/server/api/ro_crate.rs b/src/server/api/ro_crate.rs index 9ed9285bb..a0a94a7d7 100644 --- a/src/server/api/ro_crate.rs +++ b/src/server/api/ro_crate.rs @@ -4,7 +4,7 @@ use crate::server::transport_types::context_types::{ApiError, Has, XSpanIdString}; use async_trait::async_trait; -use log::{debug, info}; +use log::{debug, info, warn}; use sqlx::Row; use crate::server::api_responses::{ @@ -59,6 +59,8 @@ pub trait RoCrateApi { workflow_id: i64, offset: i64, limit: i64, + file_id: Option, + entity_id: Option, sort_by: Option, reverse_sort: Option, context: &C, @@ -364,6 +366,39 @@ where { Ok(result) => result, Err(e) => { + let error_string = e.to_string(); + if error_string.contains("UNIQUE constraint failed") { + let (message, duplicate_field) = if error_string + .contains("ro_crate_entity.workflow_id, ro_crate_entity.file_id") + { + ( + "RO-Crate entity already exists for this workflow/file link", + "file_id", + ) + } else if error_string + .contains("ro_crate_entity.workflow_id, ro_crate_entity.entity_id") + { + ( + "RO-Crate entity already exists for this workflow/entity_id link", + "entity_id", + ) + } else { + ("RO-Crate entity already exists", "unknown") + }; + + warn!( + "Duplicate RO-Crate entity rejected for workflow_id={} duplicate_field={} file_id={:?} entity_id={}", + body.workflow_id, duplicate_field, body.file_id, body.entity_id + ); + let error_response = models::ErrorResponse::new(serde_json::json!({ + "message": message + })); + return Ok( + CreateRoCrateEntityResponse::UnprocessableContentErrorResponse( + error_response, + ), + ); + } return Err(database_error_with_msg( e, "Failed to create RO-Crate entity record", @@ -436,15 +471,19 @@ where workflow_id: i64, offset: i64, limit: i64, + file_id: Option, + entity_id: Option, sort_by: Option, reverse_sort: Option, context: &C, ) -> Result { debug!( - "list_ro_crate_entities({}, {}, {}, {:?}, {:?}) - X-Span-ID: {:?}", + "list_ro_crate_entities({}, {}, {}, {:?}, {:?}, {:?}, {:?}) - X-Span-ID: {:?}", workflow_id, offset, limit, + file_id, + entity_id, sort_by, reverse_sort, context.get().0.clone() @@ -461,11 +500,19 @@ where None => None, }; + let mut where_clauses = vec!["workflow_id = ?".to_string()]; + if file_id.is_some() { + where_clauses.push("file_id = ?".to_string()); + } + if entity_id.is_some() { + where_clauses.push("entity_id = ?".to_string()); + } + let query = SqlQueryBuilder::new( "SELECT id, workflow_id, file_id, entity_id, entity_type, metadata FROM ro_crate_entity" .to_string(), ) - .with_where("workflow_id = ?".to_string()) + .with_where(where_clauses.join(" AND ")) .with_pagination_and_sorting( offset, limit, @@ -476,11 +523,15 @@ where ) .build(); - let records = match sqlx::query(&query) - .bind(workflow_id) - .fetch_all(self.context.pool.as_ref()) - .await - { + let mut query_builder = sqlx::query(&query).bind(workflow_id); + if let Some(file_id) = file_id { + query_builder = query_builder.bind(file_id); + } + if let Some(ref entity_id) = entity_id { + query_builder = query_builder.bind(entity_id); + } + + let records = match query_builder.fetch_all(self.context.pool.as_ref()).await { Ok(records) => records, Err(e) => { return Err(database_error_with_msg( @@ -504,15 +555,24 @@ where let count = items.len() as i64; - // Get total count - let total_count = match sqlx::query!( - r#"SELECT COUNT(*) as total FROM ro_crate_entity WHERE workflow_id = $1"#, - workflow_id - ) - .fetch_one(self.context.pool.as_ref()) - .await + // Get total count with the same filters but without pagination. + let count_query = format!( + "SELECT COUNT(*) as total FROM ro_crate_entity WHERE {}", + where_clauses.join(" AND ") + ); + let mut count_query_builder = sqlx::query(&count_query).bind(workflow_id); + if let Some(file_id) = file_id { + count_query_builder = count_query_builder.bind(file_id); + } + if let Some(ref entity_id) = entity_id { + count_query_builder = count_query_builder.bind(entity_id); + } + + let total_count = match count_query_builder + .fetch_one(self.context.pool.as_ref()) + .await { - Ok(row) => row.total, + Ok(row) => row.get("total"), Err(e) => { return Err(database_error_with_msg( e, diff --git a/src/server/api_contract.rs b/src/server/api_contract.rs index 65878f679..8fb5fca0b 100644 --- a/src/server/api_contract.rs +++ b/src/server/api_contract.rs @@ -212,6 +212,8 @@ pub trait TransportApiCore { workflow_id: i64, offset: Option, limit: Option, + file_id: Option, + entity_id: Option, sort_by: Option, reverse_sort: Option, context: &C, diff --git a/src/server/api_responses.rs b/src/server/api_responses.rs index f574c0439..8f32b7123 100644 --- a/src/server/api_responses.rs +++ b/src/server/api_responses.rs @@ -152,6 +152,8 @@ pub enum CreateRoCrateEntityResponse { ForbiddenErrorResponse(models::ErrorResponse), /// Not found error response NotFoundErrorResponse(models::ErrorResponse), + /// Unprocessable content error response + UnprocessableContentErrorResponse(models::ErrorResponse), /// Default error response DefaultErrorResponse(models::ErrorResponse), } diff --git a/src/server/http_server.rs b/src/server/http_server.rs index 7e66f9251..03463085e 100644 --- a/src/server/http_server.rs +++ b/src/server/http_server.rs @@ -475,6 +475,8 @@ where workflow_id: i64, offset: Option, limit: Option, + file_id: Option, + entity_id: Option, sort_by: Option, reverse_sort: Option, context: &C, @@ -483,6 +485,8 @@ where workflow_id, offset, limit, + file_id, + entity_id, sort_by, reverse_sort, context, diff --git a/src/server/http_server/ro_crate_transport.rs b/src/server/http_server/ro_crate_transport.rs index b3ee0530f..1de5e6461 100644 --- a/src/server/http_server/ro_crate_transport.rs +++ b/src/server/http_server/ro_crate_transport.rs @@ -36,6 +36,8 @@ where workflow_id: i64, offset: Option, limit: Option, + file_id: Option, + entity_id: Option, sort_by: Option, reverse_sort: Option, context: &C, @@ -43,7 +45,16 @@ where authorize_workflow!(self, workflow_id, context, ListRoCrateEntitiesResponse); let (offset, limit) = process_pagination_params(offset, limit)?; self.ro_crate_api - .list_ro_crate_entities(workflow_id, offset, limit, sort_by, reverse_sort, context) + .list_ro_crate_entities( + workflow_id, + offset, + limit, + file_id, + entity_id, + sort_by, + reverse_sort, + context, + ) .await } pub(super) async fn transport_update_ro_crate_entity( diff --git a/src/server/http_transport/response_mapping.rs b/src/server/http_transport/response_mapping.rs index fb0ae734f..134e20a98 100644 --- a/src/server/http_transport/response_mapping.rs +++ b/src/server/http_transport/response_mapping.rs @@ -127,7 +127,7 @@ map_response_std!( CreateSlurmStatsResponse, SuccessfulResponse ); -map_response_std!( +map_response_unprocessable!( create_ro_crate_entity_response, CreateRoCrateEntityResponse, SuccessfulResponse diff --git a/src/server/live_router.rs b/src/server/live_router.rs index 3df818d9d..884e68e81 100644 --- a/src/server/live_router.rs +++ b/src/server/live_router.rs @@ -3698,6 +3698,10 @@ pub struct RoCrateEntitiesQuery { #[param(nullable = true)] pub limit: Option, #[param(nullable = true)] + pub file_id: Option, + #[param(nullable = true)] + pub entity_id: Option, + #[param(nullable = true)] pub sort_by: Option, #[param(nullable = true)] pub reverse_sort: Option, @@ -3925,6 +3929,8 @@ pub async fn list_ro_crate_entities( workflow_id, query.offset, query.limit, + query.file_id, + query.entity_id, query.sort_by, query.reverse_sort, &context, diff --git a/tests/test_auto_ro_crate.rs b/tests/test_auto_ro_crate.rs index dcd7cacf5..0bf7c2ca4 100644 --- a/tests/test_auto_ro_crate.rs +++ b/tests/test_auto_ro_crate.rs @@ -197,9 +197,17 @@ fn test_auto_ro_crate_input_files_on_initialize(start_server: &ServerProcess) { fs::write(work_dir.join("input.json"), input_data).expect("Failed to write input.json"); // Verify no RO-Crate entities exist yet - let entities_before = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities_before = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); assert_eq!( entities_before.items.len(), 0, @@ -211,9 +219,17 @@ fn test_auto_ro_crate_input_files_on_initialize(start_server: &ServerProcess) { .expect("Failed to initialize jobs"); // Verify RO-Crate entity was created for the input file - let entities_after = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities_after = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let items = entities_after.items; // Should have at least one entity (for the input file) @@ -293,9 +309,17 @@ fn test_auto_ro_crate_output_files_on_job_completion(start_server: &ServerProces ); // Verify RO-Crate entities were created - let entities = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list RO-Crate entities"); + let entities = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list RO-Crate entities"); let items = entities.items; // Should have entities for both input and output files, plus a CreateAction @@ -391,9 +415,17 @@ fn test_auto_ro_crate_disabled_by_default(start_server: &ServerProcess) { apis::workflows_api::initialize_jobs(config, workflow_id, Some(false), Some(false)).unwrap(); // Verify no file-based RO-Crate entities were created (only the SoftwareApplication for torc-server) - let entities = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let items = entities.items; let file_entities: Vec<_> = items .iter() @@ -424,9 +456,17 @@ fn test_auto_ro_crate_diamond_workflow(start_server: &ServerProcess) { .expect("Failed to initialize jobs"); // Verify input file entity was created - let entities_after_init = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities_after_init = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let items = entities_after_init.items; let input_entity = items.iter().find(|e| e.file_id == Some(input_file_ids[0])); @@ -481,9 +521,17 @@ fn test_auto_ro_crate_diamond_workflow(start_server: &ServerProcess) { assert!(work_dir.join("f4.json").exists(), "f4.json should exist"); // Verify RO-Crate entities were created for output files - let final_entities = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let final_entities = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let final_items = final_entities.items; // Should have entities for: @@ -581,9 +629,17 @@ fn test_auto_ro_crate_second_run_replaces_entities(start_server: &ServerProcess) ); // Capture first run RO-Crate entities - let entities_run1 = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities_run1 = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let items_run1 = entities_run1.items; let file_entities_run1: Vec<_> = items_run1 @@ -648,9 +704,17 @@ fn test_auto_ro_crate_second_run_replaces_entities(start_server: &ServerProcess) // --- Verify file entities were replaced, not duplicated --- - let entities_run2 = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .unwrap(); + let entities_run2 = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .unwrap(); let items_run2 = entities_run2.items; let file_entities_run2: Vec<_> = items_run2 diff --git a/tests/test_ro_crate.rs b/tests/test_ro_crate.rs index 50b3278f1..f9575ab4f 100644 --- a/tests/test_ro_crate.rs +++ b/tests/test_ro_crate.rs @@ -4,6 +4,7 @@ use common::{ServerProcess, create_test_workflow, start_server}; use rstest::rstest; use serde_json::json; use torc::client::apis; +use torc::client::apis::Error; use torc::models::RoCrateEntityModel; #[rstest] @@ -52,9 +53,17 @@ fn test_ro_crate_crud(start_server: &ServerProcess) { assert_eq!(result.entity_id, "data/output.parquet"); // List entities - let list_response = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list entities"); + let list_response = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list entities"); let items = list_response.items; assert_eq!(items.len(), 1); assert_eq!(items[0].entity_type, "Dataset"); @@ -63,9 +72,17 @@ fn test_ro_crate_crud(start_server: &ServerProcess) { apis::ro_crate_api::delete_ro_crate_entity(config, entity_id).expect("Failed to delete entity"); // Verify it's gone - let list_response = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list entities after delete"); + let list_response = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list entities after delete"); let items = list_response.items; assert_eq!(items.len(), 0); } @@ -102,6 +119,225 @@ fn test_ro_crate_with_file_id(start_server: &ServerProcess) { assert_eq!(created.file_id, Some(file_id)); } +#[rstest] +fn test_ro_crate_list_filters(start_server: &ServerProcess) { + let config = &start_server.config; + + let workflow = create_test_workflow(config, "test_ro_crate_list_filters"); + let workflow_id = workflow.id.unwrap(); + + let file = torc::models::FileModel::new( + workflow_id, + "filtered.csv".to_string(), + "/tmp/filtered.csv".to_string(), + ); + let created_file = + apis::files_api::create_file(config, file).expect("Failed to create test file"); + let file_id = created_file.id.unwrap(); + + let file_entity = RoCrateEntityModel { + id: None, + workflow_id, + file_id: Some(file_id), + entity_id: "filtered.csv".to_string(), + entity_type: "File".to_string(), + metadata: json!({"name": "Filtered CSV"}).to_string(), + }; + apis::ro_crate_api::create_ro_crate_entity(config, file_entity) + .expect("Failed to create file entity"); + + let software_entity = RoCrateEntityModel { + id: None, + workflow_id, + file_id: None, + entity_id: "#software-test-run-1".to_string(), + entity_type: "SoftwareApplication".to_string(), + metadata: json!({"name": "Test Software"}).to_string(), + }; + apis::ro_crate_api::create_ro_crate_entity(config, software_entity) + .expect("Failed to create software entity"); + + let by_file = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(0), + Some(10), + Some(file_id), + None, + None, + None, + ) + .expect("Failed to list filtered by file_id"); + assert_eq!(by_file.items.len(), 1); + assert_eq!(by_file.count, 1); + assert_eq!(by_file.total_count, 1); + assert_eq!(by_file.items[0].file_id, Some(file_id)); + + let by_entity = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(0), + Some(10), + None, + Some("#software-test-run-1"), + None, + None, + ) + .expect("Failed to list filtered by entity_id"); + assert_eq!(by_entity.items.len(), 1); + assert_eq!(by_entity.items[0].entity_id, "#software-test-run-1"); + + let by_both = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(0), + Some(10), + Some(file_id), + Some("filtered.csv"), + None, + None, + ) + .expect("Failed to list filtered by file_id and entity_id"); + assert_eq!(by_both.items.len(), 1); + assert_eq!(by_both.total_count, 1); + assert!(!by_both.has_more); + + let empty = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(0), + Some(10), + Some(file_id), + Some("does-not-match"), + None, + None, + ) + .expect("Failed to list filtered with non-matching combined filters"); + assert!(empty.items.is_empty()); + assert_eq!(empty.count, 0); + assert_eq!(empty.total_count, 0); + assert!(!empty.has_more); + + let paged = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(0), + Some(1), + None, + None, + Some("entity_id"), + Some(false), + ) + .expect("Failed to list paged entities"); + assert_eq!(paged.items.len(), 1); + assert_eq!(paged.count, 1); + assert_eq!(paged.total_count, 2); + assert!(paged.has_more); + + let paged_offset = apis::ro_crate_api::list_ro_crate_entities_with_filters( + config, + workflow_id, + Some(1), + Some(1), + None, + None, + Some("entity_id"), + Some(false), + ) + .expect("Failed to list second page of entities"); + assert_eq!(paged_offset.items.len(), 1); + assert_eq!(paged_offset.count, 1); + assert_eq!(paged_offset.total_count, 2); + assert!(!paged_offset.has_more); + + let missing_by_file = + apis::ro_crate_api::find_ro_crate_entity_by_file_id(config, workflow_id, file_id + 1) + .expect("Failed to lookup missing file_id"); + assert!(missing_by_file.is_none()); + + let missing_by_entity = apis::ro_crate_api::find_ro_crate_entity_by_entity_id( + config, + workflow_id, + "#software-missing", + ) + .expect("Failed to lookup missing entity_id"); + assert!(missing_by_entity.is_none()); + + let found_by_file = + apis::ro_crate_api::find_ro_crate_entity_by_file_id(config, workflow_id, file_id) + .expect("Failed to lookup by file_id"); + assert_eq!( + found_by_file + .expect("Expected entity for file_id") + .entity_id, + "filtered.csv" + ); + + let found_by_entity = apis::ro_crate_api::find_ro_crate_entity_by_entity_id( + config, + workflow_id, + "#software-test-run-1", + ) + .expect("Failed to lookup by entity_id"); + assert_eq!( + found_by_entity + .expect("Expected entity for entity_id") + .entity_type, + "SoftwareApplication" + ); +} + +#[rstest] +fn test_ro_crate_rejects_duplicate_workflow_file_link(start_server: &ServerProcess) { + let config = &start_server.config; + + let workflow = create_test_workflow(config, "test_ro_crate_duplicate_file_link"); + let workflow_id = workflow.id.unwrap(); + + let file = torc::models::FileModel::new( + workflow_id, + "duplicate.csv".to_string(), + "/tmp/duplicate.csv".to_string(), + ); + let created_file = + apis::files_api::create_file(config, file).expect("Failed to create test file"); + let file_id = created_file.id.unwrap(); + + let first = RoCrateEntityModel { + id: None, + workflow_id, + file_id: Some(file_id), + entity_id: "duplicate.csv".to_string(), + entity_type: "File".to_string(), + metadata: json!({"name": "First"}).to_string(), + }; + apis::ro_crate_api::create_ro_crate_entity(config, first) + .expect("Failed to create first entity"); + + let duplicate = RoCrateEntityModel { + id: None, + workflow_id, + file_id: Some(file_id), + entity_id: "duplicate-second.csv".to_string(), + entity_type: "File".to_string(), + metadata: json!({"name": "Second"}).to_string(), + }; + let result = apis::ro_crate_api::create_ro_crate_entity(config, duplicate); + match result { + Err(Error::ResponseError(content)) => { + assert_eq!(content.status.as_u16(), 422); + assert!( + content + .content + .contains("RO-Crate entity already exists for this workflow/file link"), + "unexpected error payload: {}", + content.content + ); + } + other => panic!("Expected 422 duplicate rejection, got: {:?}", other), + } +} + #[rstest] fn test_ro_crate_external_entity(start_server: &ServerProcess) { let config = &start_server.config; @@ -151,9 +387,17 @@ fn test_ro_crate_bulk_delete(start_server: &ServerProcess) { } // Verify all three exist - let list = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list"); + let list = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list"); assert_eq!(list.items.len(), 3); // Bulk delete all entities for the workflow @@ -162,9 +406,17 @@ fn test_ro_crate_bulk_delete(start_server: &ServerProcess) { assert_eq!(result.deleted_count, 3); // Verify all are gone - let list = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list after delete"); + let list = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list after delete"); assert_eq!(list.items.len(), 0); } @@ -185,17 +437,33 @@ fn test_ro_crate_cascade_delete(start_server: &ServerProcess) { apis::ro_crate_api::create_ro_crate_entity(config, entity).expect("Failed to create entity"); // Verify it exists - let list = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None) - .expect("Failed to list"); + let list = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list"); assert_eq!(list.items.len(), 1); // Delete the workflow (should cascade delete RO-Crate entities) apis::workflows_api::delete_workflow(config, workflow_id).expect("Failed to delete workflow"); // The workflow is gone, so listing should fail or return error - let result = - apis::ro_crate_api::list_ro_crate_entities(config, workflow_id, None, None, None, None); + let result = apis::ro_crate_api::list_ro_crate_entities( + config, + workflow_id, + None, + None, + None, + None, + None, + None, + ); // Either the list returns empty (workflow gone, no entities) or an error match result { Ok(response) => { diff --git a/tests/test_workflow_export.rs b/tests/test_workflow_export.rs index 5a398b51a..b0a81c6d0 100644 --- a/tests/test_workflow_export.rs +++ b/tests/test_workflow_export.rs @@ -793,9 +793,17 @@ fn test_export_import_ro_crate_job_id_remapping(start_server: &ServerProcess) { assert_ne!(new_job_id, job_id, "New job should have different ID"); // Get the imported RO-Crate entities - let entities_response = - apis::ro_crate_api::list_ro_crate_entities(config, new_workflow_id, None, None, None, None) - .expect("Failed to list RO-Crate entities"); + let entities_response = apis::ro_crate_api::list_ro_crate_entities( + config, + new_workflow_id, + None, + None, + None, + None, + None, + None, + ) + .expect("Failed to list RO-Crate entities"); let imported_entities = entities_response.items; assert_eq!(imported_entities.len(), 2); diff --git a/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql b/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql new file mode 100644 index 000000000..1e20fcdf6 --- /dev/null +++ b/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql @@ -0,0 +1,3 @@ +DROP INDEX IF EXISTS idx_ro_crate_entity_workflow_file; +CREATE INDEX idx_ro_crate_entity_workflow_file +ON ro_crate_entity(workflow_id, file_id); diff --git a/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql b/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql new file mode 100644 index 000000000..900020435 --- /dev/null +++ b/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql @@ -0,0 +1,13 @@ +DELETE FROM ro_crate_entity +WHERE file_id IS NOT NULL + AND id NOT IN ( + SELECT MAX(id) + FROM ro_crate_entity + WHERE file_id IS NOT NULL + GROUP BY workflow_id, file_id + ); + +DROP INDEX IF EXISTS idx_ro_crate_entity_workflow_file; +CREATE UNIQUE INDEX idx_ro_crate_entity_workflow_file +ON ro_crate_entity(workflow_id, file_id) +WHERE file_id IS NOT NULL; From e5475a349b603cece6f2ce6922b924aced492166 Mon Sep 17 00:00:00 2001 From: Andrew Lai Date: Wed, 29 Apr 2026 12:13:17 -0700 Subject: [PATCH 2/6] removing review dir --- reviews/ro-crate-filter-staged-review.md | 112 ----------------------- 1 file changed, 112 deletions(-) delete mode 100644 reviews/ro-crate-filter-staged-review.md diff --git a/reviews/ro-crate-filter-staged-review.md b/reviews/ro-crate-filter-staged-review.md deleted file mode 100644 index 6703555b9..000000000 --- a/reviews/ro-crate-filter-staged-review.md +++ /dev/null @@ -1,112 +0,0 @@ -# RO-Crate Filter Staged Review - -## Scope - -Review covers the currently staged changes only. - -Agent assignments: - -- Server/API contract/spec: `api/openapi.codegen.yaml`, `api/openapi.yaml`, `src/server/api/ro_crate.rs`, `src/server/api_contract.rs`, `src/server/http_server.rs`, `src/server/http_server/ro_crate_transport.rs`, `src/server/live_router.rs` -- Client/generated clients: `src/client/apis/ro_crate_api.rs`, `src/client/apis/ro_crate_entities_api.rs`, `src/client/commands/pagination/ro_crate_entities.rs`, `src/client/commands/workflows.rs`, `src/client/ro_crate_utils.rs`, `python_client/src/torc/openapi_client/api/ro_crate_entities_api.py`, `julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl`, `julia_client/julia_client/docs/RoCrateEntitiesApi.md` -- Tests/behavioral coverage: `tests/test_ro_crate.rs`, `tests/test_auto_ro_crate.rs`, `tests/test_workflow_export.rs` - -## Synthesis - -No high-severity correctness defects were identified in the staged diff. The reviewers converged on the main server/client path being internally consistent: the new `file_id` and `entity_id` filters are threaded through the HTTP layer, the server query/bind order looks correct, and the client call sites that should preserve old behavior now pass explicit `None` values. - -The main concerns are around staged-diff completeness, implicit uniqueness assumptions, and test strength. - -## Findings - -### Medium - -1. OpenAPI source-of-truth risk - -Files: - -- [api/openapi.yaml](/home/lai25/torc/api/openapi.yaml) -- [api/openapi.codegen.yaml](/home/lai25/torc/api/openapi.codegen.yaml) -- [src/openapi_spec.rs](/home/lai25/torc/src/openapi_spec.rs) - -The staged diff updates the checked-in OpenAPI YAML to add `file_id` and `entity_id`, but there is no staged change in the Rust-owned spec validation/emission source. This may be fine if the YAML was produced from local Rust changes already present elsewhere, but as staged it creates a drift risk and should be verified with the project’s OpenAPI sync/check flow before merge. - -2. New Rust `find_*` helpers return the first match without enforcing uniqueness - -File: - -- [src/client/apis/ro_crate_api.rs](/home/lai25/torc/src/client/apis/ro_crate_api.rs) - -`find_ro_crate_entity_by_file_id` and `find_ro_crate_entity_by_entity_id` fetch `limit = 1` and return the first item. That matches prior `.find(...)` behavior, but it codifies a silent "first match wins" policy. If the server ever allows duplicate matches for either key, callers will get an arbitrary row with no signal. - -3. Duplicate-file-link test is too weak to prove the intended failure mode - -File: - -- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) - -`test_ro_crate_rejects_duplicate_workflow_file_link` only asserts `result.is_err()`. Any server failure would satisfy the test, so it does not specifically prove that the duplicate rejection comes from the intended uniqueness behavior or that the API surfaces the right class of error. - -4. Transport trait signature change may break out-of-tree implementors - -File: - -- [src/server/api_contract.rs](/home/lai25/torc/src/server/api_contract.rs) - -`TransportApiCore::list_ro_crate_entities` now takes two additional parameters. In-repo implementations were updated, but any external implementor of that trait will need a coordinated update. - -### Low - -1. Coverage gaps around filter semantics - -File: - -- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) - -The new filter test covers positive lookups only. It does not cover: - -- empty/unknown `file_id` or `entity_id` -- combined `file_id` + `entity_id` filtering -- multiple-match behavior -- pagination metadata such as `has_more`, nonzero `offset`, or truncated `limit` - -2. Per-workflow uniqueness semantics are not directly tested - -File: - -- [tests/test_ro_crate.rs](/home/lai25/torc/tests/test_ro_crate.rs) - -There is no companion test proving that the same `file_id` may still appear in a different workflow. The staged tests therefore do not directly pin the intended scope of uniqueness as "per workflow" rather than global. - -3. Cross-language convenience API parity is incomplete - -Files: - -- [src/client/apis/ro_crate_api.rs](/home/lai25/torc/src/client/apis/ro_crate_api.rs) -- [python_client/src/torc/openapi_client/api/ro_crate_entities_api.py](/home/lai25/torc/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py) -- [julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl](/home/lai25/torc/julia_client/Torc/src/api/apis/api_RoCrateEntitiesApi.jl) - -Rust adds `find_*` convenience helpers, while Python and Julia expose only the raw filtered list API. Endpoint parity is present; helper-level parity is not. - -4. Count query lost SQLx macro-time checking - -File: - -- [src/server/api/ro_crate.rs](/home/lai25/torc/src/server/api/ro_crate.rs) - -The count path moved from `query!` to dynamic `sqlx::query(...)` so it can share the same optional filters. That is reasonable, but it removes compile-time SQL validation for that query. - -## Clean Checks - -The reviewers found these parts of the staged change to be sound: - -- Server filter plumbing is consistent from router to transport to query execution. -- Query parameter binding order matches the generated SQL placeholders. -- Existing call sites that should preserve old behavior now pass explicit `None` values for the new filters. -- Python and Julia staged client changes appear structurally consistent with the new endpoint parameters. - -## Suggested Follow-Up Before Merge - -1. Verify the OpenAPI source-of-truth flow on the staged tree so the YAML changes are not drifting from Rust-owned spec generation. -2. Strengthen `test_ro_crate_rejects_duplicate_workflow_file_link` to assert the specific failure semantics you want. -3. Add at least one negative/empty filter test and one pagination-metadata test for the new filtered list path. -4. Decide whether the silent "first match wins" behavior in the new Rust helpers is an acceptable long-term contract. From 55108d32e22b4152083e2e6306e10943eb821964 Mon Sep 17 00:00:00 2001 From: Andrew Lai Date: Thu, 30 Apr 2026 12:31:15 -0700 Subject: [PATCH 3/6] Fix duplicate SQLx migration version for RO-Crate index --- ...260422000000_add_ro_crate_entity_workflow_file_index.down.sql} | 0 ...20260422000000_add_ro_crate_entity_workflow_file_index.up.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename torc-server/migrations/{20260421000000_add_ro_crate_entity_workflow_file_index.down.sql => 20260422000000_add_ro_crate_entity_workflow_file_index.down.sql} (100%) rename torc-server/migrations/{20260421000000_add_ro_crate_entity_workflow_file_index.up.sql => 20260422000000_add_ro_crate_entity_workflow_file_index.up.sql} (100%) diff --git a/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql b/torc-server/migrations/20260422000000_add_ro_crate_entity_workflow_file_index.down.sql similarity index 100% rename from torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.down.sql rename to torc-server/migrations/20260422000000_add_ro_crate_entity_workflow_file_index.down.sql diff --git a/torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql b/torc-server/migrations/20260422000000_add_ro_crate_entity_workflow_file_index.up.sql similarity index 100% rename from torc-server/migrations/20260421000000_add_ro_crate_entity_workflow_file_index.up.sql rename to torc-server/migrations/20260422000000_add_ro_crate_entity_workflow_file_index.up.sql From 85684e428ba4b1f3a4ac9aa403fd74170a2c8260 Mon Sep 17 00:00:00 2001 From: Andrew Lai Date: Thu, 30 Apr 2026 15:28:44 -0700 Subject: [PATCH 4/6] Fix generated Julia RO-Crate API doc parity --- julia_client/julia_client/docs/RoCrateEntitiesApi.md | 1 + 1 file changed, 1 insertion(+) diff --git a/julia_client/julia_client/docs/RoCrateEntitiesApi.md b/julia_client/julia_client/docs/RoCrateEntitiesApi.md index 52e181e61..87fc5641c 100644 --- a/julia_client/julia_client/docs/RoCrateEntitiesApi.md +++ b/julia_client/julia_client/docs/RoCrateEntitiesApi.md @@ -191,3 +191,4 @@ No authorization required - **Accept**: application/json [[Back to top]](#) [[Back to API list]](../README.md#api-endpoints) [[Back to Model list]](../README.md#models) [[Back to README]](../README.md) + From cbd83d9c2f0ea080f0fd3feed7be91bf4445926a Mon Sep 17 00:00:00 2001 From: Andrew Lai Date: Thu, 30 Apr 2026 15:43:08 -0700 Subject: [PATCH 5/6] Handle duplicate RO-Crate entity updates consistently --- src/server/api/ro_crate.rs | 89 ++++++++++++++----- src/server/api_responses.rs | 2 + src/server/http_transport/response_mapping.rs | 2 +- 3 files changed, 70 insertions(+), 23 deletions(-) diff --git a/src/server/api/ro_crate.rs b/src/server/api/ro_crate.rs index a0a94a7d7..69b70c11e 100644 --- a/src/server/api/ro_crate.rs +++ b/src/server/api/ro_crate.rs @@ -36,6 +36,46 @@ fn full_version() -> String { format!("{} ({})", SERVER_VERSION, GIT_HASH) } +fn ro_crate_duplicate_details(error_string: &str) -> Option<(&'static str, &'static str)> { + if !error_string.contains("UNIQUE constraint failed") { + return None; + } + + if error_string.contains("ro_crate_entity.workflow_id, ro_crate_entity.file_id") { + Some(( + "RO-Crate entity already exists for this workflow/file link", + "file_id", + )) + } else if error_string.contains("ro_crate_entity.workflow_id, ro_crate_entity.entity_id") { + Some(( + "RO-Crate entity already exists for this workflow/entity_id link", + "entity_id", + )) + } else { + Some(("RO-Crate entity already exists", "unknown")) + } +} + +fn log_duplicate_ro_crate_entity( + operation: &str, + workflow_id: i64, + duplicate_field: &str, + file_id: Option, + entity_id: &str, +) { + if let Some(file_id) = file_id { + warn!( + "Duplicate RO-Crate entity {} for workflow_id={} duplicate_field={} file_id={} has_file_id=true entity_id={}", + operation, workflow_id, duplicate_field, file_id, entity_id + ); + } else { + warn!( + "Duplicate RO-Crate entity {} for workflow_id={} duplicate_field={} has_file_id=false entity_id={}", + operation, workflow_id, duplicate_field, entity_id + ); + } +} + /// Trait defining RO-Crate entity-related API operations #[async_trait] pub trait RoCrateApi { @@ -367,28 +407,14 @@ where Ok(result) => result, Err(e) => { let error_string = e.to_string(); - if error_string.contains("UNIQUE constraint failed") { - let (message, duplicate_field) = if error_string - .contains("ro_crate_entity.workflow_id, ro_crate_entity.file_id") - { - ( - "RO-Crate entity already exists for this workflow/file link", - "file_id", - ) - } else if error_string - .contains("ro_crate_entity.workflow_id, ro_crate_entity.entity_id") - { - ( - "RO-Crate entity already exists for this workflow/entity_id link", - "entity_id", - ) - } else { - ("RO-Crate entity already exists", "unknown") - }; - - warn!( - "Duplicate RO-Crate entity rejected for workflow_id={} duplicate_field={} file_id={:?} entity_id={}", - body.workflow_id, duplicate_field, body.file_id, body.entity_id + if let Some((message, duplicate_field)) = ro_crate_duplicate_details(&error_string) + { + log_duplicate_ro_crate_entity( + "rejected on create", + body.workflow_id, + duplicate_field, + body.file_id, + &body.entity_id, ); let error_response = models::ErrorResponse::new(serde_json::json!({ "message": message @@ -625,6 +651,25 @@ where { Ok(result) => result, Err(e) => { + let error_string = e.to_string(); + if let Some((message, duplicate_field)) = ro_crate_duplicate_details(&error_string) + { + log_duplicate_ro_crate_entity( + "rejected on update", + body.workflow_id, + duplicate_field, + body.file_id, + &body.entity_id, + ); + let error_response = models::ErrorResponse::new(serde_json::json!({ + "message": message + })); + return Ok( + UpdateRoCrateEntityResponse::UnprocessableContentErrorResponse( + error_response, + ), + ); + } return Err(database_error_with_msg( e, "Failed to update RO-Crate entity", diff --git a/src/server/api_responses.rs b/src/server/api_responses.rs index 8f32b7123..556c1c174 100644 --- a/src/server/api_responses.rs +++ b/src/server/api_responses.rs @@ -193,6 +193,8 @@ pub enum UpdateRoCrateEntityResponse { ForbiddenErrorResponse(models::ErrorResponse), /// Not found error response NotFoundErrorResponse(models::ErrorResponse), + /// Unprocessable content error response + UnprocessableContentErrorResponse(models::ErrorResponse), /// Default error response DefaultErrorResponse(models::ErrorResponse), } diff --git a/src/server/http_transport/response_mapping.rs b/src/server/http_transport/response_mapping.rs index 134e20a98..f65d2418a 100644 --- a/src/server/http_transport/response_mapping.rs +++ b/src/server/http_transport/response_mapping.rs @@ -437,7 +437,7 @@ map_response_std!( SuccessfulResponse ); map_response_std!(reload_auth_response, ReloadAuthResponse, SuccessfulResponse); -map_response_std!( +map_response_unprocessable!( update_ro_crate_entity_response, UpdateRoCrateEntityResponse, SuccessfulResponse From 1410bcfa90dc45d166b1f756be7c7e09f920b659 Mon Sep 17 00:00:00 2001 From: Daniel Thom Date: Fri, 1 May 2026 09:31:54 -0600 Subject: [PATCH 6/6] Fix formatting --- .../src/torc/openapi_client/api/ro_crate_entities_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py b/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py index c25a3e5b2..8cb99292c 100644 --- a/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py +++ b/python_client/src/torc/openapi_client/api/ro_crate_entities_api.py @@ -1784,3 +1784,4 @@ def _update_ro_crate_entity_serialize( _request_auth=_request_auth ) +