diff --git a/api/dependencies/package-lock.json b/api/dependencies/package-lock.json index e88234e2..41057910 100644 --- a/api/dependencies/package-lock.json +++ b/api/dependencies/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-dependencies", - "version": "2.9.3", + "version": "2.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api-dependencies", - "version": "2.9.3", + "version": "2.9.4", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/dependencies/package.json b/api/dependencies/package.json index 75e6d2ea..dc063f9b 100644 --- a/api/dependencies/package.json +++ b/api/dependencies/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-dependencies", - "version": "2.9.3", + "version": "2.9.4", "description": "NUL Digital Collections API Dependencies", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/package-lock.json b/api/package-lock.json index f569e39d..f3a0f7ef 100644 --- a/api/package-lock.json +++ b/api/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api-build", - "version": "2.9.3", + "version": "2.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api-build", - "version": "2.9.3", + "version": "2.9.4", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/api/package.json b/api/package.json index 0c19bedf..c522db18 100644 --- a/api/package.json +++ b/api/package.json @@ -1,6 +1,6 @@ { "name": "dc-api-build", - "version": "2.9.3", + "version": "2.9.4", "description": "NUL Digital Collections API Build Environment", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/src/api/request/pipeline.js b/api/src/api/request/pipeline.js index 290eb3b2..87049232 100644 --- a/api/src/api/request/pipeline.js +++ b/api/src/api/request/pipeline.js @@ -1,22 +1,57 @@ const sortJson = require("sort-json"); const { defaultSearchSize } = require("../../environment"); -function filterFor(query, event) { - const matchTheQuery = query; - const beUnpublished = { term: { published: false } }; - const beRestricted = { term: { visibility: "Private" } }; +function filterFor(event) { + const publishedValues = event.userToken.can("read:Unpublished") + ? [true, false] + : [true]; + const userVisibility = new Set( + event.userToken.can("read:Private") + ? ["Private", "Institution", "Public"] + : ["Institution", "Public"] + ); + const requestVisibility = event?.queryStringParameters?.visibility + ?.split(",") + ?.map((v) => v[0].toUpperCase() + v.slice(1)) || [ + "Private", + "Institution", + "Public", + ]; + const visibilityValues = requestVisibility.filter((v) => + userVisibility.has(v) + ); - let filter = { must: [matchTheQuery], must_not: [] }; - - if (!event.userToken.can("read:Unpublished")) { - filter.must_not.push(beUnpublished); - } + return [ + { terms: { published: publishedValues } }, + { terms: { visibility: visibilityValues } }, + ]; +} - if (!event.userToken.can("read:Private")) { - filter.must_not.push(beRestricted); +function addFilter(query, filter) { + let result = { ...query }; + if (result.bool) { + result.bool.filter ||= []; + result.bool.filter.push(...filter); + } else if (result.neural) { + const boolFilter = { bool: { filter: filter } }; + if (result.neural.filter) { + boolFilter.bool.filter.push(result.neural.filter); + } + const neuralField = Object.keys(result.neural)[0]; + result.neural[neuralField].filter = boolFilter; + } else if (result.hybrid) { + result.hybrid.queries = result.hybrid.queries.map((subQuery) => + addFilter(subQuery, filter) + ); + } else { + result = { + bool: { + must: [result], + filter: filter, + }, + }; } - - return { bool: filter }; + return result; } module.exports = class RequestPipeline { @@ -33,23 +68,44 @@ module.exports = class RequestPipeline { // - Add `track_total_hits` to search context (so we can get accurate hits.total.value) authFilter(event) { - if (this.searchContext.query?.hybrid?.queries) { - this.searchContext.query = { - hybrid: { - queries: this.searchContext.query.hybrid.queries.map((query) => - filterFor(query, event) - ), - }, - }; - } else { - this.searchContext.query = filterFor(this.searchContext.query, event); - } + this.searchContext.query = addFilter( + this.searchContext.query, + filterFor(event) + ); this.searchContext.track_total_hits = true; + return this; + } + + addNeuralModelId() { + const neuralModelId = process.env.OPENSEARCH_MODEL_ID; + if (!neuralModelId) return this; + + const recursivelyAddNeuralModelId = (query) => { + if (Array.isArray(query)) { + for (const subQuery of query) { + recursivelyAddNeuralModelId(subQuery); + } + } + + if (typeof query !== "object" || query === null) return this; + + for (const key in query) { + if (key === "neural") { + const [field] = Object.keys(query.neural); + query.neural[field].model_id ||= neuralModelId; + } else { + recursivelyAddNeuralModelId(query[key]); + } + } + }; + + recursivelyAddNeuralModelId(this.searchContext.query); return this; } toJson() { + this.addNeuralModelId(); return JSON.stringify(sortJson(this.searchContext)); } }; diff --git a/api/src/handlers/get-collection-by-id.js b/api/src/handlers/get-collection-by-id.js index fb723b4b..6831e648 100644 --- a/api/src/handlers/get-collection-by-id.js +++ b/api/src/handlers/get-collection-by-id.js @@ -37,6 +37,7 @@ const getIiifCollectionById = async (event) => { return await doSearch(event, { includeToken: false, parameterOverrides, + defaultSort: [{ accession_number: "asc" }], }); }; diff --git a/api/src/handlers/search-runner.js b/api/src/handlers/search-runner.js index e21b7b17..dbdd1f3a 100644 --- a/api/src/handlers/search-runner.js +++ b/api/src/handlers/search-runner.js @@ -39,6 +39,10 @@ const doSearch = async (event, searchParams = {}) => { return invalidRequest(error.message); } + if (!searchContext.sort && searchParams.defaultSort) { + searchContext.sort = searchParams.defaultSort; + } + if (!validModels(models, format)) { return invalidRequest(`Invalid models requested: ${models}`); } diff --git a/api/src/package-lock.json b/api/src/package-lock.json index 8a7e118a..9905221c 100644 --- a/api/src/package-lock.json +++ b/api/src/package-lock.json @@ -1,12 +1,12 @@ { "name": "dc-api", - "version": "2.9.3", + "version": "2.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dc-api", - "version": "2.9.3", + "version": "2.9.4", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "^2.0.1", diff --git a/api/src/package.json b/api/src/package.json index 8f12c342..ac44cd51 100644 --- a/api/src/package.json +++ b/api/src/package.json @@ -1,6 +1,6 @@ { "name": "dc-api", - "version": "2.9.3", + "version": "2.9.4", "description": "NUL Digital Collections API", "repository": "https://github.com/nulib/dc-api-v2", "author": "nulib", diff --git a/api/test/integration/get-collection-by-id.test.js b/api/test/integration/get-collection-by-id.test.js index 73e811f6..915fb135 100644 --- a/api/test/integration/get-collection-by-id.test.js +++ b/api/test/integration/get-collection-by-id.test.js @@ -88,6 +88,7 @@ describe("Retrieve collection by id", () => { it("returns a single collection as a IIIF collection", async () => { const originalQuery = { query: { query_string: { query: "collection.id:1234" } }, + sort: [{ accession_number: "asc" }], }; const authQuery = new RequestPipeline(originalQuery) .authFilter(helpers.preprocess({})) @@ -141,6 +142,7 @@ describe("Retrieve collection by id", () => { const originalQuery = { query: { query_string: { query: "collection.id:1234" } }, + sort: [{ accession_number: "asc" }], }; let preProcessedEvent = helpers.preprocess(event); diff --git a/api/test/unit/api/request/pipeline.test.js b/api/test/unit/api/request/pipeline.test.js index 470678d1..701e51f3 100644 --- a/api/test/unit/api/request/pipeline.test.js +++ b/api/test/unit/api/request/pipeline.test.js @@ -36,9 +36,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -56,9 +56,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -71,11 +71,11 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include({ - term: { published: false }, + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { published: [true] }, }); - expect(result.searchContext.query.bool.must_not).not.to.deep.include({ - term: { visibility: "Private" }, + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { visibility: ["Private", "Institution", "Public"] }, }); }); }); @@ -90,9 +90,9 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } + expect(result.searchContext.query.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } ); }); @@ -105,7 +105,12 @@ describe("RequestPipeline", () => { expect(result.searchContext.query.bool.must).to.deep.include( requestBody.query ); - expect(result.searchContext.query.bool.must_not).to.be.empty; + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { published: [true, false] }, + }); + expect(result.searchContext.query.bool.filter).to.deep.include({ + terms: { visibility: ["Private", "Institution", "Public"] }, + }); }); }); @@ -140,15 +145,114 @@ describe("RequestPipeline", () => { }; pipeline = new RequestPipeline(requestBody); const result = pipeline.authFilter(event); - for (const i in requestBody.query.hybrid.queries) { - const originalQuery = requestBody.query.hybrid.queries[i]; - const newQuery = result.searchContext.query.hybrid.queries[i]; - expect(newQuery.bool.must).to.deep.include(originalQuery); - expect(newQuery.bool.must_not).to.deep.include( - { term: { visibility: "Private" } }, - { term: { published: false } } - ); + const [originalNeuralQuery, originalMatchQuery] = + requestBody.query.hybrid.queries; + const [newNeuralQuery, newMatchQuery] = + result.searchContext.query.hybrid.queries; + + expect(newNeuralQuery.neural.embedding).to.deep.include( + originalNeuralQuery.neural.embedding + ); + expect(newMatchQuery).to.deep.include(originalMatchQuery); + expect( + newNeuralQuery.neural.embedding.filter.bool.filter + ).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } + ); + expect(newMatchQuery.bool.filter).to.deep.include( + { terms: { visibility: ["Institution", "Public"] } }, + { terms: { published: [true] } } + ); + }); + }); + + describe("addNeuralModelId", () => { + let oldModelId; + beforeEach(() => { + oldModelId = process.env.OPENSEARCH_MODEL_ID; + process.env.OPENSEARCH_MODEL_ID = "MODEL_ID"; + requestBody.query = { + neural: { + embedding: { + query_text: + "Do you have any materials related to testing the request pipeline?", + k: 5, + }, + }, + }; + pipeline = new RequestPipeline(requestBody); + }); + + afterEach(() => { + if (oldModelId) { + process.env.OPENSEARCH_MODEL_ID = oldModelId; + } else { + delete process.env.OPENSEARCH_MODEL_ID; } + oldModelId = null; + }); + + it("does not modify the query if OPENSEARCH_MODEL_ID is not set", () => { + delete process.env.OPENSEARCH_MODEL_ID; + pipeline.addNeuralModelId(); + expect(pipeline.searchContext.query).to.deep.equal(requestBody.query); + }); + + it("does not modify the query if there are no neural queries", () => { + requestBody.query = { + term: { + all_titles: "request pipeline testing", + }, + }; + pipeline = new RequestPipeline(requestBody); + pipeline.addNeuralModelId(); + expect(pipeline.searchContext.query).to.deep.equal(requestBody.query); + }); + + it("does not modify the query if there is already a model_id", () => { + requestBody.query.neural.embedding.model_id = "EXISTING_MODEL_ID"; + pipeline = new RequestPipeline(requestBody); + pipeline.addNeuralModelId(); + expect(pipeline.searchContext.query.neural.embedding.model_id).to.eq( + "EXISTING_MODEL_ID" + ); + }); + + it("automatically adds the model_id to a neural query", () => { + pipeline.addNeuralModelId(); + expect(pipeline.searchContext.query.neural.embedding.model_id).to.eq( + "MODEL_ID" + ); + }); + + it("recursively adds the model_id to all neural queries in a hybrid query", () => { + event.userToken = new ApiToken(); + requestBody.query = { + hybrid: { + queries: [ + { + neural: { + embedding: { + query_text: + "Do you have any materials related to testing the request pipeline?", + k: 5, + }, + }, + }, + { + term: { + all_titles: "request pipeline testing", + }, + }, + ], + }, + }; + pipeline = new RequestPipeline(requestBody); + pipeline.addNeuralModelId(); + expect( + pipeline.searchContext.query.hybrid.queries[0].neural.embedding.model_id + ).to.eq("MODEL_ID"); }); }); }); diff --git a/av-download/lambdas/package-lock.json b/av-download/lambdas/package-lock.json index d5991bd9..d6758f8b 100644 --- a/av-download/lambdas/package-lock.json +++ b/av-download/lambdas/package-lock.json @@ -1,12 +1,12 @@ { "name": "lambdas", - "version": "2.9.3", + "version": "2.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "lambdas", - "version": "2.9.3", + "version": "2.9.4", "license": "Apache-2.0", "dependencies": { "fluent-ffmpeg": "2.1.3" diff --git a/av-download/lambdas/package.json b/av-download/lambdas/package.json index 06547e8c..ba1f3b89 100644 --- a/av-download/lambdas/package.json +++ b/av-download/lambdas/package.json @@ -1,6 +1,6 @@ { "name": "lambdas", - "version": "2.9.3", + "version": "2.9.4", "description": "Non-API handler lambdas", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" diff --git a/chat/pyproject.toml b/chat/pyproject.toml index 8d2d6221..81391689 100644 --- a/chat/pyproject.toml +++ b/chat/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-chat" -version = "2.9.3" +version = "2.9.4" requires-python = ">=3.12" dependencies = [ "boto3~=1.34", diff --git a/chat/uv.lock b/chat/uv.lock index 37a06992..7528a0c4 100644 --- a/chat/uv.lock +++ b/chat/uv.lock @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "dc-api-v2-chat" -version = "2.9.3" +version = "2.9.4" source = { virtual = "." } dependencies = [ { name = "boto3" }, diff --git a/docs/docs/spec/openapi.yaml b/docs/docs/spec/openapi.yaml index 53303c2e..8d47ca22 100644 --- a/docs/docs/spec/openapi.yaml +++ b/docs/docs/spec/openapi.yaml @@ -36,7 +36,7 @@ components: bearerAuth: type: http scheme: bearer - bearerFormat: JWT + bearerFormat: JWT security: - anonymous: [] - bearerAuth: [] @@ -279,6 +279,7 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -286,12 +287,17 @@ paths: operationId: postSearch tags: - Search + parameters: + - $ref: "./types.yaml#/components/parameters/page" + - $ref: "./types.yaml#/components/parameters/size" + - $ref: "./types.yaml#/components/parameters/sort" + - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" requestBody: content: application/json: schema: type: object - responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -308,6 +314,7 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" @@ -321,6 +328,12 @@ paths: - $ref: "./types.yaml#/components/parameters/size" - $ref: "./types.yaml#/components/parameters/sort" - $ref: "./types.yaml#/components/parameters/as" + - $ref: "./types.yaml#/components/parameters/visibility" + requestBody: + content: + application/json: + schema: + type: object responses: 200: $ref: "./types.yaml#/components/responses/SearchResponse" diff --git a/docs/docs/spec/types.yaml b/docs/docs/spec/types.yaml index f8e51a7f..95edac92 100644 --- a/docs/docs/spec/types.yaml +++ b/docs/docs/spec/types.yaml @@ -13,7 +13,7 @@ components: enum: - collections - file-sets - - Works + - works style: simple id: name: id @@ -103,6 +103,20 @@ components: type: integer minimum: 1 maximum: 300 + visibility: + name: visibility + in: query + required: false + description: Filter search results by visibility + explode: false + schema: + type: array + items: + type: string + enum: + - public + - institution + - private responses: DocumentResponse: description: A single document response diff --git a/docs/pyproject.toml b/docs/pyproject.toml index 8279a4d8..0e372321 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dc-api-v2-docs" -version = "2.9.3" +version = "2.9.4" requires-python = ">=3.12" dependencies = [ "mkdocs>=1.1.2,<2.0.0", diff --git a/docs/uv.lock b/docs/uv.lock index 66ff2121..789d9a23 100644 --- a/docs/uv.lock +++ b/docs/uv.lock @@ -123,7 +123,7 @@ wheels = [ [[package]] name = "dc-api-v2-docs" -version = "2.9.3" +version = "2.9.4" source = { virtual = "." } dependencies = [ { name = "diagrams" },