Skip to content

Commit fa23c88

Browse files
JadeCaraJade Wibbelsgreptile-apps[bot]
committed
[ENG-2085] Optimized queries for csv download and search (#7014)
Co-authored-by: Jade Wibbels <[email protected]> Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
1 parent b5e3f73 commit fa23c88

File tree

4 files changed

+327
-30
lines changed

4 files changed

+327
-30
lines changed

src/fides/api/api/v1/endpoints/privacy_request_endpoints.py

Lines changed: 61 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from pydantic import Field
3030
from pydantic import ValidationError as PydanticValidationError
3131
from sqlalchemy import cast, column, null, or_, select
32-
from sqlalchemy.orm import Query, Session
32+
from sqlalchemy.orm import Query, Session, selectinload
3333
from sqlalchemy.sql.expression import nullslast
3434
from starlette.responses import StreamingResponse
3535
from starlette.status import (
@@ -88,6 +88,7 @@
8888
from fides.api.schemas.external_https import PrivacyRequestResumeFormat
8989
from fides.api.schemas.policy import ActionType, CurrentStep
9090
from fides.api.schemas.privacy_request import (
91+
BULK_PRIVACY_REQUEST_BATCH_SIZE,
9192
BulkPostPrivacyRequests,
9293
BulkReviewResponse,
9394
BulkSoftDeletePrivacyRequests,
@@ -225,7 +226,9 @@ def create_privacy_request(
225226
privacy_request_service: PrivacyRequestService = Depends(
226227
get_privacy_request_service
227228
),
228-
data: Annotated[List[PrivacyRequestCreate], Field(max_length=50)], # type: ignore
229+
data: Annotated[
230+
List[PrivacyRequestCreate], Field(max_length=BULK_PRIVACY_REQUEST_BATCH_SIZE)
231+
], # type: ignore
229232
) -> BulkPostPrivacyRequests:
230233
"""
231234
Given a list of privacy request data elements, create corresponding PrivacyRequest objects
@@ -251,7 +254,9 @@ def create_privacy_request_authenticated(
251254
verify_oauth_client,
252255
scopes=[PRIVACY_REQUEST_CREATE],
253256
),
254-
data: Annotated[List[PrivacyRequestCreate], Field(max_length=50)], # type: ignore
257+
data: Annotated[
258+
List[PrivacyRequestCreate], Field(max_length=BULK_PRIVACY_REQUEST_BATCH_SIZE)
259+
], # type: ignore
255260
) -> BulkPostPrivacyRequests:
256261
"""
257262
Given a list of privacy request data elements, create corresponding PrivacyRequest objects
@@ -272,7 +277,10 @@ def privacy_request_csv_download(
272277
f = io.StringIO()
273278
csv_file = csv.writer(f)
274279

275-
privacy_request_ids: List[str] = [r.id for r in privacy_request_query]
280+
# Execute query once and convert to list to avoid multiple iterations
281+
privacy_requests: List[PrivacyRequest] = privacy_request_query.all()
282+
privacy_request_ids: List[str] = [r.id for r in privacy_requests]
283+
276284
denial_audit_log_query: Query = db.query(AuditLog).filter(
277285
AuditLog.action == AuditLogAction.denied,
278286
AuditLog.privacy_request_id.in_(privacy_request_ids),
@@ -281,7 +289,7 @@ def privacy_request_csv_download(
281289
r.privacy_request_id: r.message for r in denial_audit_log_query
282290
}
283291

284-
identity_columns, custom_field_columns = get_variable_columns(privacy_request_query)
292+
identity_columns, custom_field_columns = get_variable_columns(privacy_requests)
285293

286294
csv_file.writerow(
287295
[
@@ -301,7 +309,7 @@ def privacy_request_csv_download(
301309
)
302310

303311
pr: PrivacyRequest
304-
for pr in privacy_request_query:
312+
for pr in privacy_requests:
305313
denial_reason = (
306314
denial_audit_logs[pr.id]
307315
if pr.status == PrivacyRequestStatus.denied and pr.id in denial_audit_logs
@@ -346,12 +354,12 @@ def privacy_request_csv_download(
346354

347355

348356
def get_variable_columns(
349-
privacy_request_query: Query,
357+
privacy_requests: List[PrivacyRequest],
350358
) -> tuple[List[str], Dict[str, str]]:
351359
identity_columns: Set[str] = set()
352360
custom_field_columns: Dict[str, str] = {}
353361

354-
for pr in privacy_request_query:
362+
for pr in privacy_requests:
355363
identity_columns.update(
356364
extract_identity_column_names(pr.get_persisted_identity().model_dump())
357365
)
@@ -848,10 +856,29 @@ def _shared_privacy_request_search(
848856

849857
if download_csv:
850858
_validate_result_size(query)
859+
# Eager load relationships needed for CSV export to avoid N+1 queries
860+
query = query.options(
861+
selectinload(PrivacyRequest.provided_identities), # type: ignore[attr-defined]
862+
selectinload(PrivacyRequest.custom_fields), # type: ignore[attr-defined]
863+
selectinload(PrivacyRequest.policy).selectinload(Policy.rules), # type: ignore[attr-defined]
864+
)
851865
# Returning here if download_csv param was specified
852866
logger.info("Downloading privacy requests as csv")
853867
return privacy_request_csv_download(db, query)
854868

869+
# Eager load relationships for regular list view to avoid N+1 queries
870+
if include_identities or include_custom_privacy_request_fields:
871+
eager_load_options = []
872+
if include_identities:
873+
eager_load_options.append(
874+
selectinload(PrivacyRequest.provided_identities) # type: ignore[attr-defined]
875+
)
876+
if include_custom_privacy_request_fields:
877+
eager_load_options.append(
878+
selectinload(PrivacyRequest.custom_fields) # type: ignore[attr-defined]
879+
)
880+
query = query.options(*eager_load_options)
881+
855882
# Conditionally embed execution log details in the response.
856883
if verbose:
857884
logger.info("Finding execution and audit log details")
@@ -1288,19 +1315,26 @@ def validate_manual_input(
12881315
],
12891316
)
12901317
def bulk_restart_privacy_request_from_failure(
1291-
privacy_request_ids: List[str],
1318+
privacy_request_ids: Annotated[
1319+
List[str], Field(max_length=BULK_PRIVACY_REQUEST_BATCH_SIZE)
1320+
],
12921321
*,
12931322
db: Session = Depends(deps.get_db),
12941323
) -> BulkPostPrivacyRequests:
1295-
"""Bulk restart a of privacy request from failure."""
1324+
"""Bulk restart privacy requests from failure."""
12961325
succeeded: List[PrivacyRequestResponse] = []
12971326
failed: List[Dict[str, Any]] = []
1327+
1328+
# Fetch all privacy requests in one query to avoid N+1
1329+
privacy_requests_dict = {
1330+
pr.id: pr
1331+
for pr in PrivacyRequest.query_without_large_columns(db)
1332+
.filter(PrivacyRequest.id.in_(privacy_request_ids))
1333+
.all()
1334+
}
1335+
12981336
for privacy_request_id in privacy_request_ids:
1299-
privacy_request = (
1300-
PrivacyRequest.query_without_large_columns(db)
1301-
.filter(PrivacyRequest.id == privacy_request_id)
1302-
.first()
1303-
)
1337+
privacy_request = privacy_requests_dict.get(privacy_request_id)
13041338

13051339
if not privacy_request:
13061340
failed.append(
@@ -2220,12 +2254,18 @@ def bulk_soft_delete_privacy_requests(
22202254
if client.id == CONFIG.security.oauth_root_client_id:
22212255
user_id = "root"
22222256

2223-
for privacy_request_id in privacy_requests.request_ids:
2224-
privacy_request = (
2225-
PrivacyRequest.query_without_large_columns(db)
2226-
.filter(PrivacyRequest.id == privacy_request_id)
2227-
.first()
2228-
)
2257+
request_ids = privacy_requests.request_ids
2258+
2259+
# Fetch all privacy requests in one query to avoid N+1
2260+
privacy_requests_dict = {
2261+
pr.id: pr
2262+
for pr in PrivacyRequest.query_without_large_columns(db)
2263+
.filter(PrivacyRequest.id.in_(request_ids))
2264+
.all()
2265+
}
2266+
2267+
for privacy_request_id in request_ids:
2268+
privacy_request = privacy_requests_dict.get(privacy_request_id)
22292269

22302270
if not privacy_request:
22312271
failed.append(

src/fides/api/schemas/privacy_request.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -379,10 +379,15 @@ class PrivacyRequestVerboseResponse(PrivacyRequestResponse):
379379
model_config = ConfigDict(populate_by_name=True)
380380

381381

382+
# Batch size for bulk privacy request operations. Used for request validation and
383+
# automatic batching to avoid memory issues with large request lists.
384+
BULK_PRIVACY_REQUEST_BATCH_SIZE = 50
385+
386+
382387
class ReviewPrivacyRequestIds(FidesSchema):
383388
"""Pass in a list of privacy request ids"""
384389

385-
request_ids: List[str] = Field(..., max_length=50)
390+
request_ids: List[str] = Field(..., max_length=BULK_PRIVACY_REQUEST_BATCH_SIZE)
386391

387392

388393
class DenyPrivacyRequests(ReviewPrivacyRequestIds):

src/fides/service/privacy_request/privacy_request_service.py

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
# pylint: disable=too-many-lines
12
from datetime import datetime
23
from typing import Any, Dict, List, Optional
34

45
from loguru import logger
5-
from sqlalchemy.orm import Session
6+
from sqlalchemy.orm import Session, selectinload
67

78
from fides.api.common_exceptions import (
89
FidesopsException,
@@ -33,6 +34,7 @@
3334
PrivacyCenterConfig as PrivacyCenterConfigSchema,
3435
)
3536
from fides.api.schemas.privacy_request import (
37+
BULK_PRIVACY_REQUEST_BATCH_SIZE,
3638
BulkPostPrivacyRequests,
3739
BulkReviewResponse,
3840
CheckpointActionRequired,
@@ -82,6 +84,7 @@ def get_privacy_request(self, privacy_request_id: str) -> Optional[PrivacyReques
8284

8385
def _validate_privacy_request_for_bulk_operation(
8486
self,
87+
privacy_request: Optional[PrivacyRequest],
8588
request_id: str,
8689
) -> PrivacyRequest:
8790
"""
@@ -90,7 +93,6 @@ def _validate_privacy_request_for_bulk_operation(
9093
9194
Returns the validated privacy request.
9295
"""
93-
privacy_request = self.get_privacy_request(request_id)
9496
if not privacy_request:
9597
raise PrivacyRequestError(
9698
f"No privacy request found with id '{request_id}'",
@@ -103,9 +105,20 @@ def _validate_privacy_request_for_bulk_operation(
103105
mode="json"
104106
),
105107
)
106-
107108
return privacy_request
108109

110+
def _fetch_privacy_requests_for_bulk_operation(
111+
self, request_ids: List[str]
112+
) -> Dict[str, PrivacyRequest]:
113+
"""Fetch privacy requests in a single query to avoid N+1. Eager loads custom_fields."""
114+
privacy_requests = (
115+
PrivacyRequest.query_without_large_columns(self.db)
116+
.options(selectinload(PrivacyRequest.custom_fields)) # type: ignore[attr-defined]
117+
.filter(PrivacyRequest.id.in_(request_ids))
118+
.all()
119+
)
120+
return {pr.id: pr for pr in privacy_requests}
121+
109122
def _validate_required_location_fields(
110123
self, privacy_request_data: PrivacyRequestCreate
111124
) -> None:
@@ -521,13 +534,26 @@ def approve_privacy_requests(
521534
reviewed_by: Optional[str] = None,
522535
suppress_notification: Optional[bool] = False,
523536
) -> BulkReviewResponse:
537+
"""Approve privacy requests."""
538+
if len(request_ids) > BULK_PRIVACY_REQUEST_BATCH_SIZE:
539+
raise ValueError(
540+
f"Bulk operations are limited to {BULK_PRIVACY_REQUEST_BATCH_SIZE} requests. "
541+
f"Received {len(request_ids)}. Batch requests at the caller level."
542+
)
543+
524544
succeeded: List[PrivacyRequest] = []
525545
failed: List[BulkUpdateFailed] = []
526546

547+
# Fetch all privacy requests in one query to avoid N+1
548+
privacy_requests_dict = self._fetch_privacy_requests_for_bulk_operation(
549+
request_ids
550+
)
551+
527552
for request_id in request_ids:
553+
privacy_request = privacy_requests_dict.get(request_id)
528554
try:
529555
privacy_request = self._validate_privacy_request_for_bulk_operation(
530-
request_id
556+
privacy_request, request_id
531557
)
532558
except PrivacyRequestError as exc:
533559
failed.append(BulkUpdateFailed(message=exc.message, data=exc.data))
@@ -596,13 +622,26 @@ def deny_privacy_requests(
596622
*,
597623
user_id: Optional[str] = None,
598624
) -> BulkReviewResponse:
625+
"""Deny privacy requests."""
626+
if len(request_ids) > BULK_PRIVACY_REQUEST_BATCH_SIZE:
627+
raise ValueError(
628+
f"Bulk operations are limited to {BULK_PRIVACY_REQUEST_BATCH_SIZE} requests. "
629+
f"Received {len(request_ids)}. Batch requests at the caller level."
630+
)
631+
599632
succeeded: List[PrivacyRequest] = []
600633
failed: List[BulkUpdateFailed] = []
601634

635+
# Fetch all privacy requests in one query to avoid N+1
636+
privacy_requests_dict = self._fetch_privacy_requests_for_bulk_operation(
637+
request_ids
638+
)
639+
602640
for request_id in request_ids:
641+
privacy_request = privacy_requests_dict.get(request_id)
603642
try:
604643
privacy_request = self._validate_privacy_request_for_bulk_operation(
605-
request_id
644+
privacy_request, request_id
606645
)
607646
except PrivacyRequestError as exc:
608647
failed.append(BulkUpdateFailed(message=exc.message, data=exc.data))
@@ -661,6 +700,12 @@ def cancel_privacy_requests(
661700
user_id: Optional[str] = None,
662701
) -> BulkReviewResponse:
663702
"""Cancel a list of privacy requests and/or report failure"""
703+
if len(request_ids) > BULK_PRIVACY_REQUEST_BATCH_SIZE:
704+
raise ValueError(
705+
f"Bulk operations are limited to {BULK_PRIVACY_REQUEST_BATCH_SIZE} requests. "
706+
f"Received {len(request_ids)}. Batch requests at the caller level."
707+
)
708+
664709
succeeded: List[PrivacyRequest] = []
665710
failed: List[BulkUpdateFailed] = []
666711

@@ -672,10 +717,16 @@ def cancel_privacy_requests(
672717
PrivacyRequestStatus.error,
673718
]
674719

720+
# Fetch all privacy requests in one query to avoid N+1
721+
privacy_requests_dict = self._fetch_privacy_requests_for_bulk_operation(
722+
request_ids
723+
)
724+
675725
for request_id in request_ids:
726+
privacy_request = privacy_requests_dict.get(request_id)
676727
try:
677728
privacy_request = self._validate_privacy_request_for_bulk_operation(
678-
request_id
729+
privacy_request, request_id
679730
)
680731
except PrivacyRequestError as exc:
681732
failed.append(BulkUpdateFailed(message=exc.message, data=exc.data))
@@ -716,14 +767,26 @@ def finalize_privacy_requests(
716767
*,
717768
user_id: Optional[str] = None,
718769
) -> BulkReviewResponse:
719-
"""Bulk finalize privacy requests that are in requires_manual_finalization status"""
770+
"""Finalize privacy requests."""
771+
if len(request_ids) > BULK_PRIVACY_REQUEST_BATCH_SIZE:
772+
raise ValueError(
773+
f"Bulk operations are limited to {BULK_PRIVACY_REQUEST_BATCH_SIZE} requests. "
774+
f"Received {len(request_ids)}. Batch requests at the caller level."
775+
)
776+
720777
succeeded: List[PrivacyRequest] = []
721778
failed: List[BulkUpdateFailed] = []
722779

780+
# Fetch all privacy requests in one query to avoid N+1
781+
privacy_requests_dict = self._fetch_privacy_requests_for_bulk_operation(
782+
request_ids
783+
)
784+
723785
for request_id in request_ids:
786+
privacy_request = privacy_requests_dict.get(request_id)
724787
try:
725788
privacy_request = self._validate_privacy_request_for_bulk_operation(
726-
request_id
789+
privacy_request, request_id
727790
)
728791
except PrivacyRequestError as exc:
729792
failed.append(BulkUpdateFailed(message=exc.message, data=exc.data))

0 commit comments

Comments
 (0)