Skip to content

Commit 24a7ed2

Browse files
authored
Add custom SOQL condition handling (#16)
1 parent c78092f commit 24a7ed2

File tree

5 files changed

+150
-30
lines changed

5 files changed

+150
-30
lines changed

config.example.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,14 @@ objects:
7373
# Check project readme file for more details.
7474
dir_name_field: LinkedEntity.Username
7575

76+
# Purpose: Use the value of this field as an extra condition in the SOQL query when fetching objects
77+
# Required: NO
78+
# Default: None
79+
# Notes: In contrast to `modified_date_*`, it filters the objects themselves, not the files attached to them.
80+
# It can filter objects by any field of the object that is allowed by Salesforce.
81+
extra_soql_condition: "MyCustomField__c = 'MyValue'"
82+
83+
7684
# Example 1: Process ContentDocument objects attached to `Account` object, older than datetime
7785
Account:
7886
modified_date_lt: 2023-08-01T00:00:00Z

src/salesforce_archivist/archivist.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class ArchivistObject(BaseModel):
2525
modified_date_lt: Optional[datetime.datetime] = None
2626
modified_date_gt: Optional[datetime.datetime] = None
2727
dir_name_field: Optional[str] = None
28+
extra_soql_condition: Optional[str] = None
2829

2930
def __eq__(self, other: Any) -> bool:
3031
if not isinstance(other, type(self)):

src/salesforce_archivist/salesforce/salesforce.py

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -52,35 +52,36 @@ def _get_content_document_list_query(self) -> str:
5252
select_list = ["LinkedEntityId", "ContentDocumentId", "LinkedEntity.Type"]
5353
if self._archivist_obj.dir_name_field is not None and self._archivist_obj.dir_name_field not in select_list:
5454
select_list.append(self._archivist_obj.dir_name_field)
55-
where_conditions = []
55+
where = []
56+
linked_id_where = "LinkedEntityId IN (SELECT Id FROM {obj_type})".format(obj_type=self._archivist_obj.obj_type)
57+
if self._archivist_obj.extra_soql_condition is not None:
58+
linked_id_where = "LinkedEntityId IN (SELECT Id FROM {obj_type} WHERE {where})".format(
59+
obj_type=self._archivist_obj.obj_type, where=self._archivist_obj.extra_soql_condition
60+
)
61+
where.append(linked_id_where)
62+
doc_id_where = []
5663
if self._archivist_obj.modified_date_lt is not None:
57-
where_conditions.append(
64+
doc_id_where.append(
5865
"ContentDocument.ContentModifiedDate < {date}".format(
5966
date=self._archivist_obj.modified_date_lt.strftime("%Y-%m-%dT%H:%M:%SZ")
6067
)
6168
)
6269
if self._archivist_obj.modified_date_gt is not None:
63-
where_conditions.append(
70+
doc_id_where.append(
6471
"ContentDocument.ContentModifiedDate > {date}".format(
6572
date=self._archivist_obj.modified_date_gt.strftime("%Y-%m-%dT%H:%M:%SZ")
6673
)
6774
)
68-
where = ""
69-
if len(where_conditions):
70-
where = "WHERE {}".format(" AND ".join(where_conditions))
71-
72-
# Using WHERE IN and not using filter on `LinkedEntity.Type` is done because of SF restrictions like:
73-
#
74-
# Implementation restriction: ContentDocumentLink requires a filter by a single ID on ContentDocumentId
75-
# or LinkedEntityId using the equals operator or multiple ID's using the IN operator.
76-
#
77-
# Implementation restriction: filtering on non-id fields is only permitted when filtering
78-
# by ContentDocumentLink.LinkedEntityId using the equal operator.
75+
if len(doc_id_where):
76+
where.append(
77+
"ContentDocumentId IN (SELECT Id FROM ContentDocument WHERE {where})".format(
78+
where=" AND ".join(doc_id_where)
79+
)
80+
)
7981

80-
return (
81-
"SELECT {fields} FROM ContentDocumentLink "
82-
"WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument {where})"
83-
).format(fields=", ".join(select_list), where=where)
82+
return ("SELECT {fields} " "FROM ContentDocumentLink " "WHERE {where}").format(
83+
fields=", ".join(select_list), where=" AND ".join(where)
84+
)
8485

8586
def download_content_document_link_list(
8687
self,
@@ -187,6 +188,8 @@ def _get_attachment_list_query(self) -> str:
187188
date=self._archivist_obj.modified_date_gt.strftime("%Y-%m-%dT%H:%M:%SZ")
188189
)
189190
)
191+
if self._archivist_obj.extra_soql_condition is not None:
192+
where_conditions.append(self._archivist_obj.extra_soql_condition)
190193
where = ""
191194
if len(where_conditions):
192195
where = "WHERE {}".format(" AND ".join(where_conditions))

test/salesforce/test_salesforce.py

Lines changed: 94 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818

1919

2020
@pytest.mark.parametrize(
21-
"modified_date_lt, modified_date_gt, dir_name_field, expected_query",
21+
"modified_date_lt, modified_date_gt, dir_name_field, extra_soql_condition, expected_query",
2222
[
2323
(
2424
None,
2525
None,
2626
None,
27-
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type FROM ContentDocumentLink WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument )",
27+
None,
28+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type FROM ContentDocumentLink "
29+
"WHERE LinkedEntityId IN (SELECT Id FROM User)",
2830
),
2931
(
3032
datetime(
@@ -39,10 +41,14 @@
3941
),
4042
None,
4143
None,
44+
None,
4245
(
4346
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type "
4447
"FROM ContentDocumentLink "
45-
"WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z)"
48+
"WHERE LinkedEntityId IN (SELECT Id FROM User) "
49+
"AND ContentDocumentId IN ("
50+
"SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z"
51+
")"
4652
),
4753
),
4854
(
@@ -67,11 +73,51 @@
6773
tzinfo=timezone.utc,
6874
),
6975
None,
76+
None,
7077
(
7178
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type "
7279
"FROM ContentDocumentLink "
73-
"WHERE ContentDocumentId IN ("
74-
"SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
80+
"WHERE LinkedEntityId IN (SELECT Id FROM User) "
81+
"AND ContentDocumentId IN ("
82+
"SELECT Id "
83+
"FROM ContentDocument "
84+
"WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z "
85+
"AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
86+
")"
87+
),
88+
),
89+
(
90+
datetime(
91+
year=2024,
92+
month=1,
93+
day=1,
94+
hour=0,
95+
minute=0,
96+
second=0,
97+
microsecond=0,
98+
tzinfo=timezone.utc,
99+
),
100+
datetime(
101+
year=2023,
102+
month=1,
103+
day=1,
104+
hour=0,
105+
minute=0,
106+
second=0,
107+
microsecond=0,
108+
tzinfo=timezone.utc,
109+
),
110+
"DirField",
111+
None,
112+
(
113+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type, DirField "
114+
"FROM ContentDocumentLink "
115+
"WHERE LinkedEntityId IN (SELECT Id FROM User) "
116+
"AND ContentDocumentId IN ("
117+
"SELECT Id "
118+
"FROM ContentDocument "
119+
"WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z "
120+
"AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
75121
")"
76122
),
77123
),
@@ -97,11 +143,16 @@
97143
tzinfo=timezone.utc,
98144
),
99145
"DirField",
146+
"MyCustomField__c = 'MyValue'",
100147
(
101148
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type, DirField "
102149
"FROM ContentDocumentLink "
103-
"WHERE ContentDocumentId IN ("
104-
"SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
150+
"WHERE LinkedEntityId IN (SELECT Id FROM User WHERE MyCustomField__c = 'MyValue') "
151+
"AND ContentDocumentId IN ("
152+
"SELECT Id "
153+
"FROM ContentDocument "
154+
"WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z "
155+
"AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
105156
")"
106157
),
107158
),
@@ -111,6 +162,7 @@ def test_download_content_document_link_list_queries(
111162
modified_date_lt: datetime | None,
112163
modified_date_gt: datetime | None,
113164
dir_name_field: str | None,
165+
extra_soql_condition: str | None,
114166
expected_query: str,
115167
):
116168
client = Mock()
@@ -123,6 +175,7 @@ def test_download_content_document_link_list_queries(
123175
modified_date_lt=modified_date_lt,
124176
modified_date_gt=modified_date_gt,
125177
dir_name_field=dir_name_field,
178+
extra_soql_condition=extra_soql_condition,
126179
)
127180
salesforce = Salesforce(archivist_obj=archivist_obj, client=client, max_api_usage_percent=50)
128181
salesforce.download_content_document_link_list(
@@ -136,9 +189,10 @@ def test_download_content_document_link_list_queries(
136189

137190

138191
@pytest.mark.parametrize(
139-
"modified_date_lt, modified_date_gt, expected_query",
192+
"modified_date_lt, modified_date_gt, extra_soql_condition, expected_query",
140193
[
141194
(
195+
None,
142196
None,
143197
None,
144198
"SELECT Id, ParentId, BodyLength, Name FROM Attachment",
@@ -155,6 +209,7 @@ def test_download_content_document_link_list_queries(
155209
tzinfo=timezone.utc,
156210
),
157211
None,
212+
None,
158213
(
159214
"SELECT Id, ParentId, BodyLength, Name "
160215
"FROM Attachment "
@@ -182,17 +237,47 @@ def test_download_content_document_link_list_queries(
182237
microsecond=0,
183238
tzinfo=timezone.utc,
184239
),
240+
None,
185241
(
186242
"SELECT Id, ParentId, BodyLength, Name "
187243
"FROM Attachment "
188244
"WHERE LastModifiedDate < 2024-01-01T00:00:00Z AND LastModifiedDate > 2023-01-01T00:00:00Z"
189245
),
190246
),
247+
(
248+
datetime(
249+
year=2024,
250+
month=1,
251+
day=1,
252+
hour=0,
253+
minute=0,
254+
second=0,
255+
microsecond=0,
256+
tzinfo=timezone.utc,
257+
),
258+
datetime(
259+
year=2023,
260+
month=1,
261+
day=1,
262+
hour=0,
263+
minute=0,
264+
second=0,
265+
microsecond=0,
266+
tzinfo=timezone.utc,
267+
),
268+
"MyCustomField__c = 'MyValue'",
269+
(
270+
"SELECT Id, ParentId, BodyLength, Name "
271+
"FROM Attachment "
272+
"WHERE LastModifiedDate < 2024-01-01T00:00:00Z AND LastModifiedDate > 2023-01-01T00:00:00Z AND MyCustomField__c = 'MyValue'"
273+
),
274+
),
191275
],
192276
)
193277
def test_download_attachment_list_queries(
194278
modified_date_lt: datetime | None,
195279
modified_date_gt: datetime | None,
280+
extra_soql_condition: str | None,
196281
expected_query: str,
197282
):
198283
client = Mock()
@@ -204,6 +289,7 @@ def test_download_attachment_list_queries(
204289
obj_type="Attachment",
205290
modified_date_lt=modified_date_lt,
206291
modified_date_gt=modified_date_gt,
292+
extra_soql_condition=extra_soql_condition,
207293
)
208294
salesforce = Salesforce(archivist_obj=archivist_obj, client=client, max_api_usage_percent=50)
209295
salesforce.download_attachment_list(

0 commit comments

Comments
 (0)