Skip to content

Commit aeb6185

Browse files
authored
Fix issues with SF ContentDocument restrictions (#13)
* Fix issues with SF ContentDocument restrictions. This is a workaround for queries that previously worked but that Salesforce now rejects as malformed. Errors that were occurring: > Implementation restriction: ContentDocumentLink requires a filter by a single Id on ContentDocumentId or LinkedEntityId using the equals operator or multiple Id's using the IN operator. > Implementation restriction: filtering on non-id fields is only permitted when filtering by ContentDocumentLink.LinkedEntityId using the equals operator. * Update tests for salesforce.py
1 parent 1c07e48 commit aeb6185

File tree

2 files changed

+50
-35
lines changed

2 files changed

+50
-35
lines changed

src/salesforce_archivist/salesforce/salesforce.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,25 +47,37 @@ def _init_tmp_dir(self) -> str:
4747
return tmp_dir
4848

4949
def _get_content_document_list_query(self) -> str:
50-
select_list = ["LinkedEntityId", "ContentDocumentId"]
50+
select_list = ["LinkedEntityId", "ContentDocumentId", "LinkedEntity.Type"]
5151
if self._archivist_obj.dir_name_field is not None and self._archivist_obj.dir_name_field not in select_list:
5252
select_list.append(self._archivist_obj.dir_name_field)
53-
where_list = ["LinkedEntity.Type = '{obj_type}'".format(obj_type=self._archivist_obj.obj_type)]
53+
where_conditions = []
5454
if self._archivist_obj.modified_date_lt is not None:
55-
where_list.append(
55+
where_conditions.append(
5656
"ContentDocument.ContentModifiedDate < {date}".format(
5757
date=self._archivist_obj.modified_date_lt.strftime("%Y-%m-%dT%H:%M:%SZ")
5858
)
5959
)
6060
if self._archivist_obj.modified_date_gt is not None:
61-
where_list.append(
61+
where_conditions.append(
6262
"ContentDocument.ContentModifiedDate > {date}".format(
6363
date=self._archivist_obj.modified_date_gt.strftime("%Y-%m-%dT%H:%M:%SZ")
6464
)
6565
)
66-
return "SELECT {fields} FROM ContentDocumentLink WHERE {where}".format(
67-
fields=", ".join(select_list), where=" AND ".join(where_list)
68-
)
66+
where = ""
67+
if len(where_conditions):
68+
where = "WHERE {}".format(" AND ".join(where_conditions))
69+
# The query filters with WHERE ... IN on a subquery, and does not filter on `LinkedEntity.Type`, because of SF restrictions like:
70+
#
71+
# Implementation restriction: ContentDocumentLink requires a filter by a single Id on ContentDocumentId
72+
# or LinkedEntityId using the equals operator or multiple Id's using the IN operator.
73+
#
74+
# Implementation restriction: filtering on non-id fields is only permitted when filtering
75+
# by ContentDocumentLink.LinkedEntityId using the equals operator.
76+
77+
return (
78+
"SELECT {fields} FROM ContentDocumentLink "
79+
"WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument {where})"
80+
).format(fields=", ".join(select_list), where=where)
6981

7082
def download_content_document_link_list(
7183
self,
@@ -81,10 +93,14 @@ def download_content_document_link_list(
8193
reader = csv.reader(file)
8294
next(reader)
8395
for row in reader:
96+
# Skip rows whose linked entity type is not the same as the configured object type.
97+
# This is a workaround for the restriction on filtering ContentDocumentLink by type directly in the query.
98+
if row[2] != self._archivist_obj.obj_type:
99+
continue
84100
link = ContentDocumentLink(
85101
linked_entity_id=row[0],
86102
content_document_id=row[1],
87-
download_dir_name=row[2] if self._archivist_obj.dir_name_field is not None else None,
103+
download_dir_name=row[3] if self._archivist_obj.dir_name_field is not None else None,
88104
)
89105
document_link_list.add_link(link)
90106

test/salesforce/test_salesforce.py

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
None,
2323
None,
2424
None,
25-
"SELECT LinkedEntityId, ContentDocumentId FROM ContentDocumentLink WHERE LinkedEntity.Type = 'User'",
25+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type FROM ContentDocumentLink WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument )",
2626
),
2727
(
2828
datetime(
@@ -38,10 +38,9 @@
3838
None,
3939
None,
4040
(
41-
"SELECT LinkedEntityId, ContentDocumentId "
41+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type "
4242
"FROM ContentDocumentLink "
43-
"WHERE LinkedEntity.Type = 'User' "
44-
"AND ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z"
43+
"WHERE ContentDocumentId IN (SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z)"
4544
),
4645
),
4746
(
@@ -67,11 +66,11 @@
6766
),
6867
None,
6968
(
70-
"SELECT LinkedEntityId, ContentDocumentId "
69+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type "
7170
"FROM ContentDocumentLink "
72-
"WHERE LinkedEntity.Type = 'User' "
73-
"AND ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z "
74-
"AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
71+
"WHERE ContentDocumentId IN ("
72+
"SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
73+
")"
7574
),
7675
),
7776
(
@@ -97,11 +96,11 @@
9796
),
9897
"DirField",
9998
(
100-
"SELECT LinkedEntityId, ContentDocumentId, DirField "
99+
"SELECT LinkedEntityId, ContentDocumentId, LinkedEntity.Type, DirField "
101100
"FROM ContentDocumentLink "
102-
"WHERE LinkedEntity.Type = 'User' "
103-
"AND ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z "
104-
"AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
101+
"WHERE ContentDocumentId IN ("
102+
"SELECT Id FROM ContentDocument WHERE ContentDocument.ContentModifiedDate < 2024-01-01T00:00:00Z AND ContentDocument.ContentModifiedDate > 2023-01-01T00:00:00Z"
103+
")"
105104
),
106105
),
107106
],
@@ -141,35 +140,35 @@ def test_download_content_document_link_list_queries(
141140
[],
142141
# no results from query (file with only header)
143142
[
144-
[["LinkedEntityId", "ContentDocumentId"]],
143+
[["LinkedEntityId", "ContentDocumentId", "Type"]],
145144
],
146145
# results without custom field for dir name
147146
[
148147
[
149-
["LinkedEntityId", "ContentDocumentId"],
150-
["LinkedEntityId_1", "ContentDocumentId_1"],
151-
["LinkedEntityId_2", "ContentDocumentId_2"],
148+
["LinkedEntityId", "ContentDocumentId", "User"],
149+
["LinkedEntityId_1", "ContentDocumentId_1", "User"],
150+
["LinkedEntityId_2", "ContentDocumentId_2", "User"],
152151
]
153152
],
154153
# results with custom field for dir name
155154
[
156155
[
157-
["LinkedEntityId", "ContentDocumentId", "CustomFieldForDirName"],
158-
["LinkedEntityId_1", "ContentDocumentId_1", "CustomFieldForDirName_1"],
159-
["LinkedEntityId_2", "ContentDocumentId_2", "CustomFieldForDirName_2"],
156+
["LinkedEntityId", "ContentDocumentId", "User", "CustomFieldForDirName"],
157+
["LinkedEntityId_1", "ContentDocumentId_1", "User", "CustomFieldForDirName_1"],
158+
["LinkedEntityId_2", "ContentDocumentId_2", "User", "CustomFieldForDirName_2"],
160159
]
161160
],
162161
# results with custom field for dir name in multiple csv files
163162
[
164163
[
165-
["LinkedEntityId", "ContentDocumentId", "CustomFieldForDirName"],
166-
["LinkedEntityId_1", "ContentDocumentId_1", "CustomFieldForDirName_1"],
167-
["LinkedEntityId_2", "ContentDocumentId_2", "CustomFieldForDirName_2"],
164+
["LinkedEntityId", "ContentDocumentId", "User", "CustomFieldForDirName"],
165+
["LinkedEntityId_1", "ContentDocumentId_1", "User", "CustomFieldForDirName_1"],
166+
["LinkedEntityId_2", "ContentDocumentId_2", "User", "CustomFieldForDirName_2"],
168167
],
169168
[
170-
["LinkedEntityId", "ContentDocumentId", "CustomFieldForDirName"],
171-
["LinkedEntityId_3", "ContentDocumentId_3", "CustomFieldForDirName_3"],
172-
["LinkedEntityId_4", "ContentDocumentId_4", "CustomFieldForDirName_4"],
169+
["LinkedEntityId", "ContentDocumentId", "User", "CustomFieldForDirName"],
170+
["LinkedEntityId_3", "ContentDocumentId_3", "User", "CustomFieldForDirName_3"],
171+
["LinkedEntityId_4", "ContentDocumentId_4", "User", "CustomFieldForDirName_4"],
173172
],
174173
],
175174
],
@@ -182,7 +181,7 @@ def test_download_content_document_link_list_csv_reading(
182181
archivist_obj = ArchivistObject(
183182
data_dir=tmp_dir,
184183
obj_type="User",
185-
dir_name_field=(csv_files_data[0][0][2] if len(csv_files_data) and len(csv_files_data[0][0]) > 2 else None),
184+
dir_name_field=(csv_files_data[0][0][3] if len(csv_files_data) and len(csv_files_data[0][0]) > 3 else None),
186185
)
187186
client.bulk2 = Mock(
188187
side_effect=lambda *args, **kwargs: gen_temp_csv_files(
@@ -196,7 +195,7 @@ def test_download_content_document_link_list_csv_reading(
196195
doc_link = ContentDocumentLink(
197196
linked_entity_id=row[0],
198197
content_document_id=row[1],
199-
download_dir_name=row[2] if len(row) > 2 else row[0],
198+
download_dir_name=row[3] if len(row) > 3 else row[0],
200199
)
201200
add_link_calls.append(call(doc_link))
202201

0 commit comments

Comments
 (0)