Skip to content

Commit 83d072c

Browse files
authored
Trim duplicate explanations with id match (owasp-dep-scan#431)
Signed-off-by: Prabhu Subramanian <[email protected]>
1 parent 5baedd9 commit 83d072c

File tree

2 files changed

+180
-168
lines changed

2 files changed

+180
-168
lines changed

depscan/lib/explainer.py

Lines changed: 25 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -47,9 +47,14 @@ def explain(project_type, src_dir, bom_dir, vdr_file, vdr_result, explanation_mo
4747
rsection = Markdown("""## Service Endpoints
4848
4949
The following endpoints and code hotspots were identified by depscan. Verify that proper authentication and authorization mechanisms are in place to secure them.""")
50-
console.print(rsection)
50+
any_endpoints_shown = False
5151
for ospec in openapi_spec_files:
52-
pattern_methods = print_endpoints(ospec)
52+
pattern_methods = print_endpoints(
53+
ospec, rsection if not any_endpoints_shown else None
54+
)
55+
if not any_endpoints_shown and pattern_methods:
56+
any_endpoints_shown = True
57+
5358
# Return early for endpoints only explanations
5459
if explanation_mode in ("Endpoints",):
5560
return
@@ -109,7 +114,7 @@ def _track_usage_targets(usage_targets, usages_object):
109114
usage_targets.add(f"{file}#{l}")
110115

111116

112-
def print_endpoints(ospec):
117+
def print_endpoints(ospec, header_section=None):
113118
if not ospec:
114119
return
115120
paths = json_load(ospec).get("paths") or {}
@@ -151,6 +156,9 @@ def print_endpoints(ospec):
151156
sorted_areas.sort()
152157
rtable.add_row(k, ("\n".join(v)).upper(), "\n".join(sorted_areas))
153158
if pattern_methods:
159+
# Print the header section
160+
if header_section:
161+
console.print(header_section)
154162
console.print()
155163
console.print(rtable)
156164
return pattern_methods
@@ -178,6 +186,7 @@ def explain_reachables(
178186
reachable_explanations = 0
179187
checked_flows = 0
180188
has_crypto_flows = False
189+
explained_ids = {}
181190
purls_reachable_explanations = defaultdict(int)
182191
source_reachable_explanations = defaultdict(int)
183192
sink_reachable_explanations = defaultdict(int)
@@ -194,16 +203,9 @@ def explain_reachables(
194203
or (not areach.get("purls") and not cpp_flow)
195204
):
196205
continue
197-
# Focus only on the prioritized list if available
198-
# if project_type in ("java",) and pkg_group_rows:
199-
# is_prioritized = False
200-
# for apurl in areach.get("purls"):
201-
# if pkg_group_rows.get(apurl):
202-
# is_prioritized = True
203-
# if not is_prioritized:
204-
# continue
205206
(
206207
flow_tree,
208+
added_ids,
207209
comment,
208210
source_sink_desc,
209211
source_code_str,
@@ -218,7 +220,13 @@ def explain_reachables(
218220
project_type,
219221
vdr_result,
220222
)
221-
if not source_sink_desc or not flow_tree or len(flow_tree.children) < 5:
223+
# The goal is to reduce duplicate explanations by checking if a given flow is similar to one we have explained
224+
# before. We do this by checking the node ids, source-sink explanations, purl tags and so on.
225+
added_ids_str = "-".join(added_ids)
226+
# Have we seen this sequence before?
227+
if explained_ids.get(added_ids_str) or len(added_ids) < 4:
228+
continue
229+
if not source_sink_desc or not flow_tree or len(flow_tree.children) < 4:
222230
continue
223231
# In non-reachables mode, we are not interested in reachable flows.
224232
if (
@@ -269,6 +277,7 @@ def explain_reachables(
269277
header_shown = True
270278
console.print()
271279
console.print(rtable)
280+
explained_ids[added_ids_str] = True
272281
reachable_explanations += 1
273282
if purls_str:
274283
purls_reachable_explanations[purls_str] += 1
@@ -428,7 +437,7 @@ def filter_tags(tags):
428437

429438

430439
def is_filterable_code(project_type, code):
431-
if len(code) < 5:
440+
if len(code) < 3:
432441
return True
433442
for c in (
434443
"console.log",
@@ -510,6 +519,7 @@ def explain_flows(explanation_mode, flows, purls, project_type, vdr_result):
510519
if purls:
511520
purls_str = "\n".join(purls)
512521
comments.append(f"[info]Reachable Packages:[/info]\n{purls_str}")
522+
added_ids = []
513523
added_flows = []
514524
added_node_desc = []
515525
has_check_tag = False
@@ -547,6 +557,7 @@ def explain_flows(explanation_mode, flows, purls, project_type, vdr_result):
547557
if flow_str in added_flows or node_desc in added_node_desc:
548558
continue
549559
added_flows.append(flow_str)
560+
added_ids.append(str(aflow.get("id", "")))
550561
added_node_desc.append(node_desc)
551562
if not tree:
552563
tree = Tree(flow_str)
@@ -561,6 +572,7 @@ def explain_flows(explanation_mode, flows, purls, project_type, vdr_result):
561572
)
562573
return (
563574
tree,
575+
added_ids,
564576
"\n".join(comments),
565577
source_sink_desc,
566578
source_code_str,

0 commit comments

Comments
 (0)