Skip to content

Commit 6646a70

Browse files
authored
c semantics (owasp-dep-scan#421)
* Explainer improvements Signed-off-by: Prabhu Subramanian <[email protected]> * c repotests Signed-off-by: Prabhu Subramanian <[email protected]> * Track malicious packages under priority items Signed-off-by: Prabhu Subramanian <[email protected]> * Ruby cdxgen image tags fix Signed-off-by: Prabhu Subramanian <[email protected]> * Repo tests Signed-off-by: Prabhu Subramanian <[email protected]> --------- Signed-off-by: Prabhu Subramanian <[email protected]>
1 parent 351ee2c commit 6646a70

File tree

5 files changed

+99
-24
lines changed

5 files changed

+99
-24
lines changed

.github/workflows/repotests.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,13 @@ jobs:
129129
uv run depscan --src ${GITHUB_WORKSPACE}/depscan_reports/cdxgen --bom-dir ${GITHUB_WORKSPACE}/depscan_reports/cdxgen --reports-dir ${GITHUB_WORKSPACE}/depscan_reports/cdxgen --reachability-analyzer SemanticReachability --explain
130130
rm -rf ${GITHUB_WORKSPACE}/depscan_reports
131131
shell: bash
132+
- name: repotests Signal-Desktop
133+
run: |
134+
mkdir -p ${GITHUB_WORKSPACE}/depscan_reports/Signal-Desktop
135+
uv run huggingface-cli download AppThreat/ukaina --include "js/Signal-Desktop/*.json" --exclude "js/Signal-Desktop/*.vdr.json" --repo-type dataset --local-dir ${GITHUB_WORKSPACE}/depscan_reports/Signal-Desktop
136+
uv run depscan --src ${GITHUB_WORKSPACE}/depscan_reports/Signal-Desktop --bom-dir ${GITHUB_WORKSPACE}/depscan_reports/Signal-Desktop --reports-dir ${GITHUB_WORKSPACE}/depscan_reports/Signal-Desktop --reachability-analyzer SemanticReachability --explain
137+
rm -rf ${GITHUB_WORKSPACE}/depscan_reports
138+
shell: bash
132139
- name: repotests forgejo
133140
run: |
134141
mkdir -p ${GITHUB_WORKSPACE}/depscan_reports/forgejo
@@ -164,6 +171,20 @@ jobs:
164171
uv run depscan --src ${GITHUB_WORKSPACE}/depscan_reports/depscan --bom-dir ${GITHUB_WORKSPACE}/depscan_reports/phpmyadmin --reports-dir ${GITHUB_WORKSPACE}/depscan_reports/phpmyadmin --reachability-analyzer SemanticReachability --explain
165172
rm -rf ${GITHUB_WORKSPACE}/depscan_reports
166173
shell: bash
174+
- name: repotests openssl-3.5.0
175+
run: |
176+
mkdir -p ${GITHUB_WORKSPACE}/depscan_reports/openssl-3.5.0
177+
uv run huggingface-cli download AppThreat/ukaina --include "c/openssl-3.5.0/*.json" --exclude "c/openssl-3.5.0/*.vdr.json" --repo-type dataset --local-dir ${GITHUB_WORKSPACE}/depscan_reports/openssl-3.5.0
178+
uv run depscan --src ${GITHUB_WORKSPACE}/depscan_reports/depscan --bom-dir ${GITHUB_WORKSPACE}/depscan_reports/openssl-3.5.0 --reports-dir ${GITHUB_WORKSPACE}/depscan_reports/openssl-3.5.0 --reachability-analyzer SemanticReachability --explain
179+
rm -rf ${GITHUB_WORKSPACE}/depscan_reports
180+
shell: bash
181+
- name: repotests open5gs-2.7.5
182+
run: |
183+
mkdir -p ${GITHUB_WORKSPACE}/depscan_reports/open5gs-2.7.5
184+
uv run huggingface-cli download AppThreat/ukaina --include "c/open5gs-2.7.5/*.json" --exclude "c/open5gs-2.7.5/*.vdr.json" --repo-type dataset --local-dir ${GITHUB_WORKSPACE}/depscan_reports/open5gs-2.7.5
185+
uv run depscan --src ${GITHUB_WORKSPACE}/depscan_reports/depscan --bom-dir ${GITHUB_WORKSPACE}/depscan_reports/open5gs-2.7.5 --reports-dir ${GITHUB_WORKSPACE}/depscan_reports/open5gs-2.7.5 --reachability-analyzer SemanticReachability --explain
186+
rm -rf ${GITHUB_WORKSPACE}/depscan_reports
187+
shell: bash
167188
- name: Set up JDK
168189
uses: actions/setup-java@v4
169190
with:

depscan/lib/explainer.py

Lines changed: 63 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,16 @@ def explain(project_type, src_dir, bom_dir, vdr_result):
2323
:param bom_dir: BOM directory
2424
"""
2525
pattern_methods = {}
26+
has_any_explanation = False
27+
has_any_crypto_flows = False
2628
slices_files = glob.glob(f"{bom_dir}/**/*reachables.slices.json", recursive=True)
2729
openapi_spec_files = glob.glob(f"{bom_dir}/*openapi*.json", recursive=False)
2830
if not openapi_spec_files:
2931
openapi_spec_files = glob.glob(f"{src_dir}/*openapi*.json", recursive=False)
3032
if openapi_spec_files:
3133
rsection = Markdown("""## Service Endpoints
3234
33-
The following endpoints and code hotspots were identified by depscan. Ensure proper authentication and authorization mechanisms are implemented to secure them.""")
35+
The following endpoints and code hotspots were identified by depscan. Verify that proper authentication and authorization mechanisms are in place to secure them.""")
3436
console.print(rsection)
3537
for ospec in openapi_spec_files:
3638
pattern_methods = print_endpoints(ospec)
@@ -42,18 +44,28 @@ def explain(project_type, src_dir, bom_dir, vdr_result):
4244
rsection = Markdown(
4345
"""## Reachable Flows
4446
45-
Below are some reachable flows, including endpoint-reachable ones, identified by depscan. Use the generated OpenAPI specification file to assess these endpoints for vulnerabilities and risk.
47+
Below are some reachable flows, including those accessible via endpoints, identified by depscan. Use the generated OpenAPI specification to evaluate these endpoints for vulnerabilities and risk.
4648
"""
4749
)
4850
else:
4951
rsection = Markdown(
5052
"""## Reachable Flows
5153
52-
Below are some reachable flows identified by depscan. Use the provided tips to enhance your application's security posture.
54+
Below are several data flows identified by depscan, including reachable ones. Use the tips provided to strengthen your applications security posture.
5355
"""
5456
)
55-
console.print(rsection)
56-
explain_reachables(reachables_data, project_type, vdr_result)
57+
has_explanation, has_crypto_flows = explain_reachables(
58+
reachables_data,
59+
project_type,
60+
vdr_result,
61+
rsection if not has_any_explanation else None,
62+
)
63+
if not has_any_explanation and has_explanation:
64+
has_any_explanation = True
65+
if not has_any_crypto_flows and has_crypto_flows:
66+
has_any_crypto_flows = True
67+
if slices_files and not has_any_explanation and not has_any_crypto_flows:
68+
console.print("depscan did not find any reachable flow in this scan.")
5769

5870

5971
def _track_usage_targets(usage_targets, usages_object):
@@ -110,17 +122,34 @@ def print_endpoints(ospec):
110122
return pattern_methods
111123

112124

113-
def explain_reachables(reachables, project_type, vdr_result):
125+
def is_cpp_flow(flows):
    """Return True when one of the leading flows comes from a C/C++ source file.

    Only the first few entries of ``flows`` are inspected; the check gives up
    (returning False) once that window has been scanned without a match.

    :param flows: Sequence of flow mappings; each may carry a
        ``parentFileName`` entry. May be None or empty.
    :return: True if any inspected flow has a ``parentFileName`` ending in
        ``.c`` or ``.cpp``, otherwise False.
    """
    if not flows:
        return False
    # Same window as before: the first four flows decide the outcome.
    max_flows_to_check = 4
    for idx, aflow in enumerate(flows):
        if idx >= max_flows_to_check:
            break
        # ``parentFileName`` can be present but null in the slice data;
        # normalise it to an empty string instead of failing on None.
        parent_file_name = aflow.get("parentFileName") or ""
        if parent_file_name.endswith((".c", ".cpp")):
            return True
    return False
138+
139+
140+
def explain_reachables(reachables, project_type, vdr_result, header_section=None):
114141
""""""
115142
reachable_explanations = 0
116143
checked_flows = 0
117144
has_crypto_flows = False
118145
purls_reachable_explanations = defaultdict(int)
146+
has_explanation = False
147+
header_shown = False
119148
for areach in reachables.get("reachables", []):
120149
if (
121150
not areach.get("flows")
122151
or len(areach.get("flows")) < 2
123-
or not areach.get("purls")
152+
or (not areach.get("purls") and not is_cpp_flow(areach.get("flows")))
124153
):
125154
continue
126155
# Focus only on the prioritized list if available
@@ -145,10 +174,13 @@ def explain_reachables(reachables, project_type, vdr_result):
145174
continue
146175
purls_str = ",".join(sorted(areach.get("purls", [])))
147176
if (
148-
purls_reachable_explanations[purls_str] + 1
177+
purls_str
178+
and purls_reachable_explanations[purls_str] + 1
149179
> max_purls_reachable_explanations
150180
):
151181
continue
182+
if not has_explanation:
183+
has_explanation = True
152184
# Did we find any crypto flows
153185
if is_crypto_flow and not has_crypto_flows:
154186
has_crypto_flows = True
@@ -163,15 +195,21 @@ def explain_reachables(reachables, project_type, vdr_result):
163195
)
164196
rtable.add_column(header="Flow", vertical="top")
165197
rtable.add_row(flow_tree)
198+
# Print the header first in case we haven't
199+
if not header_shown and header_section:
200+
console.print()
201+
console.print(header_section)
202+
header_shown = True
166203
console.print()
167204
console.print(rtable)
168205
reachable_explanations += 1
169-
purls_reachable_explanations[purls_str] += 1
206+
if purls_str:
207+
purls_reachable_explanations[purls_str] += 1
170208
if has_check_tag:
171209
checked_flows += 1
172210
if reachable_explanations + 1 > max_reachable_explanations:
173211
break
174-
if reachable_explanations:
212+
if has_explanation:
175213
tips = """## Secure Design Tips"""
176214

177215
if has_crypto_flows:
@@ -183,12 +221,18 @@ def explain_reachables(reachables, project_type, vdr_result):
183221
- Review the validation and sanitization methods used in the application.
184222
- To enhance the security posture, implement a common validation middleware.
185223
"""
186-
else:
224+
elif purls_reachable_explanations:
187225
tips += """
188226
- Consider implementing a common validation and sanitization library to reduce the risk of exploitability.
227+
"""
228+
else:
229+
tips += """
230+
- Enhance your unit and integration tests to cover the flows listed above.
231+
- Additionally, set up an appropriate fuzzer to continuously evaluate the performance of the parser and validation functions across various payloads.
189232
"""
190233
rsection = Markdown(tips)
191234
console.print(rsection)
235+
return has_explanation, has_crypto_flows
192236

193237

194238
def flow_to_source_sink(idx, flow, purls, project_type, vdr_result):
@@ -200,9 +244,8 @@ def flow_to_source_sink(idx, flow, purls, project_type, vdr_result):
200244
reached_services = vdr_result.reached_services
201245
is_endpoint_reachable = False
202246
possible_reachable_service = False
203-
is_crypto_flow = "crypto" in flow.get("tags", []) or "crypto-generate" in flow.get(
204-
"tags", []
205-
)
247+
tags = flow.get("tags", [])
248+
is_crypto_flow = "crypto" in tags or "crypto-generate" in tags
206249
method_in_emoji = ":right_arrow_curving_left:"
207250
for p in purls:
208251
if endpoint_reached_purls and endpoint_reached_purls.get(p):
@@ -256,6 +299,9 @@ def flow_to_source_sink(idx, flow, purls, project_type, vdr_result):
256299
"middleware" in source_sink_desc.lower() or "route" in source_sink_desc.lower()
257300
):
258301
source_sink_desc = "The flow originates from middleware."
302+
elif len(purls) == 0:
303+
if tags:
304+
source_sink_desc = f"{source_sink_desc} can be used to reach packages with tags {','.join(tags[:2])}"
259305
elif len(purls) == 1:
260306
if is_endpoint_reachable:
261307
source_sink_desc = f"{source_sink_desc} can be used to reach this package from certain endpoints."
@@ -369,8 +415,9 @@ def explain_flows(flows, purls, project_type, vdr_result):
369415
comments.append(
370416
":exclamation_mark: Refactor this flow to minimize the use of external libraries."
371417
)
372-
purls_str = "\n".join(purls)
373-
comments.append(f"[info]Reachable Packages:[/info]\n{purls_str}")
418+
if purls:
419+
purls_str = "\n".join(purls)
420+
comments.append(f"[info]Reachable Packages:[/info]\n{purls_str}")
374421
added_flows = []
375422
has_check_tag = False
376423
last_file_loc = None

packages/analysis-lib/src/analysis_lib/output.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,16 @@ def find_next_steps(
268268
is_endpoint_reachable = True
269269
if reached_services and reached_services.get(matched_by):
270270
possible_reachable_service = True
271+
# This is a very naive way of determining whether a CVE refers to malware.
272+
# See https://github.com/AppThreat/vulnerability-db/issues/212 for an example of this being wrong.
271273
is_malware = check_malware_cve(cve_list)
272274
if is_malware:
273-
next_step_str = (
274-
":stop_sign: Malicious package! This is a [bold]security incident[/bold]."
275-
)
275+
if is_reachable or is_endpoint_reachable:
276+
next_step_str = ":stop_sign: Malicious package that is also reachable! This is a [bold]top-priority security incident[/bold]."
277+
elif is_exploitable:
278+
next_step_str = ":stop_sign: Malicious package that is also exploitable! This is a [bold]top-priority security incident[/bold]."
279+
else:
280+
next_step_str = ":stop_sign: Malicious package! This is a [bold]security incident[/bold]."
276281
# Package has a number of CVEs.
277282
elif len(cve_list) > 5:
278283
if fix_version:

packages/analysis-lib/src/analysis_lib/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,6 +1493,7 @@ def analyze_cve_vuln(
14931493
insights.append("[bright_red]:stop_sign: Malicious[/bright_red]")
14941494
plain_insights.append("Malicious")
14951495
counts.malicious_count += 1
1496+
cve_requires_attn = True
14961497
has_flagged_cwe = False
14971498
add_to_pkg_group_rows = False
14981499
if fixed_location := get_unaffected(vuln):

packages/xbom-lib/src/xbom_lib/cdxgen.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
# version of cdxgen to use
2121
CDXGEN_IMAGE_VERSION = os.getenv("CDXGEN_IMAGE_VERSION", "latest")
22+
CDXGEN_IMAGE_ROLLING_VERSION = os.getenv("CDXGEN_IMAGE_ROLLING_VERSION", "v11")
2223

2324
# cdxgen default image to use
2425
DEFAULT_IMAGE_NAME = (
@@ -56,10 +57,10 @@
5657
"python": f"ghcr.io/cyclonedx/cdxgen-python312:{CDXGEN_IMAGE_VERSION}",
5758
"swift": f"ghcr.io/cyclonedx/cdxgen-debian-swift6:{CDXGEN_IMAGE_VERSION}",
5859
"swift6": f"ghcr.io/cyclonedx/cdxgen-debian-swift6:{CDXGEN_IMAGE_VERSION}",
59-
"ruby26": f"ghcr.io/cyclonedx/cdxgen-debian-ruby26:{CDXGEN_IMAGE_VERSION}",
60-
"ruby33": f"ghcr.io/cyclonedx/cdxgen-debian-ruby33:{CDXGEN_IMAGE_VERSION}",
61-
"ruby34": f"ghcr.io/cyclonedx/cdxgen-debian-ruby34:{CDXGEN_IMAGE_VERSION}",
62-
"ruby": f"ghcr.io/cyclonedx/cdxgen-debian-ruby34:{CDXGEN_IMAGE_VERSION}",
60+
"ruby26": f"ghcr.io/cyclonedx/cdxgen-debian-ruby26:{CDXGEN_IMAGE_ROLLING_VERSION}",
61+
"ruby33": f"ghcr.io/cyclonedx/cdxgen-debian-ruby33:{CDXGEN_IMAGE_ROLLING_VERSION}",
62+
"ruby34": f"ghcr.io/cyclonedx/cdxgen-debian-ruby34:{CDXGEN_IMAGE_ROLLING_VERSION}",
63+
"ruby": f"ghcr.io/cyclonedx/cdxgen-debian-ruby34:{CDXGEN_IMAGE_ROLLING_VERSION}",
6364
"dotnet-core": f"ghcr.io/cyclonedx/cdxgen-debian-dotnet6:{CDXGEN_IMAGE_VERSION}",
6465
"dotnet-framework": f"ghcr.io/cyclonedx/cdxgen-debian-dotnet6:{CDXGEN_IMAGE_VERSION}",
6566
"dotnet6": f"ghcr.io/cyclonedx/cdxgen-debian-dotnet6:{CDXGEN_IMAGE_VERSION}",
@@ -477,7 +478,7 @@ def generate(self) -> BOMResult:
477478
# Should we pull the most recent image
478479
if needs_latest_image(image_name):
479480
if self.logger:
480-
self.logger.debug(f"Pulling the image {image_name}")
481+
self.logger.debug(f"Pulling the image {image_name} using {container_command}.")
481482
exec_tool(
482483
[container_command, "pull", "--quiet", image_name], logger=self.logger
483484
)

0 commit comments

Comments
 (0)