Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .clusterfuzzlite/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Build image used by ClusterFuzzLite to compile the aemo-mdff-reader fuzzers.
# Based on the OSS-Fuzz Python builder image, which ships atheris and the
# compile_python_fuzzer helper.
FROM gcr.io/oss-fuzz-base/base-builder-python

# Copy the whole build context into the source tree and work from there.
COPY . ${SRC}/aemo-mdff-reader
WORKDIR ${SRC}/aemo-mdff-reader

# Place the build script at $SRC/build.sh.
COPY .clusterfuzzlite/build.sh ${SRC}/build.sh
10 changes: 10 additions & 0 deletions .clusterfuzzlite/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# ClusterFuzzLite build script — installs the package and compiles each
# atheris harness in fuzz/ via OSS-Fuzz's compile_python_fuzzer helper.

# Set the error flags here rather than on the shebang line: shebang
# options are ignored when the script is started as `bash build.sh`.
set -eu

cd "$SRC/aemo-mdff-reader"
pip3 install --no-cache-dir .

for fuzzer in fuzz/fuzz_*.py; do
  # An unmatched glob is left as the literal pattern; fail with a clear
  # message instead of handing a non-existent path to the compiler.
  if [ ! -e "$fuzzer" ]; then
    echo "build.sh: no fuzz harnesses match fuzz/fuzz_*.py" >&2
    exit 1
  fi
  compile_python_fuzzer "$fuzzer"
done
1 change: 1 addition & 0 deletions .clusterfuzzlite/project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# ClusterFuzzLite project settings: the fuzz harnesses in this repo are Python.
language: python
39 changes: 39 additions & 0 deletions .github/workflows/cflite_batch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
name: ClusterFuzzLite scheduled batch fuzz

# Longer scheduled fuzz session that grows the persistent corpus. Each
# sanitizer runs as its own matrix job for ``fuzz-seconds``.
#
# No ``storage-repo`` is configured, so ClusterFuzzLite keeps the corpus
# and crash artifacts in GitHub Actions artifacts (its default on
# GitHub), not in a git branch. To store them in a branch instead, set
# ``storage-repo`` / ``storage-repo-branch`` on the run step and grant
# that job ``contents: write``.

on:
  schedule:
    # Sundays at 02:00 UTC — quiet window, off-cycle from CodeQL/Scorecard.
    - cron: "0 2 * * 0"
  workflow_dispatch:

# Read-only token is sufficient: artifact storage needs no repo write access.
permissions: read-all

jobs:
  batch-fuzz:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        sanitizer: [address, undefined]
    steps:
      - name: Build fuzzers (${{ matrix.sanitizer }})
        uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
        with:
          language: python
          sanitizer: ${{ matrix.sanitizer }}
      - name: Run fuzzers (${{ matrix.sanitizer }})
        uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
        with:
          # Lets the action fetch the corpus artifacts saved by earlier
          # runs, so each session extends the corpus instead of
          # starting from scratch.
          github-token: ${{ secrets.GITHUB_TOKEN }}
          language: python
          fuzz-seconds: 1800
          mode: batch
          sanitizer: ${{ matrix.sanitizer }}
45 changes: 45 additions & 0 deletions .github/workflows/cflite_pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
name: ClusterFuzzLite PR fuzz

# Per-PR fuzz: smoke-test the build and run a quick (30s) crash search
# only on PRs whose changes actually reach the parser. The longer
# corpus-extending pass + the second sanitizer live in cflite_batch.yml
# so PRs aren't held up by fuzzing.
#
# `paths` is enumerated explicitly (not `aemo_mdff_reader/**`) to skip
# fuzzing on cli / aggregate / reader / sql changes — none of which
# the harnesses exercise.

on:
  pull_request:
    # The job below skips draft PRs. `ready_for_review` is not one of the
    # default activity types, so it is listed explicitly: otherwise a PR
    # opened as a draft would not be fuzzed when it is marked ready, only
    # on its next push.
    types: [opened, synchronize, reopened, ready_for_review]
    paths:
      - "aemo_mdff_reader/__init__.py"
      - "aemo_mdff_reader/parser.py"
      - "aemo_mdff_reader/types.py"
      - "aemo_mdff_reader/spec.py"
      - "fuzz/**"
      - ".clusterfuzzlite/**"
      - ".github/workflows/cflite_pr.yml"

permissions: read-all

jobs:
  fuzz:
    name: fuzz (address, 30s)
    # Skip draft PRs — fuzz on the final form, not the in-progress one.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    timeout-minutes: 6
    steps:
      - name: Build fuzzers
        uses: google/clusterfuzzlite/actions/build_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
        with:
          language: python
          # Passed as in the ClusterFuzzLite example workflow, where it is
          # used to limit the run to fuzzers affected by the PR.
          github-token: ${{ secrets.GITHUB_TOKEN }}
          sanitizer: address
      - name: Run fuzzers (30s smoke)
        uses: google/clusterfuzzlite/actions/run_fuzzers@884713a6c30a92e5e8544c39945cd7cb630abcd1 # v1
        with:
          # Lets the action download the stored corpus artifacts.
          github-token: ${{ secrets.GITHUB_TOKEN }}
          language: python
          fuzz-seconds: 30
          mode: code-change
          sanitizer: address
          output-sarif: true
19 changes: 19 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,21 @@ jobs:
/tmp/smoke/bin/python -c "import aemo_mdff_reader as m; print(m.__version__)"
/tmp/smoke/bin/aemo-mdff-reader --version
- name: Generate build provenance attestation
id: provenance
uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0
with:
subject-path: |
dist/*.whl
dist/*.tar.gz
# Stage the provenance bundle as a file alongside the release so
# OpenSSF Scorecard's signed-releases check (which scans release
# assets, not GitHub's attestations API) sees an in-toto provenance
# artefact and awards full marks.
- name: Stage provenance bundle for the release
  env:
    # Pass the step output through the environment instead of expanding
    # `${{ }}` inside the script, so the path is never parsed as shell.
    BUNDLE_PATH: ${{ steps.provenance.outputs.bundle-path }}
  run: |
    mkdir -p provenance
    cp "$BUNDLE_PATH" "provenance/aemo_mdff_reader.intoto.jsonl"
    ls -la provenance/
# SBOM is written outside dist/ so the publish job's PyPI upload
# (which only accepts .whl/.tar.gz) is not contaminated. anchore's
# sbom-action does not auto-create the parent directory of
Expand Down Expand Up @@ -86,6 +96,10 @@ jobs:
with:
name: sbom
path: sbom/
- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7
with:
name: provenance
path: provenance/

publish:
name: Publish to PyPI
Expand Down Expand Up @@ -150,6 +164,10 @@ jobs:
with:
name: sbom
path: sbom/
- uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
with:
name: provenance
path: provenance/
- name: Create GitHub Release with notes from CHANGELOG
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3
with:
Expand All @@ -159,3 +177,4 @@ jobs:
dist/*.whl
signatures/*
sbom/*
provenance/*
43 changes: 43 additions & 0 deletions fuzz/fuzz_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Fuzz the NEM12 streaming parser entry point.

Run locally:
pip install atheris
python fuzz/fuzz_parse.py -atheris_runs=10000

Run in OSS-Fuzz / ClusterFuzzLite: this file is built by
.clusterfuzzlite/build.sh.
"""

from __future__ import annotations

import io
import sys

import atheris

with atheris.instrument_imports():
from aemo_mdff_reader import parse


def TestOneInput(data: bytes) -> None:
    """Feed one fuzzer-generated input through ``parse`` and drain the result.

    The bytes are decoded as UTF-8 (undecodable bytes become U+FFFD) and
    wrapped in an ``io.StringIO`` before being handed to the parser.

    The target is pure Python and therefore memory-safe, so this harness
    looks for hangs, infinite loops and pathological memory growth rather
    than crashes. An exception raised on malformed input counts as an
    expected rejection and is discarded; ``SystemExit`` and
    ``KeyboardInterrupt`` are not ``Exception`` subclasses and propagate.
    """
    try:
        stream = io.StringIO(data.decode("utf-8", errors="replace"))
        for _record in parse(stream):
            continue
    except Exception:  # deliberate catch-all, see docstring
        pass


def main() -> None:
    """Register ``TestOneInput`` with atheris and start the fuzzing loop."""
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()


# Only start fuzzing when executed as a script, not on import.
if __name__ == "__main__":
    main()
31 changes: 31 additions & 0 deletions fuzz/fuzz_parse_accumulations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Fuzz the NEM13 (accumulation) parser."""

from __future__ import annotations

import io
import sys

import atheris

with atheris.instrument_imports():
from aemo_mdff_reader import parse_accumulations


def TestOneInput(data: bytes) -> None:
    """Feed one fuzzer-generated input through ``parse_accumulations``.

    The bytes are decoded as UTF-8 (undecodable bytes become U+FFFD),
    wrapped in an ``io.StringIO``, and the parser's result is iterated to
    exhaustion. The catch-all ``except`` is intentional: for a pure-Python,
    memory-safe target the harness is hunting for hangs and pathological
    growth, so any exception on malformed input is discarded.
    """
    try:
        stream = io.StringIO(data.decode("utf-8", errors="replace"))
        for _record in parse_accumulations(stream):
            continue
    except Exception:  # deliberate catch-all, see docstring
        pass


def main() -> None:
    """Register ``TestOneInput`` with atheris and start the fuzzing loop."""
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()


# Only start fuzzing when executed as a script, not on import.
if __name__ == "__main__":
    main()
30 changes: 30 additions & 0 deletions fuzz/fuzz_parse_to_columns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Fuzz the columnar fast-path build."""

from __future__ import annotations

import io
import sys

import atheris

with atheris.instrument_imports():
from aemo_mdff_reader import parse_to_columns


def TestOneInput(data: bytes) -> None:
    """Feed one fuzzer-generated input through ``parse_to_columns``.

    The bytes are decoded as UTF-8 (undecodable bytes become U+FFFD) and
    wrapped in an ``io.StringIO``; the call's return value is ignored. The
    catch-all ``except`` is intentional: for a pure-Python, memory-safe
    target the harness is hunting for hangs and pathological growth, so
    any exception on malformed input is discarded.
    """
    try:
        decoded = data.decode("utf-8", errors="replace")
        source = io.StringIO(decoded)
        parse_to_columns(source)
    except Exception:  # deliberate catch-all, see docstring
        pass


def main() -> None:
    """Register ``TestOneInput`` with atheris and start the fuzzing loop."""
    argv = sys.argv
    atheris.Setup(argv, TestOneInput)
    atheris.Fuzz()


# Only start fuzzing when executed as a script, not on import.
if __name__ == "__main__":
    main()
Loading