From b9091053bc8ae39c3cfdd8a581cdd9d2c0c1a510 Mon Sep 17 00:00:00 2001 From: Tony Nowatzki Date: Tue, 7 Apr 2026 09:06:37 -0700 Subject: [PATCH] Fix easy artifact automation issues --- .gitignore | 1 + INSTALL.md | 10 ++++-- README.md | 17 ++++++----- scripts/collect_results.py | 9 ++++-- scripts/plot_ablation_mapping.py | 28 +++++++++++++---- scripts/run_ablation.py | 15 ++++++--- scripts/run_all.sh | 52 +++++++++++++++++++------------- scripts/setup.sh | 16 +++++++++- 8 files changed, 105 insertions(+), 43 deletions(-) diff --git a/.gitignore b/.gitignore index b8c9c18..08e5adb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ csegfold/build/ csegfold/tmp/ output/ +.venv/ __pycache__/ *.pyc .cache/ diff --git a/INSTALL.md b/INSTALL.md index 0bdc433..e6e2087 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -22,11 +22,17 @@ sudo apt-get install -y build-essential cmake g++ git python3 python3-pip curl ### Python packages ```bash +python3 -m venv .venv +. .venv/bin/activate pip install -r requirements.txt ``` This installs: numpy, scipy, matplotlib, pandas, pyyaml. +> **Note:** On Ubuntu/Debian systems with PEP 668 enabled, installing into the +> system Python may fail with an `externally-managed-environment` error. Use a +> virtual environment as shown above. + > **Note:** Only the plotting and result collection scripts need these packages. The experiment runner scripts and the C++ simulator have no Python package dependencies. ## Building from Source @@ -39,7 +45,7 @@ This installs: numpy, scipy, matplotlib, pandas, pyyaml. This script: 1. Checks all system dependencies (CMake, `g++`, Python) -2. Installs Python packages from `requirements.txt` +2. Creates or reuses a repo-local `.venv/` and installs Python packages there 3. Builds the C++ simulator with Ramulator2 HBM2 DRAM backend 4. Runs a smoke test to verify the build @@ -102,7 +108,7 @@ Typical memory usage per simulation: python3 scripts/download_matrices.py ``` -Downloads 21 SuiteSparse matrices (~50 MB total) from https://sparse.tamu.edu/. Already-downloaded matrices are skipped. +Downloads 20 SuiteSparse matrix directories (~50 MB total) from https://sparse.tamu.edu/. Already-downloaded matrices are skipped. ## Verification diff --git a/README.md b/README.md index 16b76f2..796e51f 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,8 @@ docker compose run artifact ./scripts/run_all.sh Python >= 3.8 is required. Install dependencies with: ```bash +python3 -m venv .venv +. .venv/bin/activate pip install -r requirements.txt ``` @@ -57,6 +59,7 @@ See [INSTALL.md](INSTALL.md) for detailed dependency and RAM requirements. This script: - Checks system dependencies (CMake >= 3.15, `g++` with C++20, Python 3) +- Creates or reuses a repo-local `.venv/` and installs Python packages there - Checks Python packages (numpy, scipy, matplotlib, pyyaml, pandas) - Builds the C++ simulator with Ramulator2 HBM2 DRAM backend - Runs a smoke test to verify the build @@ -69,7 +72,7 @@ After building, the simulator binary is at `csegfold/build/csegfold`. python3 scripts/download_matrices.py ``` -Downloads 21 matrices from the [SuiteSparse Matrix Collection](https://sparse.tamu.edu/) into `benchmarks/data/suitesparse/`. Matrices already present are skipped. These cover all three paper experiments. +Downloads 20 SuiteSparse matrix directories from the [SuiteSparse Matrix Collection](https://sparse.tamu.edu/) into `benchmarks/data/suitesparse/`. Matrices already present are skipped. These cover all three paper experiments. ### Step 3: Run Overall Performance Experiment @@ -177,8 +180,8 @@ Parses all `*_stats.json` files and produces: - `overall_results.csv` — SegFold cycle counts for overall performance - `nonsquare_results.csv` — SegFold cycle counts for non-square matrices - `breakdown_results.csv` — Cycle counts per config per matrix (pivoted) -- `ablation_mapping_suitesparse.csv` — Ablation mapping with memory hierarchy -- `ablation_mapping_suitesparse_nomem.csv` — Ablation mapping without memory hierarchy +- `ablation_mapping_suitesparse_results.csv` — Ablation mapping with memory hierarchy +- `ablation_mapping_suitesparse_nomem_results.csv` — Ablation mapping without memory hierarchy ### Step 9: Generate Plots @@ -187,9 +190,9 @@ python3 scripts/plot_overall.py output/my_run python3 scripts/plot_nonsquare.py output/my_run python3 scripts/plot_breakdown.py output/my_run python3 scripts/plot_ablation_mapping.py \ - --mem-csv output/my_run/ablation_mapping_suitesparse.csv \ - --nomem-csv output/my_run/ablation_mapping_suitesparse_nomem.csv \ - --output output/my_run/plots/ablation_mapping_suitesparse.pdf + --mem-csv output/my_run/ablation_mapping_suitesparse_results.csv \ + --nomem-csv output/my_run/ablation_mapping_suitesparse_nomem_results.csv \ + --output output/my_run/plots/ablation_mapping.pdf python3 scripts/plot_ablation.py output/my_run ``` @@ -197,7 +200,7 @@ Generates PDF and PNG figures in `output/my_run/plots/`: - `overall_speedup.pdf` — Bar chart: SegFold vs Spada vs Flexagon (normalized to Spada) - `nonsquare_speedup.pdf` — Bar chart: SegFold vs Spada on rectangular matrices - `breakdown_speedup.pdf` — Stacked bars: incremental speedup per optimization -- `ablation_mapping_suitesparse.pdf` — Mapping strategy comparison (with/without memory hierarchy) +- `ablation_mapping.pdf` — Mapping strategy comparison (with/without memory hierarchy) - `ablation_window_size.pdf` — Window size sweep (normalized speedup) - `ablation_crossbar_width.pdf` — Crossbar width sweep (normalized speedup) - K-reordering summary printed to console (average relative speedup/slowdown) diff --git a/scripts/collect_results.py b/scripts/collect_results.py index ead9840..736c953 100755 --- a/scripts/collect_results.py +++ b/scripts/collect_results.py @@ -7,6 +7,7 @@ - nonsquare_results.csv - breakdown_results.csv - ablation_mapping_suitesparse_results.csv + - ablation_mapping_suitesparse_nomem_results.csv - ablation_{group}_results.csv (window-size, crossbar-width, k-reordering) Usage: @@ -178,7 +179,7 @@ def collect_paper_results(output_dir: Path): # Ablation studies ablation_dir = output_dir / "ablation" # Groups that run on SuiteSparse matrices (pivoted to {config}_cycles columns) - suitesparse_groups = {"mapping-paper"} + suitesparse_groups = {"mapping-paper", "mapping-paper-nomem"} if ablation_dir.is_dir(): for group_dir in sorted(ablation_dir.iterdir()): if not group_dir.is_dir(): @@ -211,7 +212,11 @@ def collect_paper_results(output_dir: Path): df["speedup_vs_zero"] = ( df["zero_cycles"] / df["segfold_cycles"] ).round(2) - out_path = output_dir / "ablation_mapping_suitesparse_results.csv" + if group_name == "mapping-paper": + out_name = "ablation_mapping_suitesparse_results.csv" + else: + out_name = "ablation_mapping_suitesparse_nomem_results.csv" + out_path = output_dir / out_name df.to_csv(out_path, index=False) written.append(out_path) print(f"Wrote {len(df)} rows -> {out_path}") diff --git a/scripts/plot_ablation_mapping.py b/scripts/plot_ablation_mapping.py index 24c481c..e3a468c 100644 --- a/scripts/plot_ablation_mapping.py +++ b/scripts/plot_ablation_mapping.py @@ -2,11 +2,15 @@ """Plot ablation mapping on SuiteSparse matrices. Produces a bar chart showing speedup of each mapping strategy -normalized to Zero-Offset (with memory hierarchy). +normalized to Zero-Offset, with optional side-by-side panels for +with-memory and no-memory runs. Usage: python3 scripts/plot_ablation_mapping.py python3 scripts/plot_ablation_mapping.py --output output/plots/ablation_mapping.pdf + python3 scripts/plot_ablation_mapping.py \ + --mem-csv output/ablation_mapping_suitesparse_results.csv \ + --nomem-csv output/ablation_mapping_suitesparse_nomem_results.csv """ import argparse @@ -119,28 +123,40 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--mem-csv", default=None, help="Path to ablation_mapping_suitesparse.csv") + parser.add_argument("--nomem-csv", default=None, + help="Path to ablation_mapping_suitesparse_nomem_results.csv") parser.add_argument("--output", default=None) args = parser.parse_args() mem_csv = args.mem_csv or os.path.join( PROJECT_ROOT, "output", "ablation_mapping_suitesparse_results.csv") + nomem_csv = args.nomem_csv out_path = args.output or os.path.join( PROJECT_ROOT, "output", "plots", "ablation_mapping.pdf") mem_matrices, mem_speedups = load_and_compute(mem_csv) + nomem_data = None + if nomem_csv and os.path.exists(nomem_csv): + nomem_data = load_and_compute(nomem_csv) y_max = 1.75 - fig, ax = plt.subplots(figsize=(12, 4.5)) - - plot_panel(ax, mem_matrices, mem_speedups, "", y_max) + if nomem_data: + fig, axes = plt.subplots(1, 2, figsize=(16, 4.5), sharey=True) + plot_panel(axes[0], mem_matrices, mem_speedups, "With Memory Hierarchy", y_max) + nomem_matrices, nomem_speedups = nomem_data + plot_panel(axes[1], nomem_matrices, nomem_speedups, "Without Memory Hierarchy", y_max) + handles, labels = axes[0].get_legend_handles_labels() + else: + fig, ax = plt.subplots(figsize=(12, 4.5)) + plot_panel(ax, mem_matrices, mem_speedups, "", y_max) + handles, labels = ax.get_legend_handles_labels() # Legend at top - handles, labels = ax.get_legend_handles_labels() fig.legend(handles, labels, loc="upper center", fontsize=11, ncol=len(STRATEGIES), framealpha=0.95, edgecolor="gray", handlelength=1.5, columnspacing=1.0, - bbox_to_anchor=(0.5, 1.05)) + bbox_to_anchor=(0.5, 1.05)) os.makedirs(os.path.dirname(out_path), exist_ok=True) plt.tight_layout() diff --git a/scripts/run_ablation.py b/scripts/run_ablation.py index e69cb6f..09b2158 100644 --- a/scripts/run_ablation.py +++ b/scripts/run_ablation.py @@ -2,15 +2,17 @@ """Ablation Studies: Run SegFold with different configs. Ablation groups on synthetic matrices: - - window-size: B loader window size sweep (1, 4, 8, 16, 32, 64) - - k-reordering: B row reordering strategies - - crossbar-width: B loader row limit sweep (1, 2, 4, 8, 16) + - window-size: B loader window size sweep (1, 4, 8, 16, 32, 64) + - k-reordering: B row reordering strategies + - crossbar-width: B loader row limit sweep (1, 2, 4, 8, 16) Ablation groups on SuiteSparse matrices: - mapping-paper: Ablation mapping with memory hierarchy + - mapping-paper-nomem: Ablation mapping without memory hierarchy Usage: python3 scripts/run_ablation.py output/my_run python3 scripts/run_ablation.py output/my_run --ablation mapping-paper + python3 scripts/run_ablation.py output/my_run --ablation mapping-paper-nomem python3 scripts/run_ablation.py output/my_run --jobs 4 """ @@ -51,10 +53,15 @@ "ideal": "configs/ablation-map-paper-ideal.yaml", "zero": "configs/ablation-map-paper-zero.yaml", }, + "mapping-paper-nomem": { + "segfold": "configs/ablation-map-paper-nomem-segfold.yaml", + "ideal": "configs/ablation-map-paper-nomem-ideal.yaml", + "zero": "configs/ablation-map-paper-nomem-zero.yaml", + }, } # Ablation groups that run on SuiteSparse matrices instead of synthetic -SUITESPARSE_ABLATIONS = {"mapping-paper"} +SUITESPARSE_ABLATIONS = {"mapping-paper", "mapping-paper-nomem"} SUITESPARSE_MATRICES = [ "fv1", "flowmeter0", "delaunay_n13", diff --git a/scripts/run_all.sh b/scripts/run_all.sh index bf27aba..2c43145 100755 --- a/scripts/run_all.sh +++ b/scripts/run_all.sh @@ -150,12 +150,13 @@ echo "==========================================" echo " 1. Overall performance (11 matrices) ~15-30 min" echo " 2. Non-square performance (6 matrices) ~10-20 min" echo " 3. Speedup breakdown (5 configs x 12 mat) ~30-60 min" -echo " 4. Ablation mapping (3 configs x 16 mat x2) ~30-60 min" -echo " 5. Window size ablation (6 configs, synthetic) ~5-10 min" -echo " 6. Crossbar width ablation (5 configs, synthetic) ~5-10 min" -echo " 7. K-reordering ablation (3 configs, synthetic) ~5-10 min" -echo " 8. Collect results into CSV" -echo " 9. Generate plots" +echo " 4. Ablation mapping (3 configs x 16 mat) ~15-30 min" +echo " 5. Ablation mapping (nomem) (3 configs x 16 mat) ~15-30 min" +echo " 6. Window size ablation (6 configs, synthetic) ~5-10 min" +echo " 7. Crossbar width ablation (5 configs, synthetic) ~5-10 min" +echo " 8. K-reordering ablation (3 configs, synthetic) ~5-10 min" +echo " 9. Collect results into CSV" +echo "10. Generate plots" echo "" echo " Estimated total runtime: 2-3 hours (depends on hardware)" echo "==========================================" @@ -193,44 +194,52 @@ python3 "$PROJECT_ROOT/scripts/run_ablation.py" "$OUT_DIR" \ echo "[$(ts)] Step 4: Ablation mapping complete." echo "" -# ── Step 5: Window size ablation ──────────────────────────────────────── +# ── Step 5: Ablation mapping (no memory hierarchy) ────────────────────── -echo "[$(ts)] Step 5: Running window size ablation..." +echo "[$(ts)] Step 5: Running ablation mapping without memory hierarchy..." +python3 "$PROJECT_ROOT/scripts/run_ablation.py" "$OUT_DIR" \ + --ablation mapping-paper-nomem --jobs "$MAX_JOBS" +echo "[$(ts)] Step 5: Ablation mapping without memory hierarchy complete." +echo "" + +# ── Step 6: Window size ablation ──────────────────────────────────────── + +echo "[$(ts)] Step 6: Running window size ablation..." python3 "$PROJECT_ROOT/scripts/run_ablation.py" "$OUT_DIR" \ --ablation window-size --jobs "$MAX_JOBS" -echo "[$(ts)] Step 5: Window size ablation complete." +echo "[$(ts)] Step 6: Window size ablation complete." echo "" -# ── Step 6: Crossbar width ablation ───────────────────────────────────── +# ── Step 7: Crossbar width ablation ───────────────────────────────────── -echo "[$(ts)] Step 6: Running crossbar width ablation..." +echo "[$(ts)] Step 7: Running crossbar width ablation..." python3 "$PROJECT_ROOT/scripts/run_ablation.py" "$OUT_DIR" \ --ablation crossbar-width --jobs "$MAX_JOBS" -echo "[$(ts)] Step 6: Crossbar width ablation complete." +echo "[$(ts)] Step 7: Crossbar width ablation complete." echo "" -# ── Step 7: K-reordering ablation ─────────────────────────────────────── +# ── Step 8: K-reordering ablation ─────────────────────────────────────── -echo "[$(ts)] Step 7: Running k-reordering ablation..." +echo "[$(ts)] Step 8: Running k-reordering ablation..." python3 "$PROJECT_ROOT/scripts/run_ablation.py" "$OUT_DIR" \ --ablation k-reordering --jobs "$MAX_JOBS" -echo "[$(ts)] Step 7: K-reordering ablation complete." +echo "[$(ts)] Step 8: K-reordering ablation complete." echo "" -# ── Step 8: Collect results ────────────────────────────────────────────── +# ── Step 9: Collect results ────────────────────────────────────────────── -echo "[$(ts)] Step 8: Collecting results..." +echo "[$(ts)] Step 9: Collecting results..." if [ -f "$PROJECT_ROOT/scripts/collect_results.py" ]; then python3 "$PROJECT_ROOT/scripts/collect_results.py" "$OUT_DIR" - echo "[$(ts)] Step 8: Results collected." + echo "[$(ts)] Step 9: Results collected." else echo "[run_all] WARNING: scripts/collect_results.py not found, skipping." fi echo "" -# ── Step 9: Generate plots ─────────────────────────────────────────────── +# ── Step 10: Generate plots ────────────────────────────────────────────── -echo "[$(ts)] Step 9: Generating plots..." +echo "[$(ts)] Step 10: Generating plots..." for plot_script in plot_overall.py plot_nonsquare.py plot_breakdown.py; do if [ -f "$PROJECT_ROOT/scripts/$plot_script" ]; then echo " Running $plot_script ..." @@ -244,6 +253,7 @@ if [ -f "$PROJECT_ROOT/scripts/plot_ablation_mapping.py" ]; then echo " Running plot_ablation_mapping.py ..." python3 "$PROJECT_ROOT/scripts/plot_ablation_mapping.py" \ --mem-csv "$OUT_DIR/ablation_mapping_suitesparse_results.csv" \ + --nomem-csv "$OUT_DIR/ablation_mapping_suitesparse_nomem_results.csv" \ --output "$OUT_DIR/plots/ablation_mapping.pdf" fi # Synthetic ablation plots (window size, crossbar width, k-reordering) @@ -251,7 +261,7 @@ if [ -f "$PROJECT_ROOT/scripts/plot_ablation.py" ]; then echo " Running plot_ablation.py ..." python3 "$PROJECT_ROOT/scripts/plot_ablation.py" "$OUT_DIR" fi -echo "[$(ts)] Step 9: Plots generated." +echo "[$(ts)] Step 10: Plots generated." echo "" # ── Step 10: Summary and comparison ────────────────────────────────────── diff --git a/scripts/setup.sh b/scripts/setup.sh index 5f58d1d..23ffbed 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -48,6 +48,20 @@ if ! command -v python3 &>/dev/null; then fi echo " python3 $(python3 --version 2>&1 | awk '{print $2}') ... OK" +if [ -z "${VIRTUAL_ENV:-}" ]; then + VENV_DIR="$PROJECT_ROOT/.venv" + if [ ! -d "$VENV_DIR" ]; then + echo " Creating local virtualenv at $VENV_DIR ..." + python3 -m venv "$VENV_DIR" + fi + # Use a repo-local virtualenv so setup works on PEP 668 systems. + # shellcheck disable=SC1091 + . "$VENV_DIR/bin/activate" + echo " Using virtualenv: $VIRTUAL_ENV" +else + echo " Using active virtualenv: $VIRTUAL_ENV" +fi + # Auto-install from requirements.txt if present REQ_FILE="$PROJECT_ROOT/requirements.txt" if [ -f "$REQ_FILE" ]; then @@ -64,7 +78,7 @@ for pkg in "${PYTHON_PACKAGES[@]}"; do fi if ! python3 -c "import $import_name" &>/dev/null; then echo "ERROR: Python package '$pkg' is not installed." >&2 - echo " Run: pip install -r requirements.txt" >&2 + echo " Run: . .venv/bin/activate && pip install -r requirements.txt" >&2 exit 1 fi echo " $pkg ... OK"