diff --git a/final_disturbance.py b/final_disturbance.py index d624bbb..d878415 100644 --- a/final_disturbance.py +++ b/final_disturbance.py @@ -1,7 +1,7 @@ # final_disturbance.py """ Combines fire, insect, and harvest rasters via CellStatistics (MAX). -Saves to a centralized NLCD_harvest_severity folder structure with LZW compression. +Saves to a centralized NLCD_harvest_severity folder structure without compression. Final codes: 1–4 : Harvest severity (as provided by the chosen harvest workflow) @@ -29,19 +29,22 @@ def _set_env_from_dataset(ds): arcpy.env.outputCoordinateSystem = d.spatialReference logging.info("ArcPy env initialized from: %s", ds) -def _save_byte_tif(ras, out_tif): +def _save_byte_tif(ras, out_tif, *, overwrite=False): + if not overwrite and _exists(out_tif): + logging.info("Skipping existing output: %s", out_tif) + return False try: if arcpy.Exists(out_tif): arcpy.management.Delete(out_tif) except Exception: pass - with arcpy.EnvManager(compression="LZW", pyramid="NONE"): - arcpy.management.CopyRaster(ras, out_tif, pixel_type="8_BIT_UNSIGNED", format="TIFF") + arcpy.management.CopyRaster(ras, out_tif, pixel_type="8_BIT_UNSIGNED", format="TIFF") if getattr(cfg, "COMPUTE_OUTPUT_STATS", False): try: arcpy.management.CalculateStatistics(out_tif) except Exception: pass + return True def _mask_low_severity_fire(fire_ras: Raster) -> Raster: mask_fire = getattr(cfg, "MASK_LOW_SEVERITY_FIRE", True) @@ -90,7 +93,7 @@ def main(): lines.append(f" - {name:<7}: {p} [{'OK' if _exists(p) else 'MISSING'}]") logging.warning("\n".join(lines)) logging.error("Skipping period=%s due to missing inputs: %s", period, ", ".join(missing)) - skipped.append((period, missing)) + skipped.append({"period": period, "reason": "missing inputs", "details": missing}) continue fire_ras = Raster(fire_path) @@ -107,33 +110,58 @@ def main(): # Combined MAX (DATA) using standardized codes combined_max = CellStatistics([fire_final, insect_final, harvest_ras], "MAXIMUM", "DATA") - # Save combined out_combined = os.path.join(final_out_dir, f"disturb_{method_tag}_{period}.tif") - _save_byte_tif(combined_max, out_combined) - logging.info("Final combined disturbance => %s", out_combined) + out_harvest_only = os.path.join(cfg.NLCD_FINAL_HARVEST_ONLY_DIR, f"harvest_counted_{method_tag}_{period}.tif") + out_insect = os.path.join(cfg.NLCD_FINAL_INSECT_DIR, f"insect_{period}.tif") + out_fire = os.path.join(cfg.NLCD_FINAL_FIRE_DIR, f"fire_{period}.tif") + + outputs = { + "combined": out_combined, + "harvest_only": out_harvest_only, + "insect": out_insect, + "fire": out_fire, + } + if all(_exists(path) for path in outputs.values()): + logging.info("All final disturbance outputs already exist for %s; skipping.", period) + skipped.append({"period": period, "reason": "existing outputs", "details": list(outputs.values())}) + continue + + wrote_any = False + + if _save_byte_tif(combined_max, out_combined): + logging.info("Final combined disturbance => %s", out_combined) + wrote_any = True - # Export “what counts as harvest” after masking out fire/insect: - # keep harvest 1–4 only where fire==0 and insect==0 harvest_counted = Con((fire_final > 0) | (insect_final > 0), 0, harvest_ras) - out_harvest_only = os.path.join(cfg.NLCD_FINAL_HARVEST_ONLY_DIR, f"harvest_counted_{method_tag}_{period}.tif") - _save_byte_tif(harvest_counted, out_harvest_only) - logging.info("Harvest counted (masked by fire/insect) => %s", out_harvest_only) + if _save_byte_tif(harvest_counted, out_harvest_only): + logging.info("Harvest counted (masked by fire/insect) => %s", out_harvest_only) + wrote_any = True - # Convenience exports of presence layers - out_insect = os.path.join(cfg.NLCD_FINAL_INSECT_DIR, f"insect_{period}.tif") - _save_byte_tif(insect_final, out_insect) + if _save_byte_tif(insect_final, out_insect): + wrote_any = True - out_fire = os.path.join(cfg.NLCD_FINAL_FIRE_DIR, f"fire_{period}.tif") - _save_byte_tif(fire_final, out_fire) + if _save_byte_tif(fire_final, out_fire): + wrote_any = True - processed.append(period) + if wrote_any: + processed.append(period) + else: + skipped.append({"period": period, "reason": "existing outputs", "details": list(outputs.values())}) logging.info("Run summary -> processed: %d, skipped: %d", len(processed), len(skipped)) if processed: logging.info("Processed periods: %s", ", ".join(processed)) if skipped: - for period, miss in skipped: - logging.warning("Skipped %s (missing: %s)", period, ", ".join(miss)) + for item in skipped: + reason = item.get("reason", "unknown reason") + period = item.get("period") + details = item.get("details") + if reason == "missing inputs" and details: + logging.warning("Skipped %s (missing: %s)", period, ", ".join(details)) + elif reason == "existing outputs": + logging.info("Skipped %s (all outputs already exist)", period) + else: + logging.info("Skipped %s (%s)", period, reason) arcpy.CheckInExtension("Spatial") logging.info("final_disturbance.py completed.") diff --git a/gdal_compress_outputs.py b/gdal_compress_outputs.py new file mode 100644 index 0000000..93e0da1 --- /dev/null +++ b/gdal_compress_outputs.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +"""Retroactively compress disturbance outputs using GDAL.""" + +import argparse +import logging +import os +import shutil +import subprocess +import sys +import tempfile +from typing import Iterable, Sequence, List, Optional + +import disturbance_config as cfg + + +DEFAULT_CREATION_OPTS = ("COMPRESS=LZW", "TILED=YES") + + +def _existing_dirs(dirs: Iterable[str]) -> List[str]: + out: List[str] = [] + for d in dirs: + if os.path.isdir(d): + out.append(d) + else: + logging.warning("Skipping missing directory: %s", d) + return out + + +def _default_dirs() -> List[str]: + return [ + cfg.NLCD_FINAL_DIR, + cfg.NLCD_FINAL_HARVEST_ONLY_DIR, + cfg.NLCD_FINAL_INSECT_DIR, + cfg.NLCD_FINAL_FIRE_DIR, + cfg.NLCD_TCC_CHANGE_DIR, + cfg.NLCD_TCC_PCT_CHANGE_DIR, + cfg.NLCD_HARVEST_SEVERITY_DIR, + cfg.NLCD_HARVEST_PCT_SEV_DIR, + ] + + +def _find_tifs(directories: Sequence[str]) -> List[str]: + rasters: List[str] = [] + for d in directories: + for root, _, files in os.walk(d): + for name in files: + if name.lower().endswith(".tif"): + rasters.append(os.path.join(root, name)) + return sorted(rasters) + + +def _compress_raster(path: str, *, overwrite: bool, dry_run: bool, creation_opts: Sequence[str]) -> bool: + target_path = path if overwrite else f"{os.path.splitext(path)[0]}_lzw.tif" + + if not overwrite and os.path.exists(target_path): + logging.info("Skipping (already exists): %s", target_path) + return True + + logging.info("Compressing: %s", path) + if dry_run: + return True + + if shutil.which("gdal_translate") is None: + raise RuntimeError("gdal_translate not found on PATH; install GDAL or adjust PATH.") + + directory = os.path.dirname(path) + base = os.path.basename(path) + with tempfile.NamedTemporaryFile(dir=directory, prefix=f".{base}", suffix=".tmp.tif", delete=False) as tmp: + tmp_path = tmp.name + + cmd = ["gdal_translate", path, tmp_path] + for opt in creation_opts: + cmd.extend(["-co", opt]) + + logging.debug("Running: %s", " ".join(cmd)) + result = subprocess.run(cmd, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + logging.error("gdal_translate failed for %s\nSTDOUT: %s\nSTDERR: %s", path, result.stdout, result.stderr) + os.unlink(tmp_path) + return False + + if overwrite: + os.replace(tmp_path, path) + else: + os.replace(tmp_path, target_path) + logging.info("Compressed copy written to %s", target_path) + return True + + +def parse_args(argv: Sequence[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Compress disturbance rasters using GDAL.") + parser.add_argument("dirs", nargs="*", help="Directories to search for GeoTIFF files. Defaults to standard output folders.") + parser.add_argument("--overwrite", action="store_true", help="Replace originals in-place (default writes *_lzw.tif copies).") + parser.add_argument("--dry-run", action="store_true", help="List rasters that would be compressed without running GDAL.") + parser.add_argument( + "--creation-option", + "-co", + action="append", + dest="creation_options", + help="Creation options to pass to gdal_translate (may be repeated). Default: COMPRESS=LZW, TILED=YES.", + ) + parser.add_argument("--log-level", default="INFO", help="Logging level (default: INFO)") + return parser.parse_args(argv) + + +def main(argv: Optional[Sequence[str]] = None) -> int: + args = parse_args(argv or sys.argv[1:]) + logging.basicConfig(level=getattr(logging, args.log_level.upper(), logging.INFO), + format="%(asctime)s [%(levelname)s] %(message)s") + + dirs = args.dirs or _default_dirs() + dirs = _existing_dirs(dirs) + if not dirs: + logging.error("No valid directories to scan.") + return 1 + + rasters = _find_tifs(dirs) + if not rasters: + logging.warning("No GeoTIFF files found in: %s", ", ".join(dirs)) + return 0 + + creation_opts = args.creation_options or list(DEFAULT_CREATION_OPTS) + + success = True + for path in rasters: + ok = _compress_raster(path, overwrite=args.overwrite, dry_run=args.dry_run, creation_opts=creation_opts) + success &= ok + + if args.dry_run: + logging.info("Dry run complete. %d rasters would be processed.", len(rasters)) + elif success: + logging.info("Compression complete. Processed %d rasters.", len(rasters)) + else: + logging.error("Compression completed with errors.") + return 2 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/harvest_other_severity.py b/harvest_other_severity.py index 9daa298..42eb098 100644 --- a/harvest_other_severity.py +++ b/harvest_other_severity.py @@ -18,19 +18,22 @@ def _set_env(ds): arcpy.env.extent = d.extent arcpy.env.outputCoordinateSystem = d.spatialReference -def _save_tif(ras, out_tif, pixel_type): +def _save_tif(ras, out_tif, pixel_type, *, overwrite=False): + if not overwrite and _exists(out_tif): + logging.info("Skipping existing output: %s", out_tif) + return False try: if arcpy.Exists(out_tif): arcpy.management.Delete(out_tif) except Exception: pass - with arcpy.EnvManager(compression="LZW", pyramid="NONE"): - arcpy.management.CopyRaster(ras, out_tif, pixel_type=pixel_type, format="TIFF") + arcpy.management.CopyRaster(ras, out_tif, pixel_type=pixel_type, format="TIFF") if getattr(cfg, "COMPUTE_OUTPUT_STATS", False): try: arcpy.management.CalculateStatistics(out_tif) except Exception: pass + return True def _classify_loss(loss_r): b1, b2, b3, b4 = sorted(cfg.NLCD_TCC_SEVERITY_BREAKS) @@ -73,6 +76,17 @@ def main(): logging.error("Skipping %s (missing TCC).", pname) continue + out_change = os.path.join(cfg.NLCD_TCC_CHANGE_DIR, f"nlcd_tcc_change_{pname}.tif") + out_severity = os.path.join(cfg.NLCD_HARVEST_SEVERITY_DIR, f"nlcd_tcc_severity_{pname}.tif") + + write_change = getattr(cfg, "WRITE_PP_CHANGE", True) + expected = [out_severity] + if write_change: + expected.append(out_change) + if all(_exists(p) for p in expected): + logging.info("Outputs already exist for %s; skipping.", pname) + continue + start_r, end_r = Raster(sp), Raster(ep) inv_s = IsNull(start_r) | (start_r < 0) | (start_r > 100) inv_e = IsNull(end_r) | (end_r < 0) | (end_r > 100) @@ -82,19 +96,16 @@ def main(): dpp = SetNull(IsNull(s_ok) | IsNull(e_ok), e_ok - s_ok) dpp = Con(dpp < -100, -100, Con(dpp > 100, 100, dpp)) - out_change = os.path.join(cfg.NLCD_TCC_CHANGE_DIR, f"nlcd_tcc_change_{pname}.tif") - out_severity = os.path.join(cfg.NLCD_HARVEST_SEVERITY_DIR, f"nlcd_tcc_severity_{pname}.tif") - - if getattr(cfg, "WRITE_PP_CHANGE", True): - _save_tif(Int(dpp), out_change, "16_BIT_SIGNED") - logging.info("Saved pp change => %s", out_change) + if write_change: + if _save_tif(Int(dpp), out_change, "16_BIT_SIGNED"): + logging.info("Saved pp change => %s", out_change) else: logging.info("WRITE_PP_CHANGE=False; skipping write of %s", out_change) loss = Con(dpp < 0, Abs(dpp), 0) sev = _classify_loss(loss) - _save_tif(sev, out_severity, "8_BIT_UNSIGNED") - logging.info("Saved pp-based severity => %s", out_severity) + if _save_tif(sev, out_severity, "8_BIT_UNSIGNED"): + logging.info("Saved pp-based severity => %s", out_severity) logging.info("Completed pp-change processing.") finally: arcpy.CheckInExtension("Spatial") diff --git a/harvest_other_severity_percent.py b/harvest_other_severity_percent.py index b133575..176ea8c 100644 --- a/harvest_other_severity_percent.py +++ b/harvest_other_severity_percent.py @@ -22,19 +22,22 @@ def _set_env(ds): arcpy.env.extent = d.extent arcpy.env.outputCoordinateSystem = d.spatialReference -def _save_tif(ras, out_tif, pixel_type): +def _save_tif(ras, out_tif, pixel_type, *, overwrite=False): + if not overwrite and _exists(out_tif): + logging.info("Skipping existing output: %s", out_tif) + return False try: if arcpy.Exists(out_tif): arcpy.management.Delete(out_tif) except Exception: pass - with arcpy.EnvManager(compression="LZW", pyramid="NONE"): - arcpy.management.CopyRaster(ras, out_tif, pixel_type=pixel_type, format="TIFF") + arcpy.management.CopyRaster(ras, out_tif, pixel_type=pixel_type, format="TIFF") if getattr(cfg, "COMPUTE_OUTPUT_STATS", False): try: arcpy.management.CalculateStatistics(out_tif) except Exception: pass + return True def _pct_breaks(): return getattr(cfg, "NLCD_TCC_PCT_SEVERITY_BREAKS", cfg.NLCD_TCC_SEVERITY_BREAKS) @@ -82,6 +85,17 @@ def main(): logging.error("Skipping %s (missing TCC).", pname) continue + out_change = os.path.join(cfg.NLCD_TCC_PCT_CHANGE_DIR, f"nlcd_tcc_pct_change_{pname}.tif") + out_severity = os.path.join(cfg.NLCD_HARVEST_PCT_SEV_DIR, f"nlcd_tcc_pct_severity_{pname}.tif") + + write_change = getattr(cfg, "WRITE_PCT_CHANGE", True) + expected = [out_severity] + if write_change: + expected.append(out_change) + if all(_exists(p) for p in expected): + logging.info("Outputs already exist for %s; skipping.", pname) + continue + start_r, end_r = Raster(sp), Raster(ep) inv_s = IsNull(start_r) | (start_r < 0) | (start_r > 100) @@ -94,23 +108,20 @@ def main(): pct = SetNull(IsNull(s_ok) | IsNull(e_ok), Float((e_ok - s_ok) / s_ok * 100)) pct = Con(pct < -100, -100, Con(pct > 100, 100, pct)) - out_change = os.path.join(cfg.NLCD_TCC_PCT_CHANGE_DIR, f"nlcd_tcc_pct_change_{pname}.tif") - out_severity = os.path.join(cfg.NLCD_HARVEST_PCT_SEV_DIR, f"nlcd_tcc_pct_severity_{pname}.tif") - - if getattr(cfg, "WRITE_PCT_CHANGE", True): + if write_change: if SAVE_PCT_AS_INT16: - _save_tif(Int(pct * PCT_SCALE), out_change, "16_BIT_SIGNED") - logging.info("Saved percent change INT16 (scale=%s) => %s", PCT_SCALE, out_change) + if _save_tif(Int(pct * PCT_SCALE), out_change, "16_BIT_SIGNED"): + logging.info("Saved percent change INT16 (scale=%s) => %s", PCT_SCALE, out_change) else: - _save_tif(pct, out_change, "32_BIT_FLOAT") - logging.info("Saved percent change FLOAT32 => %s", out_change) + if _save_tif(pct, out_change, "32_BIT_FLOAT"): + logging.info("Saved percent change FLOAT32 => %s", out_change) else: logging.info("WRITE_PCT_CHANGE=False; skipping write of %s", out_change) loss = Con(pct < 0, Abs(pct), 0) sev = _classify_loss(loss) - _save_tif(sev, out_severity, "8_BIT_UNSIGNED") - logging.info("Saved percent-based severity => %s", out_severity) + if _save_tif(sev, out_severity, "8_BIT_UNSIGNED"): + logging.info("Saved percent-based severity => %s", out_severity) logging.info("Completed percent-change processing.") finally: