diff --git a/disturbance_config.py b/disturbance_config.py index 331608b..870367d 100644 --- a/disturbance_config.py +++ b/disturbance_config.py @@ -3,6 +3,7 @@ import os import glob import logging +from typing import Iterable, List, Sequence # -------------------------------------------------------------------- # LOGGING CONFIG @@ -22,7 +23,7 @@ INSECT_OUTPUT_DIR = os.path.join(INSECT_GDB_DIR, "Processed") INSECT_FINAL_DIR = os.path.join(INSECT_GDB_DIR, "Final") -# Hansen +# Hansen (Hansen Global Forest Change harvest proxy) HANSEN_INPUT_DIR = os.path.join(BASE_DIR, "Hansen") HANSEN_OUTPUT_DIR = os.path.join(HANSEN_INPUT_DIR, "Processed") @@ -71,6 +72,63 @@ def _resolve_tcc_path(year: int) -> str: # Thresholds (upper bounds) for assigning canopy-loss severity classes 1-4 NLCD_TCC_SEVERITY_BREAKS = [25, 50, 75, 100] +# -------------------------------------------------------------------- +# HARVEST WORKFLOW SELECTION +# -------------------------------------------------------------------- +# Two harvest/other processing options are supported: +# * "hansen" => harvest_other.py +# * "nlcd_tcc_severity" => harvest_other_severity.py +# Update HARVEST_WORKFLOW to toggle between the legacy Hansen workflow +# and the newer NLCD Tree Canopy Cover severity workflow. +HARVEST_WORKFLOW = "hansen" + +HARVEST_PRODUCTS = { + "hansen": { + "module": "harvest_other", + "description": "Hansen Global Forest Change based harvest/other", + "raster_directory": HANSEN_OUTPUT_DIR, + "raster_template": "hansen_{period}.tif", + "combined_output_subdir": "hansen", + }, + "nlcd_tcc_severity": { + "module": "harvest_other_severity", + "description": "NLCD Tree Canopy Cover change severity", + "raster_directory": NLCD_TCC_OUTPUT_DIR, + "raster_template": "nlcd_tcc_severity_{period}.tif", + "combined_output_subdir": "nlcd_tcc_severity", + }, +} + + +def harvest_product_config(workflow: str = None): + """Return the harvest configuration for the selected workflow.""" + + wf = workflow or HARVEST_WORKFLOW + if wf not in HARVEST_PRODUCTS: + raise ValueError( + f"Unknown harvest workflow '{wf}'. Valid options: {sorted(HARVEST_PRODUCTS)}" + ) + return dict(HARVEST_PRODUCTS[wf]) + + +def harvest_raster_path(period_name: str, workflow: str = None) -> str: + """Return the expected harvest raster path for the given period.""" + + config = harvest_product_config(workflow) + directory = config["raster_directory"] + template = config["raster_template"] + return os.path.join(directory, template.format(period=period_name)) + + +def final_combined_dir(workflow: str = None) -> str: + """Directory for final disturbance rasters for the selected harvest workflow.""" + + config = harvest_product_config(workflow) + combined_subdir = config.get("combined_output_subdir") or (workflow or HARVEST_WORKFLOW) + path = os.path.join(FINAL_COMBINED_ROOT_DIR, combined_subdir) + os.makedirs(path, exist_ok=True) + return path + # -------------------------------------------------------------------- # NLCD LAND COVER (reference raster for env) # -------------------------------------------------------------------- @@ -98,20 +156,29 @@ def nlcd_lc_path(year: int) -> str: # FINAL COMBINATION OUTPUTS # -------------------------------------------------------------------- INTERMEDIATE_COMBINED_DIR = os.path.join(BASE_DIR, "Intermediate") -FINAL_COMBINED_DIR = os.path.join(BASE_DIR, "FinalCombined") +FINAL_COMBINED_ROOT_DIR = os.path.join(BASE_DIR, "FinalCombined") +# Backwards compatibility: legacy callers may still reference FINAL_COMBINED_DIR +FINAL_COMBINED_DIR = FINAL_COMBINED_ROOT_DIR # Create directories if they don’t exist: -for _dir in [ +_BASE_DIRS = [ INSECT_OUTPUT_DIR, INSECT_FINAL_DIR, HANSEN_OUTPUT_DIR, NLCD_TCC_OUTPUT_DIR, FIRE_OUTPUT_DIR, INTERMEDIATE_COMBINED_DIR, - FINAL_COMBINED_DIR -]: + FINAL_COMBINED_ROOT_DIR, +] + +for _dir in _BASE_DIRS: os.makedirs(_dir, exist_ok=True) +for _cfg in HARVEST_PRODUCTS.values(): + combined_subdir = _cfg.get("combined_output_subdir") or "" + if combined_subdir: + os.makedirs(os.path.join(FINAL_COMBINED_ROOT_DIR, combined_subdir), exist_ok=True) + # -------------------------------------------------------------------- # REGIONS & TIME PERIODS # -------------------------------------------------------------------- @@ -154,3 +221,77 @@ def nlcd_lc_path(year: int) -> str: r"C:\GIS\Data\LEARN\Disturbances\Hansen\GFW2023_60N_120W.tif", r"C:\GIS\Data\LEARN\Disturbances\Hansen\GFW2023_60N_130W.tif", ] + + +def _hansen_tile_id(tile_path: str) -> str: + """Return the bare tile identifier (filename without extension).""" + + base = os.path.basename(tile_path) + stem, _ = os.path.splitext(base) + return stem + + +_HANSEN_TILE_LOOKUP = { + _hansen_tile_id(path).upper(): path + for path in HANSEN_TILES +} + + +def normalize_tile_ids(tile_ids: Sequence[str]) -> List[str]: + """Normalize user-provided Hansen tile identifiers.""" + + normed: List[str] = [] + for raw in tile_ids: + if raw is None: + continue + tile = raw.strip().upper() + if not tile: + continue + tile = tile.replace(".TIF", "") + if tile in _HANSEN_TILE_LOOKUP: + normed.append(tile) + continue + prefixed = tile if tile.startswith("GFW") else f"GFW2023_{tile.lstrip('_')}" + normed.append(prefixed) + return normed + + +def hansen_tile_paths(tile_ids: Iterable[str] | None = None) -> List[str]: + """Return Hansen tile paths filtered by ``tile_ids`` if provided.""" + + if not tile_ids: + return list(HANSEN_TILES) + + selected: List[str] = [] + missing: List[str] = [] + for tile in normalize_tile_ids(tile_ids): + path = _HANSEN_TILE_LOOKUP.get(tile) + if path: + selected.append(path) + else: + missing.append(tile) + + if missing: + logging.warning( + "Ignored %d Hansen tile id(s) with no match: %s", + len(missing), + ", ".join(sorted(set(missing))), + ) + + # Deduplicate while preserving order of input sequence + seen = set() + ordered: List[str] = [] + for path in selected: + if path not in seen: + ordered.append(path) + seen.add(path) + + if not ordered: + logging.error("No Hansen tiles matched the provided ids. Using full tile list instead.") + return list(HANSEN_TILES) + + logging.info("Selected %d Hansen tile(s).", len(ordered)) + for path in ordered: + logging.info(" %s", path) + + return ordered diff --git a/final_disturbance.py b/final_disturbance.py index fb4d209..7c69c19 100644 --- a/final_disturbance.py +++ b/final_disturbance.py @@ -1,7 +1,16 @@ # final_disturbance.py """ Combines fire, insect, and harvest rasters via CellStatistics (MAX) -and optionally masks out low-severity fire. +and optionally masks out low-severity fire. The harvest raster source is +controlled by disturbance_config.HARVEST_WORKFLOW so either the legacy +Hansen workflow or the newer NLCD Tree Canopy Cover severity workflow can +be used without code changes. + +Example +------- +``` +python final_disturbance.py +``` """ import arcpy @@ -28,33 +37,49 @@ def main(): arcpy.env.outputCoordinateSystem = cfg.NLCD_RASTER arcpy.env.overwriteOutput = True + harvest_cfg = cfg.harvest_product_config() + logging.info( + "Using harvest workflow '%s' => %s", + cfg.HARVEST_WORKFLOW, + harvest_cfg.get("description", ""), + ) + logging.info("Harvest rasters directory => %s", harvest_cfg.get("raster_directory")) + + final_output_dir = cfg.final_combined_dir() + logging.info("Final disturbance outputs will be written to => %s", final_output_dir) + for period in cfg.TIME_PERIODS.keys(): # Build paths fire_path = os.path.join(cfg.FIRE_OUTPUT_DIR, f"fire_{period}.tif") insect_path = os.path.join(cfg.INSECT_FINAL_DIR, f"insect_damage_{period}.tif") - hansen_path = os.path.join(cfg.HANSEN_OUTPUT_DIR, f"hansen_{period}.tif") + harvest_path = cfg.harvest_raster_path(period) if not (arcpy.Exists(fire_path) and arcpy.Exists(insect_path) and - arcpy.Exists(hansen_path)): + arcpy.Exists(harvest_path)): logging.warning(f"Missing one or more rasters for period={period}. Skipping.") continue - logging.info(f"Combining Fire={fire_path}, Insect={insect_path}, Harvest={hansen_path}") + logging.info( + "Combining Fire=%s, Insect=%s, Harvest=%s", + fire_path, + insect_path, + harvest_path, + ) # Max combination fire_ras = Raster(fire_path) insect_ras = Raster(insect_path) - hansen_ras = Raster(hansen_path) + harvest_ras = Raster(harvest_path) - combined_max = CellStatistics([fire_ras, insect_ras, hansen_ras], "MAXIMUM", "DATA") + combined_max = CellStatistics([fire_ras, insect_ras, harvest_ras], "MAXIMUM", "DATA") # Example: mask out code=3 => 0 (low severity fire). # If you do NOT want to mask out code=3, skip this step. final_dist = Con(combined_max == 3, 0, combined_max) # Save - out_raster_path = os.path.join(cfg.FINAL_COMBINED_DIR, f"disturb_{period}.tif") + out_raster_path = os.path.join(final_output_dir, f"disturb_{period}.tif") final_dist.save(out_raster_path) logging.info(f"Final combined disturbance => {out_raster_path}") diff --git a/fire.py b/fire.py index 6d1547a..7d56fa8 100644 --- a/fire.py +++ b/fire.py @@ -11,6 +11,12 @@ - For each time period in cfg.TIME_PERIODS, gather the reclassified rasters for each year in that period, perform CellStatistics (MAX), and save the final as fire_{period}.tif. + +Example +------- +``` +python fire.py +``` """ import arcpy diff --git a/harvest_other.py b/harvest_other.py index b4d73e9..5614be8 100644 --- a/harvest_other.py +++ b/harvest_other.py @@ -4,16 +4,64 @@ 1) (Optional) Mosaicking multiple tiles (if not already done) 2) (Optional) Reprojecting the mosaic to match NLCD (if not already done) 3) Extracting disturbance in the specified time periods + +This is the legacy harvest workflow invoked when +``disturbance_config.HARVEST_WORKFLOW == "hansen"``. + +Example +------- +Run against a pair of tiles (identified by the Hansen filename stem): + +``` +python harvest_other.py --tile-id GFW2023_40N_090W --tile-id 40N_100W +``` + +Omit ``--tile-id`` to process all configured tiles. """ import arcpy import os import logging import sys +import argparse +from typing import Iterable, Sequence import disturbance_config as cfg -def main(): + +def _parse_cli_args(argv: Sequence[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Process Hansen harvest/other rasters, optionally filtered to specific tiles.", + ) + parser.add_argument( + "--tile-id", + dest="tile_ids", + action="append", + default=[], + metavar="TILE_ID", + help=( + "Limit processing to a Hansen tile id. Repeat for multiple ids. " + "The value may be the full stem (e.g. GFW2023_40N_090W) or the core " + "row/column portion (e.g. 40N_090W)." + ), + ) + parser.add_argument( + "--tiles", + dest="tile_csv", + metavar="ID1,ID2", + help="Comma-separated list of Hansen tile ids to include.", + ) + return parser.parse_args(argv) + + +def _combine_tile_args(tile_ids: Iterable[str], csv: str | None) -> list[str]: + combined = list(tile_ids) if tile_ids else [] + if csv: + combined.extend(part.strip() for part in csv.split(",") if part.strip()) + return combined + + +def main(tile_ids: Iterable[str] | None = None): """ Creates a single Hansen mosaic, reprojects it to match NLCD, then for each time period, extracts disturbance (1) vs no-disturbance (0). @@ -30,6 +78,10 @@ def main(): arcpy.env.cellSize = cfg.NLCD_RASTER arcpy.env.outputCoordinateSystem = cfg.NLCD_RASTER + # Resolve tile selection (if any) + tile_ids = list(tile_ids) if tile_ids else [] + selected_tiles = cfg.hansen_tile_paths(tile_ids) + # Output paths hansen_mosaic_raw = os.path.join(cfg.HANSEN_OUTPUT_DIR, "hansen_mosaic_raw.tif") hansen_mosaic_reproj = os.path.join(cfg.HANSEN_OUTPUT_DIR, "hansen_mosaic_reproj.tif") @@ -37,12 +89,12 @@ def main(): # 1) Mosaic (only if needed) if not arcpy.Exists(hansen_mosaic_raw): logging.info("Mosaicking Hansen tiles into a single raw raster:") - for t in cfg.HANSEN_TILES: + for t in selected_tiles: logging.info(f" {t}") logging.info(f"Creating mosaic => {hansen_mosaic_raw}") arcpy.management.MosaicToNewRaster( - input_rasters=cfg.HANSEN_TILES, + input_rasters=selected_tiles, output_location=os.path.dirname(hansen_mosaic_raw), raster_dataset_name_with_extension=os.path.basename(hansen_mosaic_raw), coordinate_system_for_the_raster="", @@ -54,6 +106,12 @@ def main(): ) else: logging.info(f"Mosaic already exists => {hansen_mosaic_raw}. Skipping mosaic step.") + if tile_ids: + logging.info( + "Tile filter requested (%s) but existing mosaic will be reused. Delete the mosaic " + "if you need to rebuild it from only the specified tiles.", + ", ".join(tile_ids), + ) # 2) Reproject (only if needed) if not arcpy.Exists(hansen_mosaic_reproj): @@ -67,6 +125,12 @@ def main(): ) else: logging.info(f"Reprojected mosaic already exists => {hansen_mosaic_reproj}. Skipping reproject step.") + if tile_ids: + logging.info( + "Tile filter requested (%s) but existing reprojected mosaic will be reused. Delete the " + "reprojected raster if you need a tile-specific product.", + ", ".join(tile_ids), + ) # 3) Extract time periods from the reprojected mosaic from arcpy.sa import Raster, Con @@ -99,4 +163,6 @@ def main(): logging.info("Hansen 'harvest/other' processing completed successfully.") if __name__ == "__main__": - main() + args = _parse_cli_args() + combined_tiles = _combine_tile_args(args.tile_ids, args.tile_csv) + main(tile_ids=combined_tiles) diff --git a/harvest_other_severity.py b/harvest_other_severity.py index 66f2fa0..de7c8b6 100644 --- a/harvest_other_severity.py +++ b/harvest_other_severity.py @@ -10,11 +10,25 @@ This version prefers the NLCD Land Cover (2021) raster as the global reference for ArcPy env; if missing, it will fall back to the period's end-year TCC raster. + +Set ``disturbance_config.HARVEST_WORKFLOW = "nlcd_tcc_severity"`` to integrate +these severity rasters into the final disturbance combination workflow. + +Example +------- +``` +python harvest_other_severity.py --tile-id GFW2023_40N_090W +``` + +The ``--tile-id`` filter is accepted for consistency with the Hansen workflow +but currently logged and ignored because NLCD TCC rasters are not tiled. """ import os import logging import arcpy +import argparse +from typing import Iterable, Sequence import disturbance_config as cfg @@ -60,8 +74,46 @@ def _classify_loss(loss_raster): ) -def main(): +def _parse_cli_args(argv: Sequence[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "Derive harvest/other severity rasters from NLCD Tree Canopy Cover. " + "Tile filters are accepted for parity with the Hansen workflow " + "but will be ignored because the rasters are not tiled." + ) + ) + parser.add_argument( + "--tile-id", + dest="tile_ids", + action="append", + default=[], + metavar="TILE_ID", + help="Optional Hansen-style tile ids (ignored, logged for awareness).", + ) + parser.add_argument( + "--tiles", + dest="tile_csv", + metavar="ID1,ID2", + help="Comma-separated list of tile ids (ignored, logged).", + ) + return parser.parse_args(argv) + + +def _combine_tile_args(tile_ids: Iterable[str], csv: str | None) -> list[str]: + combined = list(tile_ids) if tile_ids else [] + if csv: + combined.extend(part.strip() for part in csv.split(",") if part.strip()) + return combined + + +def main(tile_ids: Iterable[str] | None = None): logging.info("Starting NLCD Tree Canopy change-based harvest/other processing.") + tile_ids = list(tile_ids) if tile_ids else [] + if tile_ids: + logging.info( + "Tile filter requested (%s) but NLCD TCC rasters are not tiled; proceeding with full extent.", + ", ".join(tile_ids), + ) arcpy.env.overwriteOutput = True arcpy.CheckOutExtension("Spatial") try: @@ -134,4 +186,6 @@ def main(): if __name__ == "__main__": - main() + args = _parse_cli_args() + combined_tiles = _combine_tile_args(args.tile_ids, args.tile_csv) + main(tile_ids=combined_tiles) diff --git a/insect_disease_merge.py b/insect_disease_merge.py index 88a644d..06e235c 100644 --- a/insect_disease_merge.py +++ b/insect_disease_merge.py @@ -2,6 +2,12 @@ """ Merges per-region insect/disease rasters (values {0,5}) into a single CONUS raster for each time period. + +Example +------- +``` +python insect_disease_merge.py +``` """ import arcpy diff --git a/insect_disease_process.py b/insect_disease_process.py index dd1700f..62100c9 100644 --- a/insect_disease_process.py +++ b/insect_disease_process.py @@ -2,6 +2,12 @@ Processes raw Insect/Disease data by reading ESRI FileGDB layers using OGR, filtering by year, and rasterizing each region to match the NLCD resolution/extent. Must be run in a GDAL/OGR environment. + +Example +------- +``` +python insect_disease_process.py --period 2019_2021 +``` """ import os diff --git a/pull_tiles.py b/pull_tiles.py index 7e2a407..b2bb5a3 100644 --- a/pull_tiles.py +++ b/pull_tiles.py @@ -1,3 +1,12 @@ +"""Utility script for listing Hansen tiles that overlap a target raster. + +Example +------- +``` +python pull_tiles.py +``` +""" + import os import glob import sys diff --git a/run_disturbance.py b/run_disturbance.py index cd28743..ffb49da 100644 --- a/run_disturbance.py +++ b/run_disturbance.py @@ -3,23 +3,99 @@ Orchestrates the ArcPy-based disturbance-processing workflow, in order: 1) insect_disease_merge.py -2) harvest_other.py +2) harvest workflow (harvest_other.py or harvest_other_severity.py) 3) fire.py 4) final_disturbance.py IMPORTANT: You must run 'insect_disease_process.py' in a separate GDAL environment prior to this script. + +Example +------- +Run only the harvest and final combination steps for two Hansen tiles: + +``` +python run_disturbance.py --steps harvest final --tile-id GFW2023_40N_090W --tile-id 40N_100W +``` + +Omit ``--steps`` to execute the full pipeline. """ import logging # Import each script +import importlib +import argparse +from typing import Iterable, Sequence + +import disturbance_config as cfg import insect_disease_merge -import harvest_other import fire import final_disturbance -def main(): +STEP_SEQUENCE = [ + ("insect_merge", "Insect/disease merge", insect_disease_merge.main), + ("harvest", "Harvest workflow", None), # special handling below + ("fire", "Fire processing", fire.main), + ("final", "Final disturbance combination", final_disturbance.main), +] + + +def _load_harvest_module(): + harvest_cfg = cfg.harvest_product_config() + module_name = harvest_cfg["module"] + logging.info( + "Selected harvest workflow '%s' => %s (module: %s)", + cfg.HARVEST_WORKFLOW, + harvest_cfg.get("description", ""), + module_name, + ) + try: + return importlib.import_module(module_name) + except ImportError as exc: + raise ImportError( + f"Unable to import harvest workflow module '{module_name}'" + ) from exc + + +def _parse_cli_args(argv: Sequence[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run the disturbance-processing workflow end-to-end or for specific steps.", + ) + parser.add_argument( + "--steps", + nargs="+", + choices=[name for name, _, _ in STEP_SEQUENCE], + help=( + "Names of the steps to run. Choices: " + + ", ".join(name for name, _, _ in STEP_SEQUENCE) + ), + ) + parser.add_argument( + "--tile-id", + dest="tile_ids", + action="append", + default=[], + metavar="TILE_ID", + help="Optional Hansen tile id filter passed through to the harvest step.", + ) + parser.add_argument( + "--tiles", + dest="tile_csv", + metavar="ID1,ID2", + help="Comma-separated list of tile ids to include (forwarded to harvest).", + ) + return parser.parse_args(argv) + + +def _combine_tile_args(tile_ids: Iterable[str], csv: str | None) -> list[str]: + combined = list(tile_ids) if tile_ids else [] + if csv: + combined.extend(part.strip() for part in csv.split(",") if part.strip()) + return combined + + +def main(selected_steps: Sequence[str] | None = None, tile_ids: Iterable[str] | None = None): """ Runs the ArcPy-based scripts in sequence. Make sure 'insect_disease_process.py' is already done. @@ -27,19 +103,33 @@ def main(): logging.info("========== Disturbance Workflow Started ==========") logging.warning("Ensure 'insect_disease_process.py' has been run in the GDAL environment first.") - # 1) Merge insect/disease - insect_disease_merge.main() - - # 2) Harvest/other - harvest_other.main() + steps_to_run = list(selected_steps) if selected_steps else [name for name, _, _ in STEP_SEQUENCE] + valid_steps = {name for name, _, _ in STEP_SEQUENCE} + invalid = [step for step in steps_to_run if step not in valid_steps] + if invalid: + raise ValueError(f"Unknown step name(s): {invalid}") - # 3) Fire - fire.main() + harvest_module = None + for step_name, step_label, step_func in STEP_SEQUENCE: + if step_name not in steps_to_run: + logging.info("Skipping %s step (not requested).", step_label) + continue - # 4) Final combination - final_disturbance.main() + logging.info("---- Running step: %s ----", step_label) + if step_name == "harvest": + if harvest_module is None: + harvest_module = _load_harvest_module() + try: + harvest_module.main(tile_ids=tile_ids) + except TypeError: + # Backwards compatibility if workflow has not been updated to accept tile_ids + harvest_module.main() + else: + step_func() logging.info("========== Disturbance Workflow Complete ==========") if __name__ == "__main__": - main() + args = _parse_cli_args() + combined_tiles = _combine_tile_args(args.tile_ids, args.tile_csv) + main(selected_steps=args.steps, tile_ids=combined_tiles or None) diff --git a/run_insect_disease.py b/run_insect_disease.py index 701be4a..1f00147 100644 --- a/run_insect_disease.py +++ b/run_insect_disease.py @@ -6,6 +6,12 @@ for a hardcoded list of time periods. Simply edit HARDCODED_PERIODS below to add/remove periods. + +Example +------- +``` +python run_insect_disease.py +``` """ import logging