diff --git a/data/helios/togo_2020/config.json b/data/helios/togo_2020/config.json new file mode 100644 index 00000000..8f9cd926 --- /dev/null +++ b/data/helios/togo_2020/config.json @@ -0,0 +1,126 @@ +{ + "layers": { + "label": { + "type": "vector" + }, + "sentinel1_ascending": { + "band_sets": [ + { + "bands": [ + "vv", + "vh" + ], + "dtype": "float32" + } + ], + "data_source": { + "cache_dir": "cache/planetary_computer", + "duration": "366d", + "ingest": false, + "name": "rslp.satlas.data_sources.MonthlySentinel1", + "query": { + "sar:instrument_mode": { + "eq": "IW" + }, + "sar:polarizations": { + "eq": [ + "VV", + "VH" + ] + }, + "sat:orbit_state": { + "eq": "ascending" + } + }, + "query_config": { + "max_matches": 12 + }, + "time_offset": "-180d" + }, + "type": "raster" + }, + "sentinel1_descending": { + "band_sets": [ + { + "bands": [ + "vv", + "vh" + ], + "dtype": "float32" + } + ], + "data_source": { + "cache_dir": "cache/planetary_computer", + "duration": "366d", + "ingest": false, + "name": "rslp.satlas.data_sources.MonthlySentinel1", + "query": { + "sar:instrument_mode": { + "eq": "IW" + }, + "sar:polarizations": { + "eq": [ + "VV", + "VH" + ] + }, + "sat:orbit_state": { + "eq": "descending" + } + }, + "query_config": { + "max_matches": 12 + }, + "time_offset": "-180d" + }, + "type": "raster" + }, + "sentinel2": { + "band_sets": [ + { + "bands": [ + "B02", + "B03", + "B04", + "B08" + ], + "dtype": "uint16" + }, + { + "bands": [ + "B05", + "B06", + "B07", + "B8A", + "B11", + "B12" + ], + "dtype": "uint16", + "zoom_offset": -1 + }, + { + "bands": [ + "B01", + "B09" + ], + "dtype": "uint16", + "zoom_offset": -2 + } + ], + "data_source": { + "cache_dir": "cache/planetary_computer", + "duration": "366d", + "harmonize": true, + "ingest": false, + "max_cloud_cover": 50, + "name": "rslp.satlas.data_sources.MonthlyAzureSentinel2", + "query_config": { + "max_matches": 12 + }, + "sort_by": "eo:cloud_cover", + "time_offset": "-180d" + }, + "type": "raster" + } + } 
+} diff --git a/data/helios/togo_2020/finetune_12_months.yaml b/data/helios/togo_2020/finetune_12_months.yaml new file mode 100644 index 00000000..fb701c7a --- /dev/null +++ b/data/helios/togo_2020/finetune_12_months.yaml @@ -0,0 +1,271 @@ +model: + class_path: rslearn.train.lightning_module.RslearnLightningModule + init_args: + model: + class_path: rslearn.models.multitask.MultiTaskModel + init_args: + encoder: + - class_path: rslp.helios.model.Helios + init_args: + checkpoint_path: "{CHECKPOINT_PATH}" + selector: ["encoder"] + forward_kwargs: + patch_size: {PATCH_SIZE} + decoders: + crop_type_classification: + - class_path: rslearn.models.pooling_decoder.PoolingDecoder + init_args: + in_channels: {ENCODER_EMBEDDING_SIZE} + out_channels: 2 + - class_path: rslearn.train.tasks.classification.ClassificationHead + lr: 0.0001 + plateau: true + plateau_factor: 0.2 + plateau_patience: 2 + plateau_min_lr: 0 + plateau_cooldown: 10 +data: + class_path: rslearn.train.data_module.RslearnDataModule + init_args: + path: weka://dfive-default/rslearn-eai/datasets/crop/togo_2020/20250701 + inputs: + sentinel2_0: + data_type: "raster" + layers: ["sentinel2"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_1: + data_type: "raster" + layers: ["sentinel2.1"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_2: + data_type: "raster" + layers: ["sentinel2.2"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_3: + data_type: "raster" + layers: ["sentinel2.3"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_4: + data_type: "raster" + layers: ["sentinel2.4"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", 
"B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_5: + data_type: "raster" + layers: ["sentinel2.5"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_6: + data_type: "raster" + layers: ["sentinel2.6"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_7: + data_type: "raster" + layers: ["sentinel2.7"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_8: + data_type: "raster" + layers: ["sentinel2.8"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_9: + data_type: "raster" + layers: ["sentinel2.9"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_10: + data_type: "raster" + layers: ["sentinel2.10"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel2_11: + data_type: "raster" + layers: ["sentinel2.11"] + bands: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + passthrough: true + dtype: FLOAT32 + sentinel1_0: + data_type: "raster" + layers: ["sentinel1_ascending"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_1: + data_type: "raster" + layers: ["sentinel1_ascending.1"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_2: + data_type: "raster" + layers: ["sentinel1_ascending.2"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_3: + data_type: "raster" + layers: ["sentinel1_ascending.3"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_4: + data_type: "raster" + 
layers: ["sentinel1_ascending.4"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_5: + data_type: "raster" + layers: ["sentinel1_ascending.5"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_6: + data_type: "raster" + layers: ["sentinel1_ascending.6"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_7: + data_type: "raster" + layers: ["sentinel1_ascending.7"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_8: + data_type: "raster" + layers: ["sentinel1_ascending.8"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_9: + data_type: "raster" + layers: ["sentinel1_ascending.9"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_10: + data_type: "raster" + layers: ["sentinel1_ascending.10"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + sentinel1_11: + data_type: "raster" + layers: ["sentinel1_ascending.11"] + bands: ["vv", "vh"] + passthrough: true + dtype: FLOAT32 + label: + data_type: "vector" + layers: ["label"] + is_target: true + task: + class_path: rslearn.train.tasks.multi_task.MultiTask + init_args: + tasks: + crop_type_classification: + class_path: rslearn.train.tasks.classification.ClassificationTask + init_args: + property_name: "category" + classes: [1, 0] + enable_f1_metric: true + metric_kwargs: + average: "micro" + input_mapping: + crop_type_classification: + label: "targets" + batch_size: 8 + num_workers: 32 + default_config: + transforms: + - class_path: rslearn.train.transforms.concatenate.Concatenate + init_args: + selections: + sentinel2_0: [] + sentinel2_1: [] + sentinel2_2: [] + sentinel2_3: [] + sentinel2_4: [] + sentinel2_5: [] + sentinel2_6: [] + sentinel2_7: [] + sentinel2_8: [] + sentinel2_9: [] + sentinel2_10: [] + sentinel2_11: [] + output_selector: sentinel2_l2a + - class_path: rslearn.train.transforms.concatenate.Concatenate + init_args: + selections: + sentinel1_0: [] + sentinel1_1: [] + 
sentinel1_2: [] + sentinel1_3: [] + sentinel1_4: [] + sentinel1_5: [] + sentinel1_6: [] + sentinel1_7: [] + sentinel1_8: [] + sentinel1_9: [] + sentinel1_10: [] + sentinel1_11: [] + output_selector: sentinel1 + - class_path: rslp.helios.norm.HeliosNormalize + init_args: + config_fname: "/opt/helios/data/norm_configs/computed.json" + band_names: + sentinel2_l2a: ["B02", "B03", "B04", "B08", "B05", "B06", "B07", "B8A", "B11", "B12", "B01", "B09"] + sentinel1: ["vv", "vh"] + - class_path: rslearn.train.transforms.pad.Pad + init_args: + size: 8 + mode: "center" + image_selectors: ["sentinel2_l2a", "sentinel1"] + train_config: + groups: ["groundtruth_window_32"] + tags: + split: "train" + val_config: + groups: ["groundtruth_window_32"] + tags: + split: "val" + test_config: + groups: ["groundtruth_window_32"] + tags: + split: "test" +trainer: + max_epochs: 100 + callbacks: + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: "epoch" + - class_path: lightning.pytorch.callbacks.ModelCheckpoint + init_args: + save_top_k: 1 + save_last: true + monitor: val_loss + mode: min + - class_path: rslearn.train.callbacks.freeze_unfreeze.FreezeUnfreeze + init_args: + module_selector: ["model", "encoder", 0] + unfreeze_at_epoch: 100 +rslp_project: placeholder +rslp_experiment: placeholder diff --git a/requirements.txt b/requirements.txt index 7358718e..879c91bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ beaker-py>=2.0 fastapi>=0.115 +geopandas>=1.1.0 google-cloud-bigtable>=2.18 google-cloud-pubsub>=2.18 interrogate>=1.7 diff --git a/rslp/crop/rapids_togo/create_windows_for_labels.py b/rslp/crop/rapids_togo/create_windows_for_labels.py new file mode 100644 index 00000000..696c3b14 --- /dev/null +++ b/rslp/crop/rapids_togo/create_windows_for_labels.py @@ -0,0 +1,188 @@ +"""Create windows for crop type mapping. 
# rslp/crop/rapids_togo/create_windows_for_labels.py
"""Create windows for crop type mapping.

Data from https://zenodo.org/records/3836629
"""

import argparse
import hashlib
import multiprocessing
from datetime import datetime, timezone

import pandas as pd
import shapely
import tqdm
from rslearn.const import WGS84_PROJECTION
from rslearn.dataset import Window
from rslearn.utils import Projection, STGeometry, get_utm_ups_crs
from rslearn.utils.feature import Feature
from rslearn.utils.mp import star_imap_unordered
from rslearn.utils.vector_format import GeojsonVectorFormat
from upath import UPath

WINDOW_RESOLUTION = 10
LABEL_LAYER = "label"

# The data was collected in May 2020, so every window is anchored on that
# month. The imagery actually retrieved around it is decided by the
# time_offset / duration settings of each layer in the dataset config.
START_TIME = datetime(2020, 5, 1, tzinfo=timezone.utc)
END_TIME = datetime(2020, 5, 31, tzinfo=timezone.utc)


def _window_bounds(col: int, row: int, window_size: int) -> tuple[int, int, int, int]:
    """Return (min_col, min_row, max_col, max_row) pixel bounds around a point."""
    if window_size == 1:
        # Special case kept as-is: the point's pixel is taken to span
        # [row - 1, row). Presumably this is because the projection uses a
        # negative y resolution, so pixel rows are negative and int()
        # truncated them toward zero -- TODO confirm for other regions.
        return (col, row - 1, col + 1, row)

    # Centre a full window_size x window_size window on the point. The previous
    # code used (col, row, col + window_size // 2, row + window_size // 2),
    # which produced a half-size window with the labelled point in its corner,
    # so a later centre crop no longer contained the labelled location.
    half = window_size // 2
    min_col = col - half
    min_row = row - half
    return (min_col, min_row, min_col + window_size, min_row + window_size)


def _assign_split(window_name: str, is_test: bool) -> str:
    """Pick the split ("train", "val" or "test") for a window."""
    if is_test:
        return "test"
    # Deterministic split keyed on the full window name (polygon id plus
    # coordinates), so re-running the script assigns every sample to the same
    # split. Two of the sixteen possible leading hex digits go to validation.
    leading_digit = hashlib.sha256(window_name.encode()).hexdigest()[0]
    return "val" if leading_digit in ("0", "1") else "train"


def create_window(
    csv_row: pd.Series, ds_path: UPath, group_name: str, window_size: int, is_test: bool
) -> None:
    """Create one labelled window for crop type mapping.

    The window is written under the group "{group_name}_window_{window_size}",
    and a single label feature covering the whole window is stored in the
    label layer.

    Args:
        csv_row: a row of the dataframe; must provide "unique_id", "latitude",
            "longitude" and "is_crop"
        ds_path: path to the dataset
        group_name: name of the group
        window_size: window size in pixels
        is_test: whether or not this is a test window
    """
    # Get sample metadata.
    polygon_id = csv_row["unique_id"]
    latitude, longitude = csv_row["latitude"], csv_row["longitude"]
    # Defensive cast: keeps a plain int in the window metadata and in the
    # label feature, whatever scalar type pandas hands back.
    is_crop = int(csv_row["is_crop"])
    category = is_crop

    # Project the WGS84 point into the local UTM/UPS pixel grid.
    src_geometry = STGeometry(
        WGS84_PROJECTION, shapely.Point(longitude, latitude), None
    )
    dst_projection = Projection(
        get_utm_ups_crs(longitude, latitude), WINDOW_RESOLUTION, -WINDOW_RESOLUTION
    )
    dst_geometry = src_geometry.to_projection(dst_projection)
    bounds = _window_bounds(
        int(dst_geometry.shp.x), int(dst_geometry.shp.y), window_size
    )

    group = f"{group_name}_window_{window_size}"
    window_name = f"{polygon_id}_{latitude}_{longitude}"
    split = _assign_split(window_name, is_test)

    window = Window(
        path=Window.get_window_root(ds_path, group, window_name),
        group=group,
        name=window_name,
        projection=dst_projection,
        bounds=bounds,
        time_range=(START_TIME, END_TIME),
        options={
            "split": split,
            "is_crop": is_crop,
            "category": category,
        },
    )
    window.save()

    # Add the label.
    feature = Feature(
        window.get_geometry(),
        {
            "category": category,
        },
    )
    layer_dir = window.get_layer_dir(LABEL_LAYER)
    GeojsonVectorFormat().encode_vector(layer_dir, [feature])
    window.mark_layer_completed(LABEL_LAYER)


def create_windows_from_csv(
    csv_paths: UPath,
    ds_path: UPath,
    group_name: str,
    window_size: int,
    num_workers: int = 32,
) -> None:
    """Create windows from the crop, non-crop and test csv files.

    Args:
        csv_paths: path to the directory holding the csv files
        ds_path: path to the dataset
        group_name: name of the group
        window_size: window size
        num_workers: number of worker processes used to create windows
    """
    filenames = (
        "crop_merged_v2.csv",
        "noncrop_merged_v2.csv",
        "togo_test_majority.csv",
    )
    # One pool serves all files; the context manager guarantees the workers are
    # cleaned up even if creating a window raises.
    with multiprocessing.Pool(num_workers) as pool:
        for filename in filenames:
            # Test samples are recognised purely by the file name.
            is_test = "test" in filename
            df_sampled = pd.read_csv(csv_paths / filename)
            jobs = [
                {
                    "csv_row": row,
                    "ds_path": ds_path,
                    "group_name": group_name,
                    "window_size": window_size,
                    "is_test": is_test,
                }
                for _, row in df_sampled.iterrows()
            ]
            outputs = star_imap_unordered(pool, create_window, jobs)
            # Drain the iterator so every job runs; tqdm only shows progress.
            for _ in tqdm.tqdm(outputs, total=len(jobs)):
                pass


if __name__ == "__main__":
    multiprocessing.set_start_method("forkserver")
    parser = argparse.ArgumentParser(description="Create windows from csv")
    parser.add_argument(
        "--csv_paths",
        type=str,
        default="gs://ai2-helios-us-central1/evaluations/crop_type_mapping/togo_2020",
        help="Path to the directory containing the csv files",
    )
    parser.add_argument(
        "--ds_path",
        type=str,
        required=True,
        help="Path to the dataset",
    )
    parser.add_argument(
        "--group_name",
        type=str,
        required=False,
        help="Name of the group",
        default="groundtruth",
    )
    parser.add_argument(
        "--window_size",
        type=int,
        required=False,
        help="Window size",
        default=1,
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        required=False,
        help="Number of worker processes",
        default=32,
    )
    args = parser.parse_args()
    create_windows_from_csv(
        UPath(args.csv_paths),
        UPath(args.ds_path),
        args.group_name,
        window_size=args.window_size,
        num_workers=args.num_workers,
    )
# rslp/crop/rapids_togo/to_csv.py
"""Turn shapefiles into csvs.

It's easier for google cloud if the files are in a single csv instead of
in a shapefile, so we process it into csvs.
"""

import geopandas
import pandas as pd
from upath import UPath


def process_files(shapefile_path: UPath) -> pd.DataFrame:
    """Flatten one shapefile into a table of labelled centroid points.

    Args:
        shapefile_path: path to the shapefile

    Returns:
        a dataframe with one row per shape and the columns is_crop, latitude,
        longitude, org_file and unique_id
    """
    # The row index is baked into unique_id below, so make sure it is a clean
    # 0..n-1 range. (The original called reset_index() but threw the result
    # away, which did nothing.)
    df = geopandas.read_file(shapefile_path).reset_index(drop=True)

    # The label is derived from the file name alone: "non" marks non-crop.
    # NOTE(review): any file without "non" in its name -- including
    # togo_test_majority -- gets is_crop=1 on every row. Confirm the test
    # shapefile does not carry its own per-row labels.
    df["is_crop"] = 0 if "non" in shapefile_path.name.lower() else 1

    # These columns are consumed as WGS84 longitude/latitude, so reproject the
    # centroids whenever the shapefile declares a CRS. Without a declared CRS
    # the raw coordinates are kept, as before.
    centroids = df.geometry.centroid
    if df.crs is not None:
        centroids = centroids.to_crs(epsg=4326)
    df["longitude"] = centroids.x
    df["latitude"] = centroids.y

    org_file = shapefile_path.name
    df["org_file"] = org_file
    df["unique_id"] = [f"{row_index}-{org_file}" for row_index in df.index]

    return df[["is_crop", "latitude", "longitude", "org_file", "unique_id"]]


if __name__ == "__main__":
    for filename in ["crop_merged_v2", "noncrop_merged_v2", "togo_test_majority"]:
        csv_name = UPath(".") / f"{UPath(filename).stem}.csv"
        # Never overwrite an existing csv; delete it first to regenerate.
        if csv_name.exists():
            print(f"{csv_name} exists - skipping")
            continue
        df = process_files(UPath(filename))
        df.to_csv(csv_name)