Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions run_batch_communities_local_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def log_memory(stage):
def is_feature_processed(output_csv, feature_id):
    """Return True if *feature_id* already appears in the results CSV.

    Used to make batch runs resumable: features already recorded in
    ``output_csv`` can be skipped.

    Parameters
    ----------
    output_csv : str
        Path to the results CSV. May not exist yet (first run).
    feature_id : Any
        Identifier to look up; compared as a string.

    Returns
    -------
    bool
        True if the ID is present in the file's ``FeatureID`` column.
    """
    if not os.path.exists(output_csv):
        return False
    # Force FeatureID to str at parse time: otherwise pandas infers int for
    # numeric-looking IDs, so an ID like "007" would be read back as 7 and
    # never match str(feature_id) == "007".
    df = pd.read_csv(output_csv, usecols=['FeatureID'], dtype={'FeatureID': str})
    return str(feature_id) in df['FeatureID'].values

# --- Main Batch Function ---
def run_batch_for_scale(shapefile, id_field, inventory_periods, tree_canopy_source, scale_name, date_str,
Expand Down
12 changes: 6 additions & 6 deletions run_batch_communities_local_memory_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,17 @@ def get_processed_ids_for_period(scale_folder, year1, year2):

for csv_file in glob.glob(csv_pattern):
try:
df = pd.read_csv(csv_file, usecols=["FeatureID"])
processed_ids.update(df["FeatureID"].astype(str).unique())
df = pd.read_csv(csv_file, usecols=["FeatureID"], dtype={"FeatureID": str})
processed_ids.update(df["FeatureID"].unique())
except Exception as e:
arcpy.AddWarning(f"Could not read existing chunk {csv_file}: {e}")

if os.path.exists(final_csv):
try:
df = pd.read_csv(final_csv, usecols=["FeatureID", "YearRange"])
df = pd.read_csv(final_csv, usecols=["FeatureID", "YearRange"], dtype={"FeatureID": str})
period = f"{year1}-{year2}"
processed_ids.update(
df.loc[df["YearRange"] == period, "FeatureID"].astype(str).unique()
df.loc[df["YearRange"] == period, "FeatureID"].unique()
)
except Exception as e:
arcpy.AddWarning(f"Could not read final CSV {final_csv}: {e}")
Expand Down Expand Up @@ -174,7 +174,7 @@ def worker(args):
if os.path.exists(chunk_csv):
try:
existing_ids = set(
pd.read_csv(chunk_csv, usecols=["FeatureID"])["FeatureID"].astype(str)
pd.read_csv(chunk_csv, usecols=["FeatureID"], dtype={"FeatureID": str})["FeatureID"]
)
arcpy.AddMessage(
f"Chunk {chunk_id}: found {len(existing_ids)} existing records"
Expand Down Expand Up @@ -228,7 +228,7 @@ def merge_csv_outputs(scale_folder):
print("No chunk CSVs found; skipping merge.")
return

df_list = [pd.read_csv(csv_file) for csv_file in all_csv_files]
df_list = [pd.read_csv(csv_file, dtype={"FeatureID": str}) for csv_file in all_csv_files]
master_df = pd.concat(df_list).drop_duplicates(subset=["FeatureID", "YearRange"])

master_csv_path = os.path.join(scale_folder, "master_flux_final.csv")
Expand Down