diff --git a/modelling/README.md b/modelling/README.md index 25c3930..383cad0 100644 --- a/modelling/README.md +++ b/modelling/README.md @@ -176,6 +176,18 @@ Cluster summaries attempt to expose the underlying feature structure driving the This allows the resulting neighbourhoods to be inspected and interpreted ecologically, rather than treated as opaque statistical groupings. +The resulting neighbourhood structures can also be aggregated temporally to produce seasonal ecological calendars that summarise the mean normalised activity of ecological neighbourhoods across the year, allowing broader seasonal structure to be visualised at community scale rather than species-by-species. + +Rather than focusing on individual taxa, the calendars attempt to expose larger seasonal ecological modes, including: + +- Winter visitor structure +- Spring flowering and emergence periods +- Resident detectability dynamics +- Extended summer assemblages +- Transitional seasonal neighbourhoods + +The resulting heatmaps provide an interpretable view of how different regions of seasonal ecological space become active, overlap, and decline through the ecological year. 
+ ## Folder Structure ### seasonal-presence/ diff --git a/modelling/data/activity_heatmap.png b/modelling/data/activity_heatmap.png new file mode 100644 index 0000000..263ffd3 Binary files /dev/null and b/modelling/data/activity_heatmap.png differ diff --git a/modelling/data/calendar_activity.json b/modelling/data/calendar_activity.json new file mode 100644 index 0000000..7ea6fb4 --- /dev/null +++ b/modelling/data/calendar_activity.json @@ -0,0 +1,699 @@ +{ + "schema_version": "seasonal-ecological-calendar-activity/v1", + "source_cluster_schema_version": "seasonal-ecological-calendar-clusters/v1", + "n_clusters": 8, + "normalisation": { + "method": "species_max", + "description": "Each species is normalised to its own maximum synthesised value before cluster-level monthly aggregation.", + "value_column": "synthesised" + }, + "clusters": [ + { + "cluster_id": 1, + "calendar_label": "Redwing neighbourhood", + "n_species": 1, + "species": [ + "Redwing" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 1.0, + "n_species_contributing": 1 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.944798, + "n_species_contributing": 1 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.609792, + "n_species_contributing": 1 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.224145, + "n_species_contributing": 1 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.030029, + "n_species_contributing": 1 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.00302, + "n_species_contributing": 1 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.000304, + "n_species_contributing": 1 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 3.1e-05, + "n_species_contributing": 1 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.008068, + "n_species_contributing": 1 + }, + { + "month": 10, + "month_name": 
"October", + "mean_activity": 0.081518, + "n_species_contributing": 1 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.262747, + "n_species_contributing": 1 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.533111, + "n_species_contributing": 1 + } + ] + }, + { + "cluster_id": 2, + "calendar_label": "Extended Spring Seasonal Presence", + "n_species": 13, + "species": [ + "Dandelion", + "Brimstone Butterfly", + "Red Dead Nettle", + "Red Campion", + "Buttercup", + "Swallow", + "Chiffchaff", + "Peacock Butterfly", + "Cuckoo Pint", + "Common Poppy", + "Swift", + "Red Admiral Butterfly", + "Speckled Wood Butterfly" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.000281, + "n_species_contributing": 13 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.055817, + "n_species_contributing": 13 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.306096, + "n_species_contributing": 13 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.561452, + "n_species_contributing": 13 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.672088, + "n_species_contributing": 13 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.611283, + "n_species_contributing": 13 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.460299, + "n_species_contributing": 13 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 0.342359, + "n_species_contributing": 13 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.160963, + "n_species_contributing": 13 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.035911, + "n_species_contributing": 13 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.00979, + "n_species_contributing": 13 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.000117, + "n_species_contributing": 
13 + } + ] + }, + { + "cluster_id": 3, + "calendar_label": "Rosebay Willowherb neighbourhood", + "n_species": 1, + "species": [ + "Rosebay Willowherb" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.00122, + "n_species_contributing": 1 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.242893, + "n_species_contributing": 1 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.774129, + "n_species_contributing": 1 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 1.0, + "n_species_contributing": 1 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.428247, + "n_species_contributing": 1 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.001098, + "n_species_contributing": 1 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.0, + "n_species_contributing": 1 + } + ] + }, + { + "cluster_id": 4, + "calendar_label": "Snowdrop neighbourhood", + "n_species": 1, + "species": [ + "Snowdrop" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.161619, + "n_species_contributing": 1 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 1.0, + "n_species_contributing": 1 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.498704, + "n_species_contributing": 1 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.001791, + 
"n_species_contributing": 1 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 1e-06, + "n_species_contributing": 1 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.0, + "n_species_contributing": 1 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.0, + "n_species_contributing": 1 + } + ] + }, + { + "cluster_id": 5, + "calendar_label": "Moderate Spring Seasonal Presence", + "n_species": 5, + "species": [ + "Bluebell", + "Garlic Mustard", + "Cow Parsley", + "Cowslip", + "Orange Tip Butterfly" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.0, + "n_species_contributing": 5 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 7.1e-05, + "n_species_contributing": 5 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.059311, + "n_species_contributing": 5 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.69493, + "n_species_contributing": 5 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.873766, + "n_species_contributing": 5 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.237317, + "n_species_contributing": 5 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.02036, + "n_species_contributing": 5 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 4.2e-05, + "n_species_contributing": 5 + }, + { + "month": 9, + "month_name": "September", + 
"mean_activity": 0.0, + "n_species_contributing": 5 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.0, + "n_species_contributing": 5 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.0, + "n_species_contributing": 5 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.0, + "n_species_contributing": 5 + } + ] + }, + { + "cluster_id": 6, + "calendar_label": "Jay neighbourhood", + "n_species": 1, + "species": [ + "Jay" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 1.0, + "n_species_contributing": 1 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.493792, + "n_species_contributing": 1 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.289808, + "n_species_contributing": 1 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.207613, + "n_species_contributing": 1 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.174491, + "n_species_contributing": 1 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.160439, + "n_species_contributing": 1 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.061444, + "n_species_contributing": 1 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 0.012438, + "n_species_contributing": 1 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.216924, + "n_species_contributing": 1 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.477498, + "n_species_contributing": 1 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.363197, + "n_species_contributing": 1 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.29356, + "n_species_contributing": 1 + } + ] + }, + { + "cluster_id": 7, + "calendar_label": "Resident With Spring Persistence And Summer Suppression", + "n_species": 7, + "species": [ + "Mute Swan", + "Robin", + "Blackbird", + 
"Goldfinch", + "Woodpigeon", + "Common Starling", + "Magpie" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.958799, + "n_species_contributing": 7 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.94085, + "n_species_contributing": 7 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.854765, + "n_species_contributing": 7 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.74791, + "n_species_contributing": 7 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.685496, + "n_species_contributing": 7 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.666718, + "n_species_contributing": 7 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.511563, + "n_species_contributing": 7 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 0.395505, + "n_species_contributing": 7 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.369965, + "n_species_contributing": 7 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.400098, + "n_species_contributing": 7 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.451802, + "n_species_contributing": 7 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.601392, + "n_species_contributing": 7 + } + ] + }, + { + "cluster_id": 8, + "calendar_label": "Resident With Summer Detectability Collapse", + "n_species": 10, + "species": [ + "House Sparrow", + "Common Cleavers", + "Shepherds Purse", + "Skylark", + "Wren", + "Blue Tit", + "Dunnock", + "Great Tit", + "Daisy", + "Song Thrush" + ], + "monthly_activity": [ + { + "month": 1, + "month_name": "January", + "mean_activity": 0.565921, + "n_species_contributing": 10 + }, + { + "month": 2, + "month_name": "February", + "mean_activity": 0.579917, + "n_species_contributing": 10 + }, + { + "month": 3, + "month_name": "March", + "mean_activity": 0.755331, + 
"n_species_contributing": 10 + }, + { + "month": 4, + "month_name": "April", + "mean_activity": 0.921735, + "n_species_contributing": 10 + }, + { + "month": 5, + "month_name": "May", + "mean_activity": 0.829935, + "n_species_contributing": 10 + }, + { + "month": 6, + "month_name": "June", + "mean_activity": 0.600453, + "n_species_contributing": 10 + }, + { + "month": 7, + "month_name": "July", + "mean_activity": 0.330648, + "n_species_contributing": 10 + }, + { + "month": 8, + "month_name": "August", + "mean_activity": 0.205292, + "n_species_contributing": 10 + }, + { + "month": 9, + "month_name": "September", + "mean_activity": 0.210055, + "n_species_contributing": 10 + }, + { + "month": 10, + "month_name": "October", + "mean_activity": 0.232025, + "n_species_contributing": 10 + }, + { + "month": 11, + "month_name": "November", + "mean_activity": 0.261783, + "n_species_contributing": 10 + }, + { + "month": 12, + "month_name": "December", + "mean_activity": 0.395007, + "n_species_contributing": 10 + } + ] + } + ] +} diff --git a/modelling/data/cluster_analysis.json b/modelling/data/cluster_analysis.json index d397994..fc9e792 100644 --- a/modelling/data/cluster_analysis.json +++ b/modelling/data/cluster_analysis.json @@ -1,6 +1,6 @@ { "schema_version": "species-similarity-clusters/v2", - "created_utc": "2026-05-12T09:48:39.526449+00:00", + "created_utc": "2026-05-12T19:31:33.748438+00:00", "source_similarity_schema_version": "species-similarity/v1", "source_feature_schema_version": "species-feature-table/v1", "n_species": 39, diff --git a/modelling/data/extracted_clusters.json b/modelling/data/extracted_clusters.json new file mode 100644 index 0000000..1f36912 --- /dev/null +++ b/modelling/data/extracted_clusters.json @@ -0,0 +1,113 @@ +{ + "schema_version": "seasonal-ecological-calendar-clusters/v1", + "source_schema_version": "species-similarity-clusters/v2", + "source_created_utc": "2026-05-12T19:31:33.748438+00:00", + "n_species": 39, + "n_clusters": 8, + 
"cluster_caveat": "Clusters should be interpreted as exploratory seasonal assemblages rather than fixed ecological categories.", + "clusters": [ + { + "cluster_id": 1, + "calendar_label": "Redwing neighbourhood", + "description": "Single-species cluster containing Redwing, mainly representing core winter winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", + "n_species": 1, + "species": [ + "Redwing" + ] + }, + { + "cluster_id": 2, + "calendar_label": "Extended Spring Seasonal Presence", + "description": "Cluster of 13 species, mainly representing spring extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", + "n_species": 13, + "species": [ + "Dandelion", + "Brimstone Butterfly", + "Red Dead Nettle", + "Red Campion", + "Buttercup", + "Swallow", + "Chiffchaff", + "Peacock Butterfly", + "Cuckoo Pint", + "Common Poppy", + "Swift", + "Red Admiral Butterfly", + "Speckled Wood Butterfly" + ] + }, + { + "cluster_id": 3, + "calendar_label": "Rosebay Willowherb neighbourhood", + "description": "Single-species cluster containing Rosebay Willowherb, mainly representing autumn moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. 
It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", + "n_species": 1, + "species": [ + "Rosebay Willowherb" + ] + }, + { + "cluster_id": 4, + "calendar_label": "Snowdrop neighbourhood", + "description": "Single-species cluster containing Snowdrop, mainly representing winter narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", + "n_species": 1, + "species": [ + "Snowdrop" + ] + }, + { + "cluster_id": 5, + "calendar_label": "Moderate Spring Seasonal Presence", + "description": "Cluster of 5 species, mainly representing spring moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", + "n_species": 5, + "species": [ + "Bluebell", + "Garlic Mustard", + "Cow Parsley", + "Cowslip", + "Orange Tip Butterfly" + ] + }, + { + "cluster_id": 6, + "calendar_label": "Jay neighbourhood", + "description": "Single-species cluster containing Jay, mainly representing autumn resident with summer detectability collapse. 
Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", + "n_species": 1, + "species": [ + "Jay" + ] + }, + { + "cluster_id": 7, + "calendar_label": "Resident With Spring Persistence And Summer Suppression", + "description": "Cluster of 7 species, mainly representing winter resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", + "n_species": 7, + "species": [ + "Mute Swan", + "Robin", + "Blackbird", + "Goldfinch", + "Woodpigeon", + "Common Starling", + "Magpie" + ] + }, + { + "cluster_id": 8, + "calendar_label": "Resident With Summer Detectability Collapse", + "description": "Cluster of 10 species, mainly representing spring resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. 
Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", + "n_species": 10, + "species": [ + "House Sparrow", + "Common Cleavers", + "Shepherds Purse", + "Skylark", + "Wren", + "Blue Tit", + "Dunnock", + "Great Tit", + "Daisy", + "Song Thrush" + ] + } + ] +} \ No newline at end of file diff --git a/modelling/data/feature_matrix.json b/modelling/data/feature_matrix.json index 2eb882a..3077bdc 100644 --- a/modelling/data/feature_matrix.json +++ b/modelling/data/feature_matrix.json @@ -1,6 +1,6 @@ { "schema_version": "species-feature-table/v1", - "created_utc": "2026-05-12T09:48:38.767460+00:00", + "created_utc": "2026-05-12T19:31:33.002951+00:00", "description": "Whole-set seasonal ecology feature table compiled from per-species classification JSON files.", "n_species": 39, "source_files": [ diff --git a/modelling/data/species_similarity.json b/modelling/data/species_similarity.json index 4d335cb..c2c30e2 100644 --- a/modelling/data/species_similarity.json +++ b/modelling/data/species_similarity.json @@ -1,8 +1,8 @@ { "schema_version": "species-similarity/v1", - "created_utc": "2026-05-12T09:48:38.777271+00:00", + "created_utc": "2026-05-12T19:31:33.012198+00:00", "source_feature_schema_version": "species-feature-table/v1", - "source_feature_created_utc": "2026-05-12T09:48:38.767460+00:00", + "source_feature_created_utc": "2026-05-12T19:31:33.002951+00:00", "n_species": 39, "top_n": 5, "method": { diff --git a/modelling/scripts/run-calendar.sh b/modelling/scripts/run-calendar.sh new file mode 100755 index 0000000..82fd7ea --- /dev/null +++ b/modelling/scripts/run-calendar.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +if (( $# > 1 )); then + scriptname=$(basename -- "$0") + echo Usage: $scriptname [WRITE-CSV] + exit 1 +fi + +# Get the path to the modelling folder +MODELLING_ROOT=$( cd "$( dirname "$0" )/.." 
&& pwd ) + +# Activate the virtual environment +PROJECT_FOLDER=$( cd "$MODELLING_ROOT/.." && pwd) +source "$PROJECT_FOLDER/venv/bin/activate" + +# Build the paths to the per-model data folders +RESIDENT_DATA="$MODELLING_ROOT/resident-detectability/data" +SEASONAL_DATA="$MODELLING_ROOT/seasonal-presence/data" +WINTER_DATA="$MODELLING_ROOT/winter-visitor/data" + +# Build the ecological calendar pipeline +python "$MODELLING_ROOT/src/eco-calendar.py" \ + --input "$RESIDENT_DATA" "$SEASONAL_DATA" "$WINTER_DATA" \ + --clusters "$MODELLING_ROOT/data/cluster_analysis.json" \ + --extracted "$MODELLING_ROOT/data/extracted_clusters.json" \ + --activity "$MODELLING_ROOT/data/calendar_activity.json" \ + --heatmap "$MODELLING_ROOT/data/activity_heatmap.png" diff --git a/modelling/scripts/run-similarity.sh b/modelling/scripts/run-similarity.sh index 202c002..74b3ccd 100755 --- a/modelling/scripts/run-similarity.sh +++ b/modelling/scripts/run-similarity.sh @@ -39,8 +39,8 @@ if (( $# == 1 )); then esac fi -# Build the feature matrix -python "$MODELLING_ROOT/src/feature_matrix.py" \ +# Build the feature matrix, similarity and clustering pipeline +python "$MODELLING_ROOT/src/feature-matrix.py" \ --input "$RESIDENT_DATA" "$SEASONAL_DATA" "$WINTER_DATA" \ --json "$MODELLING_ROOT/data/feature_matrix.json" \ --similarity "$MODELLING_ROOT/data/species_similarity.json" \ diff --git a/modelling/src/eco-calendar.py b/modelling/src/eco-calendar.py new file mode 100644 index 0000000..8b560c3 --- /dev/null +++ b/modelling/src/eco-calendar.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +from seasonal.calendar.heatmap import plot_neighbourhood_calendar_heatmap +from seasonal.calendar.activity import build_neighbourhood_monthly_activity +from seasonal.calendar.loader import load_synthesised_species_data +from seasonal.calendar.extractor import extract_calendar_cluster_metadata +from seasonal.support.console import print_message +from
def main() -> None:
    """
    Main entry point for the seasonal ecological calendar pipeline

    The pipeline runs three stages in order:

    1. Load the cluster analysis JSON file and extract the lightweight cluster metadata
    2. Load the simulated per-species data and aggregate it into monthly neighbourhood activity
    3. Plot the monthly neighbourhood activity as a heatmap
    """
    parser = argparse.ArgumentParser()
    # The long option stays as "--input-dirs": run-calendar.sh invokes this script with
    # "--input", which argparse accepts as an unambiguous prefix of "--input-dirs"
    parser.add_argument("-i", "--input-dirs", nargs="+", type=Path, required=True,
                        help="One or more directories containing simulated CSV files")
    parser.add_argument("-cl", "--clusters", type=Path, required=True, help="Cluster analysis JSON file path")
    parser.add_argument("-e", "--extracted", type=Path, required=True, help="Extracted cluster analysis JSON file path")
    parser.add_argument("-a", "--activity", type=Path, required=True,
                        help="Monthly neighbourhood activity JSON file path")
    parser.add_argument("-hm", "--heatmap", type=Path, required=True,
                        help="Monthly neighbourhood activity heatmap PNG file path")
    args = parser.parse_args()

    # Load the cluster analysis JSON file and use it to generate the extracted details
    clusters = load_json(args.clusters)
    extracted = extract_calendar_cluster_metadata(clusters, args.extracted)
    print_message(f"Extracted clustering details written to {args.extracted.name}")

    # Load the simulated CSV files for each species and build the monthly neighbourhood activity
    simulated = load_synthesised_species_data(args.input_dirs)
    activity = build_neighbourhood_monthly_activity(simulated, extracted, args.activity)
    print_message(f"Neighbourhood activity details written to {args.activity.name}")

    # Generate the heatmap
    plot_neighbourhood_calendar_heatmap(activity, args.heatmap)
    print_message(f"Neighbourhood activity heatmap written to {args.heatmap.name}")


if __name__ == "__main__":
    main()
pathlib import Path -from seasonal.features.species_similarity import build_species_similarity, save_similarity_summary -from seasonal.features.similarity_heatmap import generate_species_similarity_heatmap -from seasonal.features.similarity_clusters import extract_species_similarity_clusters, save_cluster_summary +from seasonal.similarity.similarity import build_species_similarity, save_similarity_summary +from seasonal.similarity.heatmap import generate_species_similarity_heatmap +from seasonal.similarity.clustering import extract_species_similarity_clusters, save_cluster_summary from seasonal.features.feature_matrix import build_feature_table, find_input_files, write_csv -from seasonal.features.similarity_dendrogram import plot_species_cluster_dendrogram +from seasonal.similarity.dendrogram import plot_species_cluster_dendrogram from seasonal.support.console import print_error, print_message from seasonal.support.json import write_json @@ -65,7 +65,7 @@ def main() -> None: """ - Main entry point for the feature matrix builder + Main entry point for the feature, similarity and clustering pipeline """ parser = argparse.ArgumentParser() parser.add_argument("-i", "--input-dirs", nargs="+", type=Path, required=True, diff --git a/modelling/src/seasonal/calendar/__init__.py b/modelling/src/seasonal/calendar/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelling/src/seasonal/calendar/activity.py b/modelling/src/seasonal/calendar/activity.py new file mode 100644 index 0000000..20da98e --- /dev/null +++ b/modelling/src/seasonal/calendar/activity.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +from collections import defaultdict +from pathlib import Path +from typing import Any + +from seasonal.support.json import write_json +from seasonal.support.calendar import month_label + + +def build_neighbourhood_monthly_activity( + synthesised_rows: list[dict[str, Any]], + calendar_clusters: dict[str, Any], + output_path: str | Path, + 
value_column: str = "synthesised", + month_column: str = "month", +) -> dict[str, Any]: + """ + Build monthly activity summaries for each seasonal ecological neighbourhood. + + :param synthesised_rows: Simulated output for all species scaled to observed data scale + :param calendar_clusters: Extracted cluser information taking from the clustering analysis + :param output_path: Path to the output activity JSON file to create + :param value_column: Name of the "value" column in synthesised_rows + :param month_column: Name of the "month" column in synthesised_rows + :return: Dictionary of neighbourhood monthly activity data + """ + + species_to_cluster = _build_species_to_cluster_lookup(calendar_clusters) + + species_max_values = _calculate_species_max_values( + synthesised_rows, + value_column=value_column, + ) + + grouped_values: dict[tuple[int, int], list[float]] = defaultdict(list) + + for row in synthesised_rows: + species = row.get("Species") + if not species or species not in species_to_cluster: + continue + + month = _parse_month(row.get(month_column)) + if month is None: + continue + + raw_value = _parse_float(row.get(value_column)) + if raw_value is None: + continue + + max_value = species_max_values.get(species) + if not max_value or max_value <= 0: + continue + + cluster_id = species_to_cluster[species] + normalised_value = raw_value / max_value + + grouped_values[(cluster_id, month)].append(normalised_value) + + output_clusters: list[dict[str, Any]] = [] + + for cluster in sorted( + calendar_clusters.get("clusters", []), + key=lambda c: c.get("cluster_id", 0), + ): + cluster_id = cluster["cluster_id"] + + monthly_activity: list[dict[str, Any]] = [] + + for month in range(1, 13): + values = grouped_values.get((cluster_id, month), []) + mean_activity = sum(values) / len(values) if values else 0.0 + + monthly_activity.append( + { + "month": month, + "month_name": month_label(month), + "mean_activity": round(mean_activity, 6), + "n_species_contributing": 
len(values), + } + ) + + output_clusters.append( + { + "cluster_id": cluster_id, + "calendar_label": cluster.get( + "calendar_label", + f"Cluster {cluster_id}", + ), + "n_species": cluster.get( + "n_species", + len(cluster.get("species", [])), + ), + "species": cluster.get("species", []), + "monthly_activity": monthly_activity, + } + ) + + output = { + "schema_version": "seasonal-ecological-calendar-activity/v1", + "source_cluster_schema_version": calendar_clusters.get("schema_version"), + "n_clusters": len(output_clusters), + "normalisation": { + "method": "species_max", + "description": ( + "Each species is normalised to its own maximum synthesised value " + "before cluster-level monthly aggregation." + ), + "value_column": value_column, + }, + "clusters": output_clusters, + } + + write_json(output_path, output) + + return output + + +def _build_species_to_cluster_lookup(calendar_clusters: dict[str, Any]) -> dict[str, int]: + """ + Build a lookup between a species and the ID for the cluster it belongs to + + :param calendar_clusters: Extracted cluser information taking from the clustering analysis + :return: Dictionary of species/cluster ID mappings + """ + species_to_cluster: dict[str, int] = {} + + for cluster in calendar_clusters.get("clusters", []): + cluster_id = cluster["cluster_id"] + + for species in cluster.get("species", []): + species_to_cluster[species] = cluster_id + + return species_to_cluster + + +def _calculate_species_max_values(synthesised_rows: list[dict[str, Any]], value_column: str) -> dict[str, float]: + """ + Build a lookup between a species and its maximum activity value + + :param synthesised_rows: Simulated output for all species scaled to observed data scale + :return: Dictionary of species/maximum activity mappings + """ + species_max_values: dict[str, float] = {} + + for row in synthesised_rows: + species = row.get("Species") + value = _parse_float(row.get(value_column)) + + if not species or value is None: + continue + + current_max 
= species_max_values.get(species, 0.0) + species_max_values[species] = max(current_max, value) + + return species_max_values + + +def _parse_month(value: Any) -> int | None: + """ + Parse a month number from a value + + :param value: Value to parse + :return: Month number or None + """ + try: + month = int(value) + except (TypeError, ValueError): + return None + + if 1 <= month <= 12: + return month + + return None + + +def _parse_float(value: Any) -> float | None: + """ + Parse a float from a value + + :param value: Value to parse + :return: Floating point value or None + """ + try: + return float(value) + except (TypeError, ValueError): + return None diff --git a/modelling/src/seasonal/calendar/extractor.py b/modelling/src/seasonal/calendar/extractor.py new file mode 100644 index 0000000..71e5c2b --- /dev/null +++ b/modelling/src/seasonal/calendar/extractor.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +def extract_calendar_cluster_metadata( + cluster_analysis: dict[str, Any], + output_path: str | Path, +) -> dict[str, Any]: + """ + Extract lightweight cluster metadata for seasonal ecological calendar generation + + This deliberately keeps only the fields needed to group species into calendar neighbourhoods + and label them for downstream visualisation + + :param cluster_analysis: Data loaded from th cluster analysis JSON output + :return: Simplified, extracted information + """ + + clusters = cluster_analysis.get("clusters", []) + + extracted_clusters: list[dict[str, Any]] = [] + + for cluster in clusters: + species = cluster.get("species", []) + + extracted_clusters.append( + { + "cluster_id": cluster.get("cluster_id"), + "calendar_label": _suggest_calendar_label(cluster), + "description": cluster.get("description", ""), + "n_species": cluster.get("n_species", len(species)), + "species": species, + } + ) + + extracted = { + "schema_version": 
"seasonal-ecological-calendar-clusters/v1", + "source_schema_version": cluster_analysis.get("schema_version"), + "source_created_utc": cluster_analysis.get("created_utc"), + "n_species": cluster_analysis.get("n_species"), + "n_clusters": cluster_analysis.get("n_clusters", len(extracted_clusters)), + "cluster_caveat": ( + cluster_analysis + .get("method", {}) + .get("cluster_caveat", "") + ), + "clusters": extracted_clusters, + } + + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with output_path.open("w", encoding="utf-8") as f: + json.dump(extracted, f, indent=2, ensure_ascii=False) + + return extracted + + +def _suggest_calendar_label(cluster: dict[str, Any]) -> str: + """ + Generate a short, readable calendar label from cluster metadata + + :param cluster: Dictionary containing the cluster properties for a single cluster + :return: Suggested cluster name + """ + + species = cluster.get("species", []) + n_species = cluster.get("n_species", len(species)) + dominant_class = cluster.get("dominant_primary_class") + + if n_species == 1 and species: + return f"{species[0]} neighbourhood" + + if dominant_class: + return dominant_class.replace("_", " ").title() + + cluster_id = cluster.get("cluster_id", "unknown") + return f"Cluster {cluster_id}" diff --git a/modelling/src/seasonal/calendar/heatmap.py b/modelling/src/seasonal/calendar/heatmap.py new file mode 100644 index 0000000..44a9b62 --- /dev/null +++ b/modelling/src/seasonal/calendar/heatmap.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +import matplotlib.pyplot as plt +import numpy as np + + +MONTH_NAMES = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + + +def plot_neighbourhood_calendar_heatmap( + calendar_activity: dict[str, Any], + output_path: str | Path, + title: str = "Seasonal Ecological Calendar", +) -> None: + """ + Plot an analytical neighbourhood x month 
MONTH_NAMES = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


def plot_neighbourhood_calendar_heatmap(
    calendar_activity: dict[str, Any],
    output_path: str | Path,
    title: str = "Seasonal Ecological Calendar",
) -> None:
    """
    Plot an analytical neighbourhood x month heatmap from a seasonal ecological
    calendar activity artefact

    Each cluster becomes one row labelled with its cluster ID; months with no entry in the
    cluster's "monthly_activity" list are drawn as 0.0. A text legend mapping cluster IDs to
    their calendar labels is drawn beneath the heatmap

    :param calendar_activity: Neighbourhood calendar activity dictionary
    :param output_path: Path of the image file to write; missing parent folders are created
    :param title: Title drawn above the heatmap
    :raises ValueError: If calendar_activity contains no clusters
    """

    clusters = calendar_activity.get("clusters", [])

    if not clusters:
        raise ValueError("calendar_activity contains no clusters")

    # Imported only once the input is known to be usable, so invalid input fails fast
    from matplotlib.gridspec import GridSpec

    labels: list[str] = []
    matrix: list[list[float]] = []

    for cluster in clusters:
        cluster_id = cluster.get("cluster_id")
        # Rows are labelled with the bare ID; the legend below maps IDs to readable names
        labels.append("" if cluster_id is None else str(cluster_id))

        month_values = [0.0] * 12

        for item in cluster.get("monthly_activity", []):
            month = int(item["month"])
            if 1 <= month <= 12:
                month_values[month - 1] = float(item.get("mean_activity", 0.0))

        matrix.append(month_values)

    data = np.array(matrix)

    # Grow the figure with the number of rows, but never below a readable minimum
    fig_height = max(5.5, 0.55 * len(labels) + 2.2)

    fig = plt.figure(figsize=(14, fig_height))

    try:
        gs = GridSpec(
            2,
            1,
            height_ratios=[4, 1.35],
            hspace=0.22,
            figure=fig,
        )

        ax = fig.add_subplot(gs[0])
        legend_ax = fig.add_subplot(gs[1])

        im = ax.imshow(
            data,
            aspect="auto",
            vmin=0,
            vmax=1,
            cmap="YlOrRd",
        )

        ax.set_xticks(range(12))
        ax.set_xticklabels(MONTH_NAMES)

        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels)

        ax.set_xlabel("Month")
        ax.set_ylabel("Cluster")
        ax.set_title(title)

        cbar = fig.colorbar(
            im,
            ax=ax,
            fraction=0.035,
            pad=0.035,
        )
        cbar.set_label("Mean normalised activity")

        # The lower axes only hosts the text legend, so hide its frame and ticks
        legend_ax.axis("off")

        legend_lines = ["Cluster Descriptions:", "\n"]

        for cluster in clusters:
            cluster_id = cluster.get("cluster_id")
            label = cluster.get("calendar_label", cluster_id)
            legend_lines.append(f"{cluster_id}: {label}")

        legend_ax.text(
            0.0,
            1.0,
            "\n".join(legend_lines),
            fontsize=10,
            va="top",
            ha="left",
            transform=legend_ax.transAxes,
        )

        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        fig.savefig(
            output_path,
            dpi=200,
            bbox_inches="tight",
        )
    finally:
        # pyplot keeps a reference to every figure it creates; release it even if drawing or
        # saving failed
        plt.close(fig)
output_path.parent.mkdir(parents=True, exist_ok=True) + + fig.savefig( + output_path, + dpi=200, + bbox_inches="tight", + ) + + plt.close(fig) diff --git a/modelling/src/seasonal/calendar/loader.py b/modelling/src/seasonal/calendar/loader.py new file mode 100644 index 0000000..cbcc8b0 --- /dev/null +++ b/modelling/src/seasonal/calendar/loader.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +import csv +from pathlib import Path +from typing import Any + +SUFFIX = "_synthesised" + + +def load_synthesised_species_data(folders: list[str | Path]) -> list[dict[str, Any]]: + """ + Load and combine all _synthesised.csv files from the supplied folders. Adds a canonical + 'Species' field derived from the filename + + :param folders: List of folder paths containing simulated CSV files + """ + all_rows: list[dict[str, Any]] = [] + + for folder in folders: + folder = Path(folder) + + for csv_path in sorted(folder.glob(f"*{SUFFIX}.csv")): + species_name = _species_name_from_filename(csv_path) + + with csv_path.open("r", encoding="utf-8") as f: + reader = csv.DictReader(f) + + for row in reader: + row["Species"] = species_name + all_rows.append(row) + + return all_rows + + +def _species_name_from_filename(path: Path) -> str: + """ + Convert a filename _synthesised.csv to a species name. 
The element + if lowercase and uses "_" as the word separator rather than a space + + :param path: Path to the CSV file + :return: Species name + """ + + stem = path.stem + + if stem.endswith(f"{SUFFIX}"): + stem = stem[:-len(SUFFIX)] + + return stem.replace("_", " ").title() diff --git a/modelling/src/seasonal/similarity/__init__.py b/modelling/src/seasonal/similarity/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modelling/src/seasonal/features/similarity_clusters.py b/modelling/src/seasonal/similarity/clustering.py similarity index 99% rename from modelling/src/seasonal/features/similarity_clusters.py rename to modelling/src/seasonal/similarity/clustering.py index fd24fac..4851d8a 100644 --- a/modelling/src/seasonal/features/similarity_clusters.py +++ b/modelling/src/seasonal/similarity/clustering.py @@ -8,8 +8,8 @@ import numpy as np from scipy.cluster.hierarchy import fcluster -from seasonal.features.clustering import order_species_by_linkage, serialise_linkage_matrix -from seasonal.features.species_similarity import build_similarity_matrix, extract_species_names +from seasonal.support.clustering import order_species_by_linkage, serialise_linkage_matrix +from seasonal.similarity.similarity import build_similarity_matrix, extract_species_names from seasonal.support.numeric import round_float, safe_float from seasonal.support.calendar import circular_month_mean, signed_circular_month_difference, month_label diff --git a/modelling/src/seasonal/features/similarity_dendrogram.py b/modelling/src/seasonal/similarity/dendrogram.py similarity index 99% rename from modelling/src/seasonal/features/similarity_dendrogram.py rename to modelling/src/seasonal/similarity/dendrogram.py index bbc2788..9f7b11b 100644 --- a/modelling/src/seasonal/features/similarity_dendrogram.py +++ b/modelling/src/seasonal/similarity/dendrogram.py @@ -12,7 +12,7 @@ def _first_sentence(text: str) -> str: """ Extract the first sentence, excluding trainling full-stop, from a 
cluster description - + :param text: Full text :return: First sentence of the text """ @@ -66,7 +66,6 @@ def plot_species_cluster_dendrogram( title: str = "Species Similarity Dendrogram", colour_clusters: bool = True ) -> None: - # Load the data and extract the species and cluster details linkage_info = cluster_data.get("linkage", {}) linkage_matrix = np.asarray(linkage_info.get("matrix"), dtype=float) diff --git a/modelling/src/seasonal/features/similarity_heatmap.py b/modelling/src/seasonal/similarity/heatmap.py similarity index 95% rename from modelling/src/seasonal/features/similarity_heatmap.py rename to modelling/src/seasonal/similarity/heatmap.py index 9202f18..d6f12c9 100644 --- a/modelling/src/seasonal/features/similarity_heatmap.py +++ b/modelling/src/seasonal/similarity/heatmap.py @@ -4,8 +4,8 @@ from typing import Any, Dict, Optional, Tuple import matplotlib.pyplot as plt import numpy as np -from seasonal.features.clustering import order_species_by_linkage -from seasonal.features.species_similarity import build_similarity_matrix, extract_species_names +from seasonal.support.clustering import order_species_by_linkage +from seasonal.similarity.similarity import build_similarity_matrix, extract_species_names def generate_species_similarity_heatmap( diff --git a/modelling/src/seasonal/features/species_similarity.py b/modelling/src/seasonal/similarity/similarity.py similarity index 100% rename from modelling/src/seasonal/features/species_similarity.py rename to modelling/src/seasonal/similarity/similarity.py diff --git a/modelling/src/seasonal/features/clustering.py b/modelling/src/seasonal/support/clustering.py similarity index 100% rename from modelling/src/seasonal/features/clustering.py rename to modelling/src/seasonal/support/clustering.py