Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
*.cache-to=type=gha,mode=max
env:
OPENTOPOGRAPHY_API_KEY: ${{ secrets.OPENTOPOGRAPHY_API_KEY }}
CACHEBUST: ${{ github.sha }}

- name: Start services
run: docker compose up -d --wait
Expand Down
7 changes: 6 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,18 @@ COPY . .
# Train the yield prediction model at build time so it is always embedded in
# the image and stays in sync with the code/data. SECRET_KEY is a throwaway
# value used only to satisfy django.setup(); no database is needed.
# The training script enforces a minimum R-squared threshold and will exit
# non-zero if the model does not meet the required accuracy.
# CACHEBUST: pass a unique value (e.g. git SHA, timestamp) to force
# retraining even when no source files have changed.
ARG OPENTOPOGRAPHY_API_KEY=keykey
ARG CACHEBUST
RUN cd YieldPredictionModel \
&& SECRET_KEY=build-only-key \
OPENTOPOGRAPHY_API_KEY=${OPENTOPOGRAPHY_API_KEY} \
python -u CreateAndTrainYieldCalculatorModel.py \
&& test -f Models/yield_model.keras \
|| { echo "ERROR: Model training failed - yield_model.keras not created"; exit 1; }
|| { echo "ERROR: Model training failed - check accuracy report above for details"; exit 1; }

# Drop root
RUN useradd -m appuser && chown -R appuser /app
Expand Down
98 changes: 48 additions & 50 deletions backend/YieldPredictionModel/CreateAndTrainYieldCalculatorModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
from helpers import Create_Model, encode, save_to_csv
from core.services import create_charai_data

# Minimum R-squared the trained model must achieve on the held-out test set.
# If the model scores below this value, the script exits with a non-zero code
# so that Docker builds and CI pipelines fail early with a clear error.
# Adjust this threshold as training data or model architecture improves.
MIN_R2_THRESHOLD = 0.0

harvest_file_name_total = "./Data/CookHandHarvest_HY1999-HY2016_P3A3_20241029(in).csv"
tiff_file_path = "./Data/tiff.tif"

Expand All @@ -41,24 +47,33 @@
)

# ---------- Cook Harvest Prep ----------
logger.info("getting harvest")
logger.info("Loading harvest data")
harvest = pd.read_csv(harvest_file_name_total)
logger.info("Raw harvest rows: %d, columns: %d", *harvest.shape)

# print(harvest["Crop"].unique())

# drop any columns that have more than 1000 missing values
# Drop columns where more than 1000 values are missing -- these columns
# are too sparse to be useful for training.
harvest = harvest.loc[:, harvest.isna().sum() <= 1000]

# Drop unnecessary "SampleID" Column
# Drop unnecessary "SampleID" column
harvest.drop(columns=["SampleID"], inplace=True)

# Drop rows with missing "Crop" or "GrainYieldAirDry" Values
# Drop rows with any remaining missing values (Crop, GrainYieldAirDry, etc.)
harvest = harvest.dropna()
harvest.isna().sum()

# Drop columns: QCCoverage, QCFlags, CropExists
# Drop metadata columns not used for training
harvest.drop(columns=["QCCoverage", "QCFlags", "CropExists", "ID2", "HarvestYear"], inplace=True)

# Remove rows where the crop failed or was not harvested (zero yield).
# These represent planting failures, not valid yield observations, and
# would bias the model toward predicting lower yields.
zero_yield_count = (harvest["GrainYieldAirDry"] <= 0).sum()
if zero_yield_count > 0:
logger.info("Removing %d rows with zero/negative yield (failed crops)", zero_yield_count)
harvest = harvest[harvest["GrainYieldAirDry"] > 0]

logger.info("Cleaned harvest rows: %d", len(harvest))

# ---------- Get CharAI Generated Data ----------

charai = create_charai_data(logger, cook_farm_coords, tiff_file_path)
Expand Down Expand Up @@ -146,50 +161,33 @@
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

# date = date
# time = time
# ---------- Accuracy Report ----------
logger.info("--- Model Accuracy Report ---")
logger.info(" Test Loss (MSE) : %.4f", test_loss)
logger.info(" Test MAE : %.4f", test_mae)
logger.info(" RMSE : %.4f", rmse)
logger.info(" R-squared (R2) : %.4f", r2)
logger.info(" Min R2 Threshold: %.4f", MIN_R2_THRESHOLD)
logger.info(" Training rows : %d", len(X_train))
logger.info(" Test rows : %d", len(X_test))
logger.info(" Features : %s", ", ".join(YieldCalculator.MODEL_FEATURE_COLUMNS))
logger.info("--- End Accuracy Report ---")

# ---------- Accuracy Gate ----------
if r2 < MIN_R2_THRESHOLD:
logger.error(
"ACCURACY CHECK FAILED: R2=%.4f is below the minimum threshold of %.4f. "
"The model does not meet the required accuracy for deployment. "
"Review training data, feature engineering, or model architecture. "
"To adjust the threshold, update MIN_R2_THRESHOLD in this script.",
r2,
MIN_R2_THRESHOLD,
)
sys.exit(1)

logger.info("Accuracy check passed (R2=%.4f >= %.4f)", r2, MIN_R2_THRESHOLD)

Path("./Models").mkdir(parents=True, exist_ok=True)
model.save("./Models/yield_model.keras")
logger.info("Model saved to ./Models/yield_model.keras")

# model.save("./Models/yield_model{date}_{time}.keras")

# ---------- Feature Sensitivity Analysis ----------
# For each feature, perturb it by a small delta and measure how predicted
# yield changes. This tells us the direction and magnitude of each
# feature's influence, which informs the biochar feature adjustments in
# _calculate_biochar_yield.

logger.info("--- Feature Sensitivity Analysis ---")

feature_columns = YieldCalculator.MODEL_FEATURE_COLUMNS
baseline_features = X_test.to_numpy(dtype=np.float32)
baseline_preds = model.predict(baseline_features, verbose=0).flatten()

# Use 1 % of each feature's standard deviation as the perturbation step.
# For features with zero std (constant), fall back to 1 % of the mean or 0.01.
PERTURBATION_FRACTION = 0.01

for i, col in enumerate(feature_columns):
std = X_test[col].std()
mean = X_test[col].mean()
delta = std * PERTURBATION_FRACTION if std > 0 else abs(mean) * PERTURBATION_FRACTION or 0.01

perturbed = baseline_features.copy()
perturbed[:, i] += delta

perturbed_preds = model.predict(perturbed, verbose=0).flatten()
mean_yield_change = (perturbed_preds - baseline_preds).mean()
sensitivity = mean_yield_change / delta # yield change per unit feature change

direction = "INCREASE" if sensitivity > 0 else "DECREASE"

logger.info(
f" {col:25s} | std={std:.4f} mean={mean:.4f} delta={delta:.6f} "
f"| avg yield Δ = {mean_yield_change:+.4f} | sensitivity = {sensitivity:+.4f}/unit "
f"| To boost yield: {direction} this feature"
)

logger.info("--- End Sensitivity Analysis ---")

89 changes: 89 additions & 0 deletions backend/YieldPredictionModel/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,95 @@ Export to Colab and run a script to train the model.

Import the model into repo

## Model Accuracy Requirements

The training script (`CreateAndTrainYieldCalculatorModel.py`) enforces a minimum
model accuracy on every run. If the trained model does not meet the threshold,
the script exits with a non-zero code, which causes Docker builds and CI
pipelines to fail with a clear error message.

### Current Threshold

| Metric | Minimum Value | Notes |
| ------------ | ------------- | -------------------------------------------- |
| R-squared | 0.0 | Measured on 20% held-out test set |

The threshold is defined as `MIN_R2_THRESHOLD` at the top of the training
script. It can be raised as the model architecture, feature set, or training
data improves.

### Why R-squared?

R-squared (coefficient of determination) measures the proportion of variance in
the target variable (`GrainYieldAirDry`) explained by the model. The model uses
five features: crop type (`Crop`) and four terrain features (`elev_mean_m`,
`slope_mean_deg`, `aspect_eastness`, `aspect_northness`). The threshold is a
floor to catch broken training runs rather than a quality target.

### What Happens on Failure

When R-squared falls below the threshold the training script:
1. Logs an `ERROR`-level message with the actual vs. required R-squared.
2. Exits with code 1.
3. The Docker build step in `Dockerfile` detects the non-zero exit and aborts.
4. CI reports the failure in the build logs with a descriptive error.

### How to Update the Threshold

1. Open `CreateAndTrainYieldCalculatorModel.py`.
2. Change `MIN_R2_THRESHOLD` to the new value.
3. Rebuild the Docker image locally (`docker compose build backend`) to verify
the model still passes.
4. Commit and push -- CI will enforce the new threshold automatically.

### Accuracy Report in Logs

Every training run prints an accuracy report to stdout/stderr:
```
--- Model Accuracy Report ---
Test Loss (MSE) : <value>
Test MAE : <value>
RMSE : <value>
R-squared (R2) : <value>
Min R2 Threshold: <value>
Training rows : <count>
Test rows : <count>
Features : Crop, elev_mean_m, slope_mean_deg, aspect_eastness, aspect_northness
--- End Accuracy Report ---
```

### Data Quality -- Cook Harvest

The training script cleans the Cook Farm harvest data before training:
- Columns with more than 1000 missing values are dropped (too sparse).
- Rows with any remaining missing values are removed.
- Metadata columns (`SampleID`, `QCCoverage`, `QCFlags`, `CropExists`, `ID2`,
`HarvestYear`) are dropped.
- **Zero-yield rows are removed.** These represent planting failures or
unharvested samples and would bias the model toward predicting lower yields.

### Model Features

The model uses five features (defined in `YieldCalculator.MODEL_FEATURE_COLUMNS`):

| Feature | Type | Range | Source |
| ------------------ | ----------- | ------------- | -------------- |
| `Crop` | Categorical | 0-11 (encoded)| Harvest data |
| `elev_mean_m` | Continuous | ~750-800 | DEM/GeoParser |
| `slope_mean_deg` | Continuous | ~0-20 | DEM/GeoParser |
| `aspect_eastness` | Continuous | -1 to 1 | DEM/GeoParser |
| `aspect_northness` | Continuous | -1 to 1 | DEM/GeoParser |

**Crop encoding** uses a fixed alphabetical mapping defined in
`YieldCalculator.CROP_ENCODING`. This must stay consistent between training and
inference. Do not use `sklearn.LabelEncoder` directly as it produces different
encodings depending on which values it sees.

**Feature scaling** is handled by a `BatchNormalization` layer at the front of
the neural network. The learned normalization parameters are stored inside the
`.keras` model file, so inference automatically applies the same scaling without
a separate scaler artifact.

## Model Artifacts in CI

Model files (`*.keras`) generated by `CreateAndTrainYieldCalculatorModel.py` are
Expand Down
29 changes: 21 additions & 8 deletions backend/YieldPredictionModel/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,44 @@
import pandas as pd
import numpy as np
import keras
from sklearn.preprocessing import LabelEncoder

def save_to_csv(path: str, df: pd.DataFrame):
    """Write *df* to *path* as CSV (no index column), creating any missing parent directories."""
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(destination, index=False)

def encode(df):
    """Encode the "Crop" column to integers using the fixed mapping from YieldCalculator.

    Mutates ``df`` in place: string crop codes in the "Crop" column are
    replaced with their integer encoding from
    ``YieldCalculator.CROP_ENCODING``, which keeps training and inference
    encodings deterministic and consistent. Do NOT use
    ``sklearn.LabelEncoder`` here: its output depends on which values it
    happens to see, so it can produce different encodings per run.

    Raises:
        ValueError: if any crop code is not present in the fixed mapping.
    """
    # Local import to avoid a hard module-level dependency on the
    # calculator package when helpers is imported for other utilities.
    from modules.Calculator import YieldCalculator

    crop_values = df["Crop"]

    # Handle duplicate "Crop" columns by using the first one.
    if isinstance(crop_values, pd.DataFrame):
        crop_values = crop_values.iloc[:, 0]

    encoded = crop_values.astype(str).map(YieldCalculator.CROP_ENCODING)
    unknown = encoded.isna()
    if unknown.any():
        bad_codes = crop_values[unknown].unique().tolist()
        raise ValueError(
            f"Unknown crop code(s): {bad_codes}. "
            f"Valid codes: {sorted(YieldCalculator.CROP_ENCODING.keys())}"
        )
    df["Crop"] = encoded

def Create_Model(input_dim):
model = keras.models.Sequential()

# Input layer
model.add(keras.layers.Input(shape=(input_dim,)))

# Normalize features so the network sees approximately zero-mean,
# unit-variance inputs regardless of original scale (e.g. elevation
# ~750-800 vs aspect ~-1..1). The learned statistics are stored in
# the .keras file so inference applies the same normalization.
model.add(keras.layers.BatchNormalization())

# Hidden layers
model.add(keras.layers.Dense(
units=128,
Expand All @@ -41,21 +56,19 @@ def Create_Model(input_dim):
activation='relu',
kernel_initializer='he_normal'
))
# Optional regularization (uncomment if you see overfitting)
# model.add(keras.layers.Dropout(0.2))

# Output layer for regression (one continuous value)
model.add(keras.layers.Dense(
units=1,
activation='linear', # << no activation = regression
activation='linear',
kernel_initializer='glorot_uniform'
))

# Compile for regression
model.compile(
optimizer=keras.optimizers.Adam(learning_rate=1e-3),
loss='mse', # mean squared error
metrics=['mae'] # mean absolute error
loss='mse',
metrics=['mae']
)

return model
1 change: 1 addition & 0 deletions backend/modules/Calculator/test_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
'centroid_lat': centroid_lat,
'centroid_lon': centroid_lon,
'pixel_count': 4,
'Crop': 'WW',
'elev_mean_m': elev_mean,
'elev_min_m': elev_mean - 1,
'elev_max_m': elev_mean + 1,
Expand Down
24 changes: 23 additions & 1 deletion backend/modules/Calculator/yield_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,20 @@ class YieldCalculator:

MODEL_LOCATION_ENV_VAR = "MODEL_LOCATION"
MODEL_FEATURE_COLUMNS = [
"Crop",
"elev_mean_m",
"slope_mean_deg",
"aspect_eastness",
"aspect_northness",
]

# Fixed crop-to-integer mapping for deterministic encoding across
# training and inference. Alphabetically sorted codes from the Cook
# Farm training CSV. Must stay in sync with crop_types.py.
CROP_ENCODING = {
"AL": 0, "GB": 1, "SB": 2, "SC": 3, "SP": 4, "SW": 5,
"WB": 6, "WC": 7, "WL": 8, "WP": 9, "WT": 10, "WW": 11,
}

# Base yield parameters (yield per acre equivalent per grid cell; unit unspecified — TODO: confirm)
BASE_YIELD = 50.0
Expand Down Expand Up @@ -156,7 +165,20 @@ def _calculate(self, df):
f"Model not loaded. Set {self.MODEL_LOCATION_ENV_VAR} to a valid model path and initialize with fetch_model=True."
)

features = df.loc[:, self.MODEL_FEATURE_COLUMNS].to_numpy(dtype=np.float32)
features = df.loc[:, self.MODEL_FEATURE_COLUMNS].copy()

# Encode Crop to its integer code when the column is still strings.
if features["Crop"].dtype == object:
features["Crop"] = features["Crop"].map(self.CROP_ENCODING)
unknown = features["Crop"].isna()
if unknown.any():
bad_codes = df.loc[unknown.values, "Crop"].unique().tolist()
raise ValueError(
f"Unknown crop code(s): {bad_codes}. "
f"Valid codes: {sorted(self.CROP_ENCODING.keys())}"
)

features = features.to_numpy(dtype=np.float32)
expected_dim = self.model.input_shape[-1]
if expected_dim is not None and features.shape[1] != expected_dim:
raise ValueError(
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ services:
context: ./backend
args:
OPENTOPOGRAPHY_API_KEY: ${OPENTOPOGRAPHY_API_KEY:-keykey}
CACHEBUST: ${CACHEBUST:-}
image: django-backend
container_name: django-backend
depends_on:
Expand Down
Loading