From 7a90cc2a0025d4d63a6d77288121085a6a3a9052 Mon Sep 17 00:00:00 2001 From: karteekiitg <120569182+karteekiitg@users.noreply.github.com> Date: Tue, 20 May 2025 02:25:11 +0530 Subject: [PATCH 1/4] feat: Cloudflare DNS ad-blocking Signed-off-by: Karteek <120569182+karteekiitg@users.noreply.github.com> --- .github/workflows/cf_adblock.yaml | 122 +++++ .gitignore | 2 + tofu/cf-adblock/DOCS.md | 163 ++++++ tofu/cf-adblock/README.md | 58 ++ tofu/cf-adblock/adblock_urls.txt | 6 + tofu/cf-adblock/backend.tofu | 5 + tofu/cf-adblock/chunk_adblock_lists.sh | 269 +++++++++ .../cloudflare_zero_trust_dns_location.tofu | 42 ++ .../cloudflare_zero_trust_gateway_policy.tofu | 18 + tofu/cf-adblock/manage_cloudflare_adblock.py | 518 ++++++++++++++++++ tofu/cf-adblock/providers.tofu | 32 ++ tofu/cf-adblock/variables.tofu | 21 + 12 files changed, 1256 insertions(+) create mode 100644 .github/workflows/cf_adblock.yaml create mode 100644 tofu/cf-adblock/DOCS.md create mode 100644 tofu/cf-adblock/README.md create mode 100644 tofu/cf-adblock/adblock_urls.txt create mode 100644 tofu/cf-adblock/backend.tofu create mode 100644 tofu/cf-adblock/chunk_adblock_lists.sh create mode 100644 tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu create mode 100644 tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu create mode 100644 tofu/cf-adblock/manage_cloudflare_adblock.py create mode 100644 tofu/cf-adblock/providers.tofu create mode 100644 tofu/cf-adblock/variables.tofu diff --git a/.github/workflows/cf_adblock.yaml b/.github/workflows/cf_adblock.yaml new file mode 100644 index 000000000..2fe8f1d6c --- /dev/null +++ b/.github/workflows/cf_adblock.yaml @@ -0,0 +1,122 @@ +name: Monthly Cloudflare Adblock Update + +on: + workflow_dispatch: # Allows manual triggering + schedule: + - cron: "0 0 1 * *" # Runs at 00:00 UTC on the 1st day of every month + +env: + TF_VAR_gcs_env: prod + +permissions: + contents: read + id-token: write + +jobs: + update_cf_adblock: + runs-on: ubuntu-latest + 
container: + image: ghcr.io/karteekiitg/k8s_setup:latest + + steps: + - name: Checkout repository + id: checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 + + - name: Load .env file to environment + shell: bash + run: | + if [ -f "./.env" ]; then + echo "Sourcing .env file..." + grep -v '^[[:space:]]*#' ./.env | grep -v '^[[:space:]]*$' | grep '=' >> $GITHUB_ENV + echo "Finished processing .env file for GITHUB_ENV." + else + echo -e "\033[31mError: .env file not found at ./.\033[0m" + exit 1 + fi + + - name: Load secrets to environment + shell: bash + env: # Environment variables specific to THIS step + TF_VAR_infisical_client_secret: ${{ secrets.INFISICAL_CLIENT_SECRET }} + run: | + echo "Making setup_infisical.sh executable..." + chmod +x ./.devcontainer/setup_infisical.sh + echo "Running setup_infisical.sh..." + ./.devcontainer/setup_infisical.sh + if [ $? -ne 0 ]; then + echo -e "\033[31mError: setup_infisical.sh failed. See script output above for details.\033[0m" + exit 1 + fi + + EXPORT_FILE="$HOME/.infisical_exports.env" + + if [ -f "$EXPORT_FILE" ]; then + echo "Sourcing secrets from $EXPORT_FILE to GITHUB_ENV (filtering, handling 'export' prefix, and stripping quotes)..." + + # Pre-filter with grep to remove comments and truly empty lines, ensure '=' exists + # Then pipe into the while loop for further processing + grep -v '^[[:space:]]*#' "$EXPORT_FILE" | grep -v '^[[:space:]]*$' | grep '=' | \ + while IFS= read -r line || [ -n "$line" ]; do # Read whole line + # Remove "export " prefix if it exists from the already filtered line + line_no_export="${line#export }" + + # At this point, 'line_no_export' should be in KEY=VALUE format + # (possibly with quotes around VALUE) because of the preceding grep filters. + # We still split to handle the value quoting. 
+ + key="${line_no_export%%=*}" + value_with_potential_quotes="${line_no_export#*=}" + + # Remove leading/trailing single quotes from value_with_potential_quotes + value_cleaned="${value_with_potential_quotes#\'}" + value_cleaned="${value_cleaned%\'}" + # Remove leading/trailing double quotes from value_with_potential_quotes + value_cleaned="${value_cleaned#\"}" + value_cleaned="${value_cleaned%\"}" + + echo "$key=$value_cleaned" >> $GITHUB_ENV + done + + echo "Finished processing $EXPORT_FILE for GITHUB_ENV." + echo "Removing $EXPORT_FILE..." + rm -f "$EXPORT_FILE" + else + echo -e "\033[31mError: Secrets export file ($EXPORT_FILE) was not found after running setup_infisical.sh.\033[0m" + exit 1 + fi + echo "Secrets loaded and temporary file removed." + + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 + with: + workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }} # Now from Infisical via env + service_account: ${{ env.GCP_SERVICE_ACCOUNT_EMAIL }} # Now from Infisical via env + + - name: Run Adblock List Chunking Script + run: bash chunk_adblock_lists.sh 1000 90 + working-directory: ./tofu/cf-adblock # Ensures script is run in the correct context + + - name: OpenTofu Init for cf-adblock + run: tofu init + working-directory: ./tofu/cf-adblock + + - name: OpenTofu Apply for cf-adblock + id: apply_cf_adblock + shell: bash + run: tofu apply -auto-approve + working-directory: ./tofu/cf-adblock + + - name: Install Python dependencies + shell: bash + run: | + echo "Installing cloudflare Python library..." + pip3 install cloudflare + + - name: Run Cloudflare Adblock Management Script + shell: bash + run: | + echo "Running Python script manage_cloudflare_adblock.py..." 
+ python3 manage_cloudflare_adblock.py 1000 90 + working-directory: ./tofu/cf-adblock # Runs Python script from the same dir as chunker & TF diff --git a/.gitignore b/.gitignore index 495d3f8ae..97084e5ed 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ override.tf.json *.pem *.crt + +processed_adblock_chunks diff --git a/tofu/cf-adblock/DOCS.md b/tofu/cf-adblock/DOCS.md new file mode 100644 index 000000000..02167e8d4 --- /dev/null +++ b/tofu/cf-adblock/DOCS.md @@ -0,0 +1,163 @@ +# Cloudflare Adblock & Malware DNS Filtering - Detailed Documentation + +This document provides detailed explanations of the Cloudflare Adblock & Malware DNS Filtering setup, including its components, architecture, rationale, and execution steps. For a quick overview and getting started, please refer to the main [README.md](../README.md). + +## Overview + +This project enhances network security and user experience by filtering unwanted content at the DNS level using Cloudflare Zero Trust Gateway DNS policies. It employs a hybrid approach, combining OpenTofu for managing core infrastructure resources with a Python script and shell scripting for the dynamic management of large adblock domain lists and their associated Cloudflare policy. + +## Key Components & Functionality + +1. **`adblock_urls.txt`**: + + - Contains URLs to external ad/malware domain lists (e.g., Hagezi). Each line should be a single URL. Lines starting with `#` are treated as comments and ignored. Empty lines are also ignored. This file is the primary source for defining which external lists are used to build the Cloudflare adblock lists. You can add or remove list URLs here to change the sources. + +2. **`chunk_adblock_lists.sh` (Shell Script)**: + + - **Purpose**: This script automates the process of downloading, consolidating, cleaning, sorting, and chunking the domain lists from the URLs specified in `adblock_urls.txt`. It handles potential duplicates by creating a unique sorted list before splitting. 
It splits the large list into smaller files (e.g., `adblock_chunk_000.txt`, `adblock_chunk_001.txt`, etc.) in the `./processed_adblock_chunks/` directory. This chunking is essential to comply with Cloudflare Zero Trust list item limits (currently 1000 items per list on the free tier). The script uses hashing and deterministic spillover to ensure that the mapping of domains to chunk files remains consistent between runs, even if the source lists have minor changes, minimizing unnecessary updates to Cloudflare lists. + - **Usage**: This script is executed by the GitHub Action before the Python management script runs. It can also be run manually from the `tofu/cf-adblock/` directory (`bash ./chunk_adblock_lists.sh <max_items_per_list> <max_lists>`) to prepare the domain data locally. + +3. **OpenTofu Configuration (`.tofu` files in `./tofu/cf-adblock/`)**: + + - Manages the core Cloudflare Zero Trust infrastructure resources that are relatively static, have complex interdependencies best defined declaratively with Infrastructure as Code, or manage stateful components like the GCS backend. + - **`backend.tofu`**: Configures the GCS backend for OpenTofu state management. This stores the state file (`tofu.tfstate`) in a Google Cloud Storage bucket, allowing multiple users or automated processes (like GitHub Actions) to work with the same infrastructure state securely (especially when combined with state encryption). The `prefix` (`cf-adblock/prod`) helps organize state files within the bucket. + - **`providers.tofu`**: Defines the required external providers for OpenTofu to interact with Cloudflare and potentially other services like HTTP. It specifies the source (`cloudflare/cloudflare`, `hashicorp/http`) and acceptable version constraints (`>= 5.3.0`, `>=3.5.0`). It also configures state and plan encryption using PBKDF2 and AES-GCM methods, requiring a passphrase variable (`var.tofu_encryption_passphrase`). + - **`variables.tofu`**: Defines input variables used by the OpenTofu configuration.
These include sensitive variables for Cloudflare authentication (`cloudflare_secondary_account_id`, `cloudflare_secondary_api_token`, `tofu_encryption_passphrase`) and the GCS bucket name (`bucket_name`). Variable definitions specify type, description, and whether they are sensitive. Values are typically provided via environment variables (prefixed with `TF_VAR_`) or other OpenTofu input methods. + - **`cloudflare_zero_trust_gateway_policy.tofu`**: Defines a specific DNS Gateway policy resource named `block_malware`. This policy is configured to block known threats based on Cloudflare's predefined security categories using a `traffic` expression (`any(dns.security_category[*] in {...})`). This policy is distinct from the ad-blocking policy, which is managed dynamically by the Python script. + - **`cloudflare_zero_trust_dns_location.tofu`**: Sets up a custom DNS location resource (named "HomeLab") within Cloudflare Zero Trust. This resource defines the endpoints (DoH, DoT, IPv4, IPv6) that Cloudflare will provide for this location. It includes outputs (`dns_location_homelab`, `dns_location_homelab_id`) to make the dynamically assigned DNS endpoint details and the location's unique ID available after OpenTofu apply. This ID is then used by the Python script to associate the dynamically managed adblock policy with this specific location. + +4. **`manage_cloudflare_adblock.py` (Python Script)**: + - **Purpose**: This script is the core logic for handling the dynamic aspects of the adblocking setup – specifically, the creation, update, and deletion of Cloudflare Zero Trust lists populated with adblock domains, and the management of the Gateway policy that uses these lists. It interacts directly with the Cloudflare API using the `cloudflare` Python library. + - Reads the chunk files generated by `chunk_adblock_lists.sh` from the `./processed_adblock_chunks/` directory. 
+ - Uses a hash-based approach for change detection: it calculates a hash of the sorted domains within each chunk file and compares it to a hash embedded in the description of the corresponding Cloudflare Zero Trust list. This allows the script to efficiently determine if a list's content has actually changed, avoiding unnecessary API calls for unchanged lists. + - Based on the comparison, it performs necessary operations: Creates new `cloudflare_zero_trust_list` resources for new chunk files, updates existing lists whose content has changed, and deletes lists in Cloudflare that no longer have a corresponding chunk file. + - Manages the main "Block Ads - Managed by Script" Gateway policy. It constructs the `traffic` expression for this policy to include _all_ the IDs of the `cloudflare_zero_trust_list` resources that are currently being managed by the script. + - **Usage**: This script is designed to be executed by the GitHub Action _after_ the chunking script has run and OpenTofu has applied its configuration. It takes arguments for the maximum items allowed per list and the maximum number of lists to manage, aligning with the chunking script's parameters and Cloudflare's limits. It requires Cloudflare account ID and API token, which are expected to be provided via environment variables (usually sourced from secrets). + +## Rationale for Scripted List Management + +Managing very large and frequently updated lists of domains (potentially thousands or tens of thousands) directly as items within `cloudflare_zero_trust_list` resources defined in HCL has several significant drawbacks when using OpenTofu: + +- **State File Size and Processing**: Including thousands of domain entries directly as values within resource attributes in the OpenTofu state file can make the state file extremely large. 
A large state file can significantly slow down OpenTofu operations (`plan`, `apply`, `state list`, `state show`, etc.), increase memory usage, and make the state file cumbersome to work with, even with remote backends. +- **Plan Complexity**: Even minor changes in the source adblock lists (adding or removing a few domains) can result in massive, complex, and difficult-to-review diffs in OpenTofu plans. This makes it hard to understand the actual changes being applied and increases the risk of overlooking unintended consequences. +- **Update Performance**: Applying changes to a single resource with a very large number of items can be slow and may occasionally hit API rate limits or timeouts. While OpenTofu providers handle API interactions, orchestrating this outside of the core HCL resource definition provides more control. +- **Limit Management**: Manually splitting a large domain list into multiple `cloudflare_zero_trust_list` resources in HCL to adhere to Cloudflare's item limits per list (e.g., 1000) is a complex and error-prone task. A script can automate the chunking based on defined limits and manage the lifecycle of multiple list resources dynamically. +- **Efficient Change Detection**: Implementing efficient, content-based change detection (like checking if the _set_ of domains has changed, not just the order) directly in HCL is not straightforward or performant. A script allows for calculating and embedding a content hash (like SHA-256) in the list description or metadata. The script can then fetch the existing lists, compare hashes, and only perform API calls (create/update) for lists whose underlying domain content has genuinely changed. This significantly reduces unnecessary API traffic and state updates. +- **Dynamic Policy Referencing**: The "Block Ads" Gateway policy needs to reference _all_ the individual `cloudflare_zero_trust_list` IDs created from the chunks. 
As chunk files are added, removed, or change resulting in new list IDs, the policy definition needs to be updated. A script can dynamically fetch the IDs of all currently managed lists and construct the policy's `traffic` expression accordingly, ensuring the policy always reflects the complete set of adblock lists. Doing this purely in HCL with a dynamic number of resources referencing each other can be complex. + +By using a Python script orchestrated alongside OpenTofu, we leverage OpenTofu for managing the stable, declarative infrastructure (backend state, providers, variables, the malware policy, and the DNS location which provides a stable ID) and the script for the dynamic, stateful, and data-intensive operations related to the adblock lists and their referencing policy via the Cloudflare API. This provides a more flexible, performant, and maintainable solution for this particular use case compared to a pure OpenTofu approach for the adblock lists themselves. + +## GitHub Action Automation + +The [GitHub Action](/.github/workflows/cf_adblock.yaml) workflow automates the process of updating the adblock lists and Cloudflare configuration on a regular schedule or via manual trigger. + +Here is a breakdown of the steps in the workflow: + +1. **Triggers**: The workflow is configured to run on a monthly schedule (`cron: "0 0 1 * *"`, meaning at 00:00 UTC on the 1st day of every month) and can also be manually triggered via the GitHub Actions UI (`workflow_dispatch`). +2. **Environment Variables**: Sets the `TF_VAR_gcs_env` environment variable to `prod`, used by the OpenTofu backend configuration. +3. **Permissions**: Grants necessary permissions for checking out the code (`contents: read`) and authenticating to Google Cloud using Workload Identity Federation (`id-token: write`). +4. **Checkout repository**: Uses the `actions/checkout` action to clone the repository code onto the runner. +5. 
**Load .env file to environment**: (Assumes a local `.env` file might be present for local devcontainer setup, although typically secrets are handled via Infisical in the action). This step sources environment variables from a `.env` file at the root of the repository, if it exists, and adds them to the GitHub Actions environment. +6. **Load secrets to environment**: This crucial step authenticates to Infisical using a client secret (`secrets.INFISICAL_CLIENT_SECRET`) and runs a setup script (`./.devcontainer/setup_infisical.sh`). This script is responsible for fetching secrets stored in Infisical (including `TF_VAR_cloudflare_secondary_account_id`, `TF_VAR_cloudflare_secondary_api_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`, `GCP_WORKLOAD_IDENTITY_PROVIDER`, and `GCP_SERVICE_ACCOUNT_EMAIL`) and exporting them to a file. The workflow then reads this file, parses the `KEY=VALUE` lines, cleans up quotes, and adds these secrets as environment variables to the GitHub Actions runner for subsequent steps to use. The temporary file containing secrets is then removed. +7. **Authenticate to Google Cloud**: Uses the `google-github-actions/auth` action to authenticate the workflow to Google Cloud using Workload Identity Federation. It uses the GCP Workload Identity Provider ID and Service Account email fetched from Infisical secrets. This step sets up credentials that allow OpenTofu and other GCP tools to interact with GCP resources, specifically the GCS bucket for state. +8. **Run Adblock List Chunking Script**: Executes the `chunk_adblock_lists.sh` script with arguments (e.g., `1000 90`) from within the `./tofu/cf-adblock/` directory. This script downloads the domain lists, processes them, and generates the chunk files in `./processed_adblock_chunks/`. +9. **OpenTofu Init for cf-adblock**: Runs `tofu init` from the `./tofu/cf-adblock/` directory. 
This initializes the OpenTofu working directory, downloads necessary providers (Cloudflare, HTTP), and configures the GCS backend based on the `backend.tofu` file and the environment variables sourced from secrets. +10. **OpenTofu Apply for cf-adblock**: Runs `tofu apply -auto-approve` from the `./tofu/cf-adblock/` directory. This applies the OpenTofu configuration, creating or updating the static resources defined in the `.tofu` files (providers, backend state, variables, malware policy, DNS location). The `-auto-approve` flag bypasses interactive approval, suitable for automation. +11. **Install Python dependencies**: Installs the required Python libraries for the management script using `pip3 install cloudflare`. +12. **Run Cloudflare Adblock Management Script**: This is the final step of the workflow and completes the update process. This step executes the `manage_cloudflare_adblock.py` script with necessary arguments (e.g., `1000 90`) from the `./tofu/cf-adblock/` directory. The script uses the `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` environment variables (which were sourced from secrets and OpenTofu output) to authenticate to Cloudflare and manage the adblock lists and policy. + +## Required Inputs (Variables & Secrets) + +To successfully run this setup, both OpenTofu and the Python script require certain configuration inputs. These should be managed securely, ideally via a secrets management system like Infisical, and surfaced as environment variables for the workflow and manual execution. + +- `TF_VAR_cloudflare_secondary_account_id`: Your Cloudflare Account ID where the Zero Trust configurations (lists, policies, locations) will be managed. This is used by both OpenTofu (for resources like the malware policy and DNS location) and the Python script (for list and policy management).
+- `TF_VAR_cloudflare_secondary_api_token`: A Cloudflare API Token with the necessary permissions to manage Zero Trust Gateway lists, policies, and locations. This is a **sensitive secret** and must be kept secure. It is used by both OpenTofu and the Python script for authenticating with the Cloudflare API. +- `TF_VAR_bucket_name`: The globally unique name of the Google Cloud Storage bucket used for storing the OpenTofu state file. This is used by the OpenTofu backend configuration. +- `TF_VAR_tofu_encryption_passphrase`: A passphrase used to encrypt the OpenTofu state file stored in GCS. This is a **sensitive secret** and must be kept secure. Used by the OpenTofu `encryption` block. +- `GCP_WORKLOAD_IDENTITY_PROVIDER`: (Used by GitHub Action) The full name of the GCP Workload Identity Provider configured for GitHub Actions. This is required for the GitHub Action to authenticate to Google Cloud using Workload Identity Federation. +- `GCP_SERVICE_ACCOUNT_EMAIL`: (Used by GitHub Action) The email address of the GCP Service Account that the GitHub Action will impersonate using Workload Identity Federation. This service account must have permissions to read/write objects in the GCS state bucket. + +## Manual Setup & Execution (Local Environment) + +While the primary method for updating the lists and policy is the automated GitHub Action, you may need to run the process manually for testing, development, or initial setup in a local environment (like a devcontainer). + +Note: By default, every month, the lists and policy are updated automatically via the [GitHub Action](/.github/workflows/cf_adblock.yaml). + +To run manually: + +1. **Prerequisites**: + + - Ensure you have OpenTofu (or Terraform) installed. + - Ensure you have Python 3 and `pip` installed. + - Ensure you have `curl` and `grep` installed (usually available on Linux/macOS). + - Ensure you have authenticated to Google Cloud and have the necessary permissions to access the GCS state bucket. 
If using a devcontainer, follow instructions in the [devcontainer README](../../.devcontainer/README.md) on the steps to set up the devcontainer environment, including Infisical and GCP authentication. + - Ensure required environment variables (`TF_VAR_cloudflare_secondary_account_id`, `TF_VAR_cloudflare_secondary_api_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`) are set in your local environment. If using Infisical, run the setup/export process to populate these variables in your shell session. + - Navigate to the OpenTofu directory: `cd starter12/tofu/cf-adblock`. + +2. **Prepare Domain Lists**: + + - Run the chunking script to download, process, and split the domain lists. Replace `<max_items_per_list>` and `<max_lists>` with your desired limits (e.g., 1000 and 90). + + ```bash + bash ./chunk_adblock_lists.sh <max_items_per_list> <max_lists> + ``` + + - Verify that chunk files (`adblock_chunk_*.txt`) have been created in the `./processed_adblock_chunks/` directory. + +3. **Initialize OpenTofu**: + + - Initialize the OpenTofu working directory. This sets up the backend configuration and downloads the required provider plugins based on your `providers.tofu` and `backend.tofu` files and environment variables. + + ```bash + tofu init + ``` + +4. **Apply Static OpenTofu Resources**: + + - Run `tofu apply` to create or update the OpenTofu-managed resources (GCS backend state setup, providers configuration, variables, the malware policy, and the DNS location). Review the plan shown by OpenTofu carefully before confirming the apply. + + ```bash + tofu apply + ``` + +5. **Install Python dependencies**: + + - Install the necessary Python library for interacting with the Cloudflare API. + + ```bash + pip install cloudflare + ``` + +6. **Run Dynamic List Management Script**: + - Execute the Python script to manage the adblock lists and policy in Cloudflare. Provide the same limits used for the chunking script.
The script will use the environment variables (`CLOUDFLARE_ACCOUNT_ID`, `CLOUDFLARE_API_TOKEN`) for authentication and configuration. + ```bash + python3 manage_cloudflare_adblock.py <max_items_per_list> <max_lists> + ``` + +This sequence of manual steps mirrors the automation in the GitHub Action and allows you to update the Cloudflare adblock configuration from your local environment. + +### Commands + +Here's a quick overview of the main steps (run from the `./tofu/cf-adblock/` directory): + +```bash +# Prepare Domain Lists: Download, process, and split domains into chunks +bash ./chunk_adblock_lists.sh <max_items_per_list> <max_lists> + +# Initialize OpenTofu working directory and backend +tofu init + +# Apply Static OpenTofu Resources (backend state, providers, malware policy, DNS location) +# Review plan carefully before confirming! +tofu apply + +# Install Python dependencies for the management script +pip install cloudflare + +# Run Dynamic List Management Script to manage Cloudflare lists and adblock policy +python3 manage_cloudflare_adblock.py <max_items_per_list> <max_lists> +``` + +## Acknowledgements + +This part of cloudflare ad-blocking was inspired by Marco Lancini's [blog post](https://blog.marcolancini.it/2022/blog-serverless-ad-blocking-with-cloudflare-gateway/) on serverless ad-blocking with Cloudflare Gateway. diff --git a/tofu/cf-adblock/README.md b/tofu/cf-adblock/README.md new file mode 100644 index 000000000..637c3bcfe --- /dev/null +++ b/tofu/cf-adblock/README.md @@ -0,0 +1,58 @@ +# Cloudflare Adblock & Malware DNS Filtering + +Automated management of Cloudflare Zero Trust Gateway DNS policies for ad and malware blocking. This setup uses a combination of OpenTofu for core infrastructure resources and a Python script for managing dynamic adblock domain lists and their associated policy via the Cloudflare API. + +## My Usage + +I generally tend to avoid hosting piHole / AdGuard, as when they go down, we lose access to the internet. Setting up HA is not quite straightforward. Also it mostly only covers home network, not mobile network.
+ +Even if using piHole / AdGuard, you can set this DoH endpoint as its upstream. So, I use this setup in the following way, after getting DoH endpoint / ipv6 address from cloudflare: + +1. On Browsers, android, ios, etc. I use the DoH endpoint directly, on top of using uBO and SponsorBlock. +2. My router only supports ipv4 addresses as dns servers. So I use 1.1.1.2 / 1.0.0.2 as dns servers to block malware by default. If your router / devices support DoH or DoT by default, always use it instead of ipv4 / ipv6. +3. If using cloudflare warp as your vpn / zerotrust setup, your devices are automatically protected by warp. I also use the ipv6 address as upstream for tailscale / netbird, so that I am also protected by default, when using these as my vpn / zerotrust. +4. I use a secondary cloudflare account, using a cheap [1.111B class domain](https://gen.xyz/1111b). + +## Overview + +Enhances network security and user experience by filtering unwanted content at the DNS level using Cloudflare Gateway. + +**Key Components & Functionality:** + +1. **`adblock_urls.txt`**: + - Lists URLs of external ad/malware domain sources. +2. **`chunk_adblock_lists.sh` (Shell Script)**: + - Downloads, processes, and splits domains from `adblock_urls.txt` into chunk files for Cloudflare lists, handling limits and changes. +3. **OpenTofu Configuration (`.tofu` files)**: + - Manages core infrastructure resources like the backend, providers, variables, the malware policy, and the DNS location. +4. **`manage_cloudflare_adblock.py` (Python Script)**: + - Manages Cloudflare Zero Trust lists and the associated adblock policy dynamically via the API based on chunk files, using hash-based change detection. + +For detailed descriptions of each component, see [DOCS.md](./DOCS.md). + +## GitHub Action Automation (`cf_adblock.yaml`) + +The update process is automated via a [GitHub Action](/.github/workflows/cf_adblock.yaml). It runs monthly on a schedule or can be triggered manually.
The workflow prepares the domain lists using `chunk_adblock_lists.sh`, applies the OpenTofu configuration for static resources (like the DNS location and malware policy), and then runs the Python script (`manage_cloudflare_adblock.py`) to manage the dynamic adblock lists and the associated policy using the Cloudflare API and OpenTofu outputs (like the DNS Location ID). + +For a detailed breakdown of the workflow steps, see [DOCS.md](./DOCS.md#github-action-automation). + +## Required Inputs (Variables & Secrets) + +Configure these securely. The GitHub Action fetches them via Infisical secrets automatically (surfaced as `TF_VAR_...` or regular environment variables). They must also be present in devcontainer. + +- `TF_VAR_cloudflare_secondary_account_id`: Your Cloudflare Account ID for Zero Trust configurations (used by OpenTofu and the Python script). +- `TF_VAR_cloudflare_secondary_api_token`: Cloudflare API Token with necessary permissions for Zero Trust management (used by OpenTofu and the Python script). **Sensitive secret.** +- `TF_VAR_bucket_name`: GCS bucket name for OpenTofu remote state. +- `TF_VAR_tofu_encryption_passphrase`: Passphrase for OpenTofu state encryption. **Sensitive secret.** +- `GCP_WORKLOAD_IDENTITY_PROVIDER`: GCP Workload Identity Provider ID for GitHub Actions authentication. +- `GCP_SERVICE_ACCOUNT_EMAIL`: GCP Service Account email for GCS access. + +## Manual Setup & Execution (Local Environment) + +While the primary method for updating the lists and policy is the automated GitHub Action, you may need to run the process manually for testing, development, or initial setup in a local environment (like a devcontainer). + +For detailed prerequisites, step-by-step instructions, and the command reference, please refer to [DOCS.md](./DOCS.md#manual-setup--execution-local-environment). 
+ +## Acknowledgements + +This part of cloudflare ad-blocking was inspired by Marco Lancini's [blog post](https://blog.marcolancini.it/2022/blog-serverless-ad-blocking-with-cloudflare-gateway/) on serverless ad-blocking with Cloudflare Gateway. diff --git a/tofu/cf-adblock/adblock_urls.txt b/tofu/cf-adblock/adblock_urls.txt new file mode 100644 index 000000000..757c6c3f9 --- /dev/null +++ b/tofu/cf-adblock/adblock_urls.txt @@ -0,0 +1,6 @@ +terraform { + backend "gcs" { + bucket = var.bucket_name + prefix = "cf-adblock/prod" + } +} diff --git a/tofu/cf-adblock/backend.tofu b/tofu/cf-adblock/backend.tofu new file mode 100644 index 000000000..b8da2677f --- /dev/null +++ b/tofu/cf-adblock/backend.tofu @@ -0,0 +1,5 @@ +terraform { + backend "gcs" { + bucket = var.bucket_name + } +} diff --git a/tofu/cf-adblock/chunk_adblock_lists.sh b/tofu/cf-adblock/chunk_adblock_lists.sh new file mode 100644 index 000000000..76317b25e --- /dev/null +++ b/tofu/cf-adblock/chunk_adblock_lists.sh @@ -0,0 +1,269 @@ +#!/bin/bash +set -euo pipefail + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 " + echo " Reads URLs from ./adblock_urls.txt (must be in current directory)." + echo " Distributes domains into hash buckets with deterministic overflow spillover." + echo " Outputs bucket files to ./processed_adblock_chunks/" + echo "" + echo "Example: $0 1000 90 # Creates up to 90 buckets with max 1000 domains each" + echo " # With deterministic spillover handling" + exit 1 +fi + +MAX_DOMAINS_PER_BUCKET="$1" +NUM_BUCKETS="$2" +URL_SOURCE_FILE="./adblock_urls.txt" +OUTPUT_DIR="./processed_adblock_chunks" +MAX_TOTAL_DOMAINS=$((MAX_DOMAINS_PER_BUCKET * NUM_BUCKETS)) + +# Validation +if ! [[ "$MAX_DOMAINS_PER_BUCKET" =~ ^[0-9]+$ ]] || [ "$MAX_DOMAINS_PER_BUCKET" -lt 1 ]; then + echo -e "\e[31mError: MAX_DOMAINS_PER_BUCKET must be a positive integer.\e[0m" >&2 + exit 1 +fi + +if ! 
[[ "$NUM_BUCKETS" =~ ^[0-9]+$ ]] || [ "$NUM_BUCKETS" -lt 1 ]; then + echo -e "\e[31mError: NUM_BUCKETS must be a positive integer.\e[0m" >&2 + exit 1 +fi + +if [ "$NUM_BUCKETS" -gt 95 ]; then + echo "Warning: NUM_BUCKETS ($NUM_BUCKETS) exceeds recommended free tier limit of 95." >&2 +fi + +if [ "$MAX_DOMAINS_PER_BUCKET" -gt 1000 ]; then + echo "Warning: MAX_DOMAINS_PER_BUCKET ($MAX_DOMAINS_PER_BUCKET) exceeds Cloudflare free tier limit of 1000." >&2 +fi + +if [ ! -f "$URL_SOURCE_FILE" ]; then + echo -e "\e[31mError: URL source file not found at $URL_SOURCE_FILE.\e[0m" >&2 + exit 1 +fi + +echo "Configuration:" >&2 +echo " Max domains per bucket: $MAX_DOMAINS_PER_BUCKET" >&2 +echo " Number of buckets: $NUM_BUCKETS" >&2 +echo " Total capacity: $MAX_TOTAL_DOMAINS domains" >&2 +echo " Output directory: $OUTPUT_DIR" >&2 + +# Read URLs from source file +URLS=() +while IFS= read -r line || [[ -n "$line" ]]; do + # Remove comments and skip empty lines + processed_line=$(echo "$line" | sed -e 's/#.*//' | xargs) # Remove # and onwards, then trim + if [ -n "$processed_line" ]; then + URLS+=("$processed_line") + fi +done < "$URL_SOURCE_FILE" + +if [ ${#URLS[@]} -eq 0 ]; then + echo -e "\e[31mNo valid URLs found in $URL_SOURCE_FILE. Creating empty $OUTPUT_DIR and exiting.\e[0m" >&2 + mkdir -p "$OUTPUT_DIR" + exit 0 +fi + +mkdir -p "$OUTPUT_DIR" + +# Temporary files +TMP_MERGED_CONTENT=$(mktemp) +TMP_SORTED_UNIQUE_DOMAINS=$(mktemp) +TMP_HASH_MAPPING=$(mktemp) +TMP_BUCKET_ASSIGNMENTS=$(mktemp) +trap 'rm -f "$TMP_MERGED_CONTENT" "$TMP_SORTED_UNIQUE_DOMAINS" "$TMP_HASH_MAPPING" "${TMP_HASH_MAPPING}.sorted" "$TMP_BUCKET_ASSIGNMENTS"' EXIT SIGINT SIGTERM ERR + +# Download content from all URLs +echo "Downloading content from ${#URLS[@]} URLs specified in $URL_SOURCE_FILE..." 
>&2 +for URL in "${URLS[@]}"; do + echo " Downloading: $URL" >&2 + if curl -sSLf "$URL" >> "$TMP_MERGED_CONTENT"; then + echo >> "$TMP_MERGED_CONTENT" + else + echo -e "\e[31mWarning: Failed to download or got an error for URL: $URL. Skipping.\e[0m" >&2 + fi +done + +# Process downloaded content +echo "Processing downloaded content (filter, sort, unique)..." >&2 +grep -vE "^\s*#|^\s*$" "$TMP_MERGED_CONTENT" | sort -u > "$TMP_SORTED_UNIQUE_DOMAINS" + +TOTAL_DOMAINS_COUNT=$(wc -l < "$TMP_SORTED_UNIQUE_DOMAINS" | xargs) +if ! [[ "$TOTAL_DOMAINS_COUNT" =~ ^[0-9]+$ ]]; then + TOTAL_DOMAINS_COUNT=0 +fi + +echo "Total unique domains found: $TOTAL_DOMAINS_COUNT" >&2 + +if [ "$TOTAL_DOMAINS_COUNT" -gt "$MAX_TOTAL_DOMAINS" ]; then + echo -e "\e[31mError: Total unique domains ($TOTAL_DOMAINS_COUNT) exceeds capacity of $MAX_TOTAL_DOMAINS.\e[0m" >&2 + echo -e "\e[31mConsider increasing NUM_BUCKETS or MAX_DOMAINS_PER_BUCKET.\e[0m" >&2 + exit 1 +fi + +if [ "$TOTAL_DOMAINS_COUNT" -eq 0 ]; then + echo -e "\e[31mNo valid domains found after filtering. No bucket files will be created in $OUTPUT_DIR.\e[0m" >&2 + exit 0 +fi + +# Phase 1: Calculate primary hash bucket for each domain +echo "Calculating primary hash assignments..." >&2 + +while IFS= read -r domain; do + if [ -n "$domain" ]; then + # Calculate primary hash bucket using SHA-256 (first 8 hex chars for 32-bit range) + HASH_HEX=$(printf "%s" "$domain" | sha256sum | cut -c1-8) + HASH_DEC=$((0x$HASH_HEX)) + PRIMARY_BUCKET=$((HASH_DEC % NUM_BUCKETS)) + + # Record: primary_bucket domain + echo "$PRIMARY_BUCKET $domain" >> "$TMP_HASH_MAPPING" + fi +done < "$TMP_SORTED_UNIQUE_DOMAINS" + +# Phase 2: Group by primary bucket and handle overflow with deterministic spillover +echo "Processing overflow with deterministic spillover..." 
>&2 + +declare -a BUCKET_COUNTS +for ((i=0; i "${TMP_HASH_MAPPING}.sorted" + +# Process each bucket group +current_bucket=-1 +bucket_domains=() + +process_bucket_group() { + local bucket_id=$1 + local domains=("${@:2}") + + if [ ${#domains[@]} -eq 0 ]; then + return + fi + + bucket_num=$(printf "%03d" $bucket_id) + echo " Bucket $bucket_num: ${#domains[@]} domains" >&2 + + # First MAX_DOMAINS_PER_BUCKET domains go to primary bucket + local assigned_to_primary=0 + local overflow_count=0 + + for domain in "${domains[@]}"; do + if [ $assigned_to_primary -lt $MAX_DOMAINS_PER_BUCKET ]; then + echo "$bucket_id $domain" >> "$TMP_BUCKET_ASSIGNMENTS" + BUCKET_COUNTS[bucket_id]=$((BUCKET_COUNTS[bucket_id] + 1)) + assigned_to_primary=$((assigned_to_primary + 1)) + else + # Handle overflow with deterministic spillover + overflow_count=$((overflow_count + 1)) + + # Find next available bucket using deterministic search + local spillover_bucket=-1 + for ((search_offset=1; search_offset&2 + echo -e "\e[31mDomain: $domain\e[0m" >&2 + echo -e "\e[31mAll buckets are at capacity.\e[0m" >&2 + exit 1 + fi + + echo "$spillover_bucket $domain" >> "$TMP_BUCKET_ASSIGNMENTS" + BUCKET_COUNTS[spillover_bucket]=$((BUCKET_COUNTS[spillover_bucket] + 1)) + + if [ $overflow_count -eq 1 ]; then + spillover_num=$(printf "%03d" $spillover_bucket) + echo " → Overflow: spillover to bucket $spillover_num" >&2 + fi + fi + done + + if [ $overflow_count -gt 0 ]; then + echo " → Total spillover: $overflow_count domains" >&2 + fi +} + +# Process domains grouped by primary bucket +while IFS=' ' read -r bucket_id domain; do + if [ "$bucket_id" != "$current_bucket" ]; then + # Process previous bucket group if exists + if [ $current_bucket -ne -1 ]; then + process_bucket_group $current_bucket "${bucket_domains[@]}" + fi + + # Start new bucket group + current_bucket=$bucket_id + bucket_domains=("$domain") + else + # Add to current bucket group + bucket_domains+=("$domain") + fi +done < "${TMP_HASH_MAPPING}.sorted" 
+ +# Process the last bucket group +if [ $current_bucket -ne -1 ]; then + process_bucket_group $current_bucket "${bucket_domains[@]}" +fi + +# Phase 3: Write domains to bucket files +echo "Writing domains to bucket files..." >&2 + +# Clear output directory of old bucket files +rm -f "$OUTPUT_DIR"/adblock_chunk_*.txt + +# Sort final assignments by bucket ID, then write to files +sort -k1,1n -k2,2 "$TMP_BUCKET_ASSIGNMENTS" | while IFS=' ' read -r bucket_id domain; do + BUCKET_FILE="$OUTPUT_DIR/adblock_chunk_$(printf "%03d" $bucket_id).txt" + echo "$domain" >> "$BUCKET_FILE" +done + +# Show final statistics +echo "" >&2 +echo "Final bucket distribution:" >&2 +USED_BUCKETS=0 +OVERFLOW_BUCKETS=0 +for ((i=0; i&2 + else + echo " Bucket $bucket_num: $domain_count domains" >&2 + fi + + # Count how many buckets received spillover + PRIMARY_COUNT=$(sort -k1,1n "$TMP_HASH_MAPPING" | awk -v bucket=$i '$1 == bucket' | wc -l) + if [ ${BUCKET_COUNTS[i]} -gt $PRIMARY_COUNT ]; then + OVERFLOW_BUCKETS=$((OVERFLOW_BUCKETS + 1)) + fi + fi +done + +echo " Used buckets: $USED_BUCKETS/$NUM_BUCKETS" >&2 +echo " Buckets with spillover: $OVERFLOW_BUCKETS" >&2 + +# List created files +CREATED_FILES=($(ls "$OUTPUT_DIR"/adblock_chunk_*.txt 2>/dev/null || true)) +echo "" >&2 +echo "Created ${#CREATED_FILES[@]} bucket files in $OUTPUT_DIR" >&2 + +echo "" >&2 +echo -e "\e[32mDeterministic Spillover Guarantees:\e[0m" >&2 +echo " ✓ Consistent assignment: same domains always spill to same buckets" >&2 +echo " ✓ Stable overflow handling: domains sorted alphabetically within each hash group" >&2 +echo " ✓ Predictable spillover: overflow goes to next available bucket (circular search)" >&2 +echo " ✓ No cascading updates: spillover domains have fixed assignments" >&2 +echo "" >&2 +echo -e "\e[32mScript completed successfully. 
Deterministic hash buckets with spillover in $OUTPUT_DIR\e[0m" >&2 +exit 0 diff --git a/tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu b/tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu new file mode 100644 index 000000000..1609305a4 --- /dev/null +++ b/tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu @@ -0,0 +1,42 @@ +resource "cloudflare_zero_trust_dns_location" "homelab" { + account_id = var.cloudflare_secondary_account_id + name = "HomeLab" # This will be the name in the Cloudflare dashboard + client_default = true # Set to true if this should be the default location for WARP clients + ecs_support = false + + endpoints = { + doh = { + enabled = true # Enables DNS over HTTPS + } + dot = { + enabled = false # DNS over TLS, can be enabled if needed + } + ipv4 = { + enabled = false # Enables a dedicated IPv4 DNS resolver for this location + } + ipv6 = { + enabled = true # Enables a dedicated IPv6 DNS resolver, can be enabled if needed + } + } +} + +output "dns_location_homelab" { + description = "DNS location - HomeLab (Cloudflare-assigned IPs)" + value = { + # These attributes will be populated with the unique IPs/hostnames assigned by Cloudflare + # after a successful 'tofu apply'. + id = cloudflare_zero_trust_dns_location.homelab.id # Added ID output for external scripts + doh = "https://${cloudflare_zero_trust_dns_location.homelab.doh_subdomain}.cloudflare-gateway.com/dns-query" + ipv4_destination = cloudflare_zero_trust_dns_location.homelab.ipv4_destination + ipv4_destination_backup = cloudflare_zero_trust_dns_location.homelab.ipv4_destination_backup # May not be populated if only one IPv4 is assigned + # 'ip' might be populated with an IPv6 if ipv6 endpoint is enabled and assigned. + # For IPv4, refer to ipv4_destination. + ip = cloudflare_zero_trust_dns_location.homelab.ip + # dns_destination_ipv6_block_id is not relevant when Cloudflare assigns IPs. 
+ } +} + +output "dns_location_homelab_id" { + description = "The ID of the HomeLab DNS location for use in other configurations or scripts." + value = cloudflare_zero_trust_dns_location.homelab.id +} diff --git a/tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu b/tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu new file mode 100644 index 000000000..ab6cb8746 --- /dev/null +++ b/tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu @@ -0,0 +1,18 @@ +resource "cloudflare_zero_trust_gateway_policy" "block_malware" { + account_id = var.cloudflare_secondary_account_id + + name = "Block malware" + description = "Block known threats based on Cloudflare's threat intelligence" + + enabled = true + precedence = 10 + + # Block all security risks + filters = ["dns"] + traffic = "any(dns.security_category[*] in {178 80 83 176 175 117 131 134 151 153 68})" + action = "block" + + rule_settings = { + block_page_enabled = true + } +} diff --git a/tofu/cf-adblock/manage_cloudflare_adblock.py b/tofu/cf-adblock/manage_cloudflare_adblock.py new file mode 100644 index 000000000..6ae67fe56 --- /dev/null +++ b/tofu/cf-adblock/manage_cloudflare_adblock.py @@ -0,0 +1,518 @@ +#!/usr/bin/env python3 + +import os +import sys +import glob +import time +import hashlib +from cloudflare import Cloudflare +from cloudflare._exceptions import APIError, APIConnectionError, RateLimitError +import logging +import argparse +from typing import List, Dict, Optional, Any, Tuple, Literal + +# --- Configuration --- +# These would ideally be fetched from environment variables or a config file +# For GitHub Actions, CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN will be set as env vars +CLOUDFLARE_ACCOUNT_ID = os.environ.get("TF_VAR_cloudflare_secondary_account_id") +CLOUDFLARE_API_TOKEN = os.environ.get("TF_VAR_cloudflare_secondary_api_token") +# The DNS Location ID will be passed as an argument or fetched from Terraform output +# For now, let's assume it's passed as an environment 
variable by the GitHub Action +DNS_LOCATION_ID = os.environ.get("DNS_LOCATION_ID") + +PROCESSED_CHUNKS_DIR = "./processed_adblock_chunks" # Relative to script execution +CHUNK_FILE_PATTERN = "adblock_chunk_*.txt" +LIST_NAME_PREFIX = "ad-block-list-" # To identify lists managed by this script +POLICY_NAME = "Block Ads - Managed by Script" +POLICY_DESCRIPTION = "Blocks ad domains using lists generated from external sources. Managed by Python script." + +# ANSI color codes for terminal output +class Colors: + RED = "\033[0;31m" # Errors + GREEN = "\033[0;32m" # Success + YELLOW = "\033[0;33m" # Warnings + RESET = "\033[0m" # Others + +# --- Logging Setup --- +# Formatter to add colors to log levels +class ColoredFormatter(logging.Formatter): + def __init__(self, fmt: Optional[str] = None, datefmt: Optional[str] = None, style: Literal['%', '{', '$'] = '%', validate: bool = True): + super().__init__(fmt, datefmt, style, validate=validate) + self.level_colors = { + logging.ERROR: Colors.RED, + logging.WARNING: Colors.YELLOW, + # INFO and DEBUG will use default terminal color (via RESET) + } + + def format(self, record): + log_message = super().format(record) + log_level_color = self.level_colors.get(record.levelno, Colors.RESET) + + # For INFO and DEBUG, log_level_color will be Colors.RESET, + # effectively not prepending a specific color for these levels, + # but ensuring a reset if the previous line was colored. + # If it's ERROR or WARNING, it will use the specified color. + # We always append RESET to ensure color doesn't bleed. 
+ if record.levelno in (logging.ERROR, logging.WARNING): + return f"{log_level_color}{log_message}{Colors.RESET}" + return f"{log_message}{Colors.RESET}" + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Apply the colored formatter +if logger.hasHandlers(): + for handler in logger.handlers: + original_formatter = handler.formatter + fmt = original_formatter._fmt if original_formatter else '%(asctime)s - %(levelname)s - %(message)s' + datefmt = original_formatter.datefmt if original_formatter else None + colored_formatter = ColoredFormatter(fmt=fmt, datefmt=datefmt) + handler.setFormatter(colored_formatter) +elif logging.root.hasHandlers(): + for handler in logging.root.handlers: + original_formatter = handler.formatter + fmt = original_formatter._fmt if original_formatter else '%(asctime)s - %(levelname)s - %(message)s' + datefmt = original_formatter.datefmt if original_formatter else None + colored_formatter = ColoredFormatter(fmt=fmt, datefmt=datefmt) + handler.setFormatter(colored_formatter) + + +# --- Hash-based Change Detection --- +def calculate_list_hash(items: List[str]) -> str: + """Calculate SHA-256 hash of sorted list items for change detection.""" + # Sort items to ensure consistent hash regardless of order + sorted_items = sorted(set(items)) # Also removes duplicates + content = '\n'.join(sorted_items).encode('utf-8') + return hashlib.sha256(content).hexdigest()[:16] # Use first 16 chars for brevity + +def get_list_description_with_hash(base_description: str, items_hash: str) -> str: + """Embed hash in description for change detection.""" + return f"{base_description} [Hash: {items_hash}]" + +def extract_hash_from_description(description: str) -> Optional[str]: + """Extract hash from list description if present.""" + if "[Hash: " in description and "]" in description: + start = description.find("[Hash: ") + 7 + end = description.find("]", start) + if start > 6 and 
end > start: + return description[start:end] + return None + +def has_list_changed(current_items: List[str], existing_description: str) -> bool: + """Check if list has changed by comparing hashes.""" + current_hash = calculate_list_hash(current_items) + existing_hash = extract_hash_from_description(existing_description) + + if existing_hash is None: + logger.info("No hash found in existing description, assuming change needed.") + return True + + changed = current_hash != existing_hash + logger.debug(f"Hash comparison - Current: {current_hash}, Existing: {existing_hash}, Changed: {changed}") + return changed + +# --- Cloudflare API Client Wrapper --- +class CloudflareManager: + def __init__(self, account_id: str, api_token: str): + if not account_id or not api_token: + logger.error("Cloudflare Account ID and API Token must be provided.") + raise ValueError("Cloudflare Account ID and API Token must be provided.") + self.account_id = account_id + self.cf = Cloudflare(api_token=api_token) + logger.info("Cloudflare client initialized.") + + def _api_call_with_retry(self, method_func, *args, **kwargs) -> Any: + # Cloudflare specific retry logic + delays = [2, 5, 30, 30, 60] + # 'retries' now represents the number of times we will retry after the initial attempt. + # This means there will be (retries + 1) total attempts. + retries = len(delays) + for i in range(retries + 1): + try: + return method_func(*args, **kwargs) + except (RateLimitError, APIConnectionError) as e: + # Handle both rate limits and connection errors with the same retry logic + error_type = "Rate limit" if isinstance(e, RateLimitError) else "Connection error" + logger.warning(f"{error_type}: {e}") + if i < retries: + delay = delays[i] + logger.warning(f"Retrying in {delay}s... 
(Attempt {i+1}/{retries})") + time.sleep(delay) + else: + logger.error(f"Failed after {retries} attempts due to {error_type.lower()}") + raise + except APIError as e: + # Handle various API errors + error_msg = str(e).lower() + if "not found" in error_msg or "404" in error_msg: + logger.warning(f"Resource not found: {e}. This might be expected for GET operations.") + return None + elif "already exists" in error_msg or "409" in error_msg: + logger.warning(f"Resource already exists: {e}") + raise # Re-raise to handle in context + else: + logger.error(f"Cloudflare API Error: {e}") + raise # Re-raise other API errors + except Exception as e: + logger.error(f"An unexpected error occurred during API call: {e}") + if i < retries: + delay = delays[i] + logger.warning(f"Retrying in {delay}s... (Attempt {i+1}/{retries})") + time.sleep(delay) + else: + raise + raise Exception(f"API call failed after {retries} retries.") + + def get_all_zt_lists_by_prefix(self, prefix: str) -> Dict[str, Dict[str, Any]]: + logger.info(f"Fetching all Zero Trust lists with prefix {prefix}...") + + def _get_lists(): + return self.cf.zero_trust.gateway.lists.list(account_id=self.account_id) + + lists_response = self._api_call_with_retry(_get_lists) + + script_lists = {} + if lists_response: + for lst in lists_response: + if lst.name.startswith(prefix): + script_lists[lst.name] = { + 'id': lst.id, + 'count': getattr(lst, 'count', 0), + 'description': getattr(lst, 'description', '') + } + logger.info(f"Found {len(script_lists)} existing lists managed by this script with prefix {prefix}.") + return script_lists + + def get_zt_policy_by_name(self, name: str) -> Optional[Dict[str, Any]]: + logger.info(f"Fetching Zero Trust Gateway policy by name: {name}...") + + def _get_policies(): + return self.cf.zero_trust.gateway.rules.list(account_id=self.account_id) + + policies_response = self._api_call_with_retry(_get_policies) + + if policies_response: + for policy in policies_response: + if policy.name == 
name: + logger.info(f"Found existing policy {name} with ID {policy.id}.") + return { + 'id': policy.id, + 'name': policy.name, + 'description': getattr(policy, 'description', ''), + 'action': getattr(policy, 'action', ''), + 'enabled': getattr(policy, 'enabled', True), + 'filters': getattr(policy, 'filters', []), + 'traffic': getattr(policy, 'traffic', ''), + 'precedence': getattr(policy, 'precedence', 0) + } + logger.info(f"Policy {name} not found.") + return None + + def create_zt_list(self, name: str, items: List[str], description: str) -> Optional[str]: + logger.info(f"Creating Zero Trust list: {name} with {len(items)} items.") + + def _create_list(): + return self.cf.zero_trust.gateway.lists.create( + account_id=self.account_id, + name=name, + type="DOMAIN", + description=description, + items=[{"value": item} for item in items] + ) + + response = self._api_call_with_retry(_create_list) + list_id = response.id if response else None + if list_id: + logger.info(f"Successfully created list {name} with ID {list_id}.") + return list_id + + def update_zt_list(self, list_id: str, name: str, items: List[str], description: str) -> bool: + logger.info(f"Updating Zero Trust list ID {list_id} ({name}) with {len(items)} items.") + + try: + def _update_list(): + return self.cf.zero_trust.gateway.lists.update( + list_id=list_id, + account_id=self.account_id, + name=name, + description=description, + items=[{"value": item} for item in items] + ) + + self._api_call_with_retry(_update_list) + logger.info(f"Successfully updated list ID '{list_id}' ('{name}').") + return True + except Exception as e: + logger.error(f"Failed to update list ID '{list_id}' ('{name}'): {e}") + return False + + def delete_zt_list(self, list_id: str, list_name: str) -> bool: + logger.info(f"Deleting Zero Trust list ID '{list_id}' ('{list_name}').") + + def _delete_list(): + return self.cf.zero_trust.gateway.lists.delete( + list_id=list_id, + account_id=self.account_id + ) + + 
self._api_call_with_retry(_delete_list) + logger.info(f"Successfully deleted list ID '{list_id}' ('{list_name}').") + return True + + def create_or_update_zt_gateway_dns_policy(self, + policy_name: str, + description: str, + list_ids_for_policy: List[str], + dns_location_id: Optional[str], + existing_policy_details: Optional[Dict[str, Any]]) -> Optional[str]: + + # Constructing the DNS Policy Payload + if list_ids_for_policy: + # Format for traffic rule: "any(dns.domains[*] in $uuid1) or any(dns.domains[*] in $uuid2)" + # The list ID in the rule needs to be the UUID without dashes. + formatted_list_refs = [f"any(dns.domains[*] in ${list_id.replace('-', '')})" for list_id in list_ids_for_policy] + traffic_expression = " or ".join(formatted_list_refs) + else: + traffic_expression = "1==0" # No lists, policy effectively blocks nothing + + policy_data = { + 'name': policy_name, + 'description': description, + 'action': 'block', + 'enabled': True, + 'filters': ['dns'], + 'traffic': traffic_expression, + 'precedence': 11 + } + + # Add location_ids if provided + if dns_location_id: + policy_data['location_ids'] = [dns_location_id] + + if existing_policy_details: + policy_id = existing_policy_details['id'] + logger.info(f"Updating existing policy {policy_name} (ID: {policy_id}) to reference {len(list_ids_for_policy)} list(s).") + + def _update_policy(): + return self.cf.zero_trust.gateway.rules.update( + rule_id=policy_id, + account_id=self.account_id, + **policy_data + ) + + response = self._api_call_with_retry(_update_policy) + logger.info(f"Successfully updated policy {policy_name}.") + return policy_id + else: + logger.info(f"Creating new policy {policy_name} to reference {len(list_ids_for_policy)} list(s).") + + def _create_policy(): + return self.cf.zero_trust.gateway.rules.create( + account_id=self.account_id, + **policy_data + ) + + response = self._api_call_with_retry(_create_policy) + new_policy_id = response.id if response else None + if new_policy_id: + 
logger.info(f"Successfully created policy {policy_name} with ID {new_policy_id}.") + return new_policy_id + +# --- Main Logic --- +def _check_env_vars() -> Tuple[str, str]: + """Checks for required environment variables and returns them if found, otherwise exits.""" + account_id = CLOUDFLARE_ACCOUNT_ID + api_token = CLOUDFLARE_API_TOKEN + + if not account_id or not api_token: + logger.critical("CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN environment variables are required.") + sys.exit(1) + + if not DNS_LOCATION_ID: + logger.warning("DNS_LOCATION_ID not provided. The adblock policy will not be explicitly tied to a specific location in its definition.") + + return account_id, api_token + +def _load_desired_state_from_chunks( + chunk_files_path_pattern: str, list_name_prefix: str, max_list_items: int, max_total_lists: int +) -> Dict[str, List[str]]: + """Loads domain lists from chunk files with free tier limits.""" + desired_lists_from_files: Dict[str, List[str]] = {} + chunk_files = sorted(glob.glob(os.path.join(PROCESSED_CHUNKS_DIR, chunk_files_path_pattern))) + logger.info(f"Found {len(chunk_files)} chunk files in {PROCESSED_CHUNKS_DIR} matching pattern {chunk_files_path_pattern}.") + + if len(chunk_files) > max_total_lists: + logger.warning(f"Found {len(chunk_files)} chunk files, but free tier limit is {max_total_lists} lists. Using first {max_total_lists} files.") + chunk_files = chunk_files[:max_total_lists] + + for chunk_file_path in chunk_files: + base_filename = os.path.basename(chunk_file_path) + list_name_suffix = base_filename.replace(".txt", "").replace("_", "-") + list_name = list_name_prefix + list_name_suffix + + try: + with open(chunk_file_path, 'r') as f: + domains = [line.strip() for line in f if line.strip()] + if len(domains) > max_list_items: + logger.warning( + f"List {list_name} from file {chunk_file_path} has {len(domains)} items, " + f"exceeding max of {max_list_items}. Truncating." 
+ ) + desired_lists_from_files[list_name] = domains[:max_list_items] + elif domains: + desired_lists_from_files[list_name] = domains + else: + logger.info(f"Chunk file {chunk_file_path} is empty. Skipping list creation for {list_name}.") + except Exception as e: + logger.error(f"Error reading or processing chunk file {chunk_file_path}: {e}") + + logger.info(f"Desired state: {len(desired_lists_from_files)} non-empty lists to be managed from files.") + return desired_lists_from_files + +def _reconcile_lists_with_hash_detection( + desired_lists: Dict[str, List[str]], existing_cf_lists: Dict[str, Dict[str, Any]] +) -> Tuple[Dict[str, List[str]], Dict[str, Dict[str, Any]], Dict[str, str]]: + """Determines list operations using hash-based change detection.""" + list_ops_create: Dict[str, List[str]] = {} + list_ops_update: Dict[str, Dict[str, Any]] = {} # name -> {'id': id, 'items': items} + list_ops_delete: Dict[str, str] = {} # name -> id + + for desired_name, desired_items in desired_lists.items(): + if desired_name in existing_cf_lists: + existing_data = existing_cf_lists[desired_name] + existing_description = existing_data.get('description', '') + + # Use hash-based change detection + if has_list_changed(desired_items, existing_description): + logger.info(f"List {desired_name} has changed (hash mismatch), scheduling for update.") + list_ops_update[desired_name] = { + 'id': existing_data['id'], + 'items': desired_items + } + else: + logger.info(f"List {desired_name} unchanged (hash match), skipping update.") + else: + logger.info(f"List {desired_name} doesn't exist, scheduling for creation.") + list_ops_create[desired_name] = desired_items + + for existing_name, existing_data in existing_cf_lists.items(): + if existing_name not in desired_lists: + logger.info(f"List {existing_name} no longer needed, scheduling for deletion.") + list_ops_delete[existing_name] = existing_data['id'] + + logger.info(f"List operations planned: Create: {len(list_ops_create)}, Update: 
{len(list_ops_update)}, Delete: {len(list_ops_delete)}") + return list_ops_create, list_ops_update, list_ops_delete + +def _execute_list_operations_and_get_ids( + cf_manager: CloudflareManager, + list_ops_create: Dict[str, List[str]], + list_ops_update: Dict[str, Dict[str, Any]], + existing_cf_lists: Dict[str, Dict[str, Any]] +) -> List[str]: + """Executes create and update operations for lists and returns ALL managed list IDs.""" + managed_list_ids_for_policy: List[str] = [] + + # Create new lists + for name, items in list_ops_create.items(): + items_hash = calculate_list_hash(items) + description = get_list_description_with_hash(f"Adblock list from {name}. Managed by script.", items_hash) + new_list_id = cf_manager.create_zt_list(name, items, description) + if new_list_id: + logger.info(f"Successfully created list {name} with ID {new_list_id}.") + managed_list_ids_for_policy.append(new_list_id) + else: + logger.error(f"Failed to create list {name}. It will not be included in the policy.") + + # Update existing lists + for name, data in list_ops_update.items(): + items_hash = calculate_list_hash(data['items']) + description = get_list_description_with_hash(f"Adblock list from {name}. 
Managed by script.", items_hash) + if cf_manager.update_zt_list(data['id'], name, data['items'], description): + logger.info(f"Successfully updated list {name} (ID: {data['id']}).") + managed_list_ids_for_policy.append(data['id']) + else: + logger.error(f"Failed to update list {name} (ID: {data['id']}).") + + # Add IDs of existing lists that didn't need updates + for name, data in existing_cf_lists.items(): + if name not in list_ops_create and name not in list_ops_update: + managed_list_ids_for_policy.append(data['id']) + logger.debug(f"Including unchanged list {name} (ID: {data['id']}) in policy.") + + return sorted(list(set(managed_list_ids_for_policy))) + +def _delete_orphaned_lists(cf_manager: CloudflareManager, list_ops_delete: Dict[str, str]): + """Deletes lists that are no longer desired.""" + if not list_ops_delete: + logger.info("No lists to delete.") + return + + logger.info(f"Deleting {len(list_ops_delete)} orphaned lists...") + for name, list_id_to_delete in list_ops_delete.items(): + cf_manager.delete_zt_list(list_id_to_delete, name) + +def main(max_list_items_arg: int, max_total_lists_arg: int): + logger.info("Starting Cloudflare Adblock Management Script...") + account_id, api_token = _check_env_vars() + + cf_manager = CloudflareManager(account_id, api_token) + + # 1. Get desired state from chunk files + desired_lists_from_files = _load_desired_state_from_chunks( + CHUNK_FILE_PATTERN, LIST_NAME_PREFIX, max_list_items_arg, max_total_lists_arg + ) + + if len(desired_lists_from_files) == 0: + logger.warning("No valid lists found from chunk files. Exiting.") + return + + # 2. Get current state from Cloudflare + logger.info("Fetching current state from Cloudflare...") + existing_cf_lists = cf_manager.get_all_zt_lists_by_prefix(LIST_NAME_PREFIX) + existing_adblock_policy_details = cf_manager.get_zt_policy_by_name(POLICY_NAME) + + # 3. 
Reconcile Lists with Hash-based Change Detection + list_ops_create, list_ops_update, list_ops_delete = _reconcile_lists_with_hash_detection( + desired_lists_from_files, existing_cf_lists + ) + + # Skip processing if no changes needed + if not list_ops_create and not list_ops_update and not list_ops_delete: + logger.info("No list changes detected. Checking if policy update is needed...") + all_current_list_ids = [data['id'] for data in existing_cf_lists.values()] + else: + # 4. Execute list operations (Create/Update) and get IDs for policy + all_current_list_ids = _execute_list_operations_and_get_ids( + cf_manager, list_ops_create, list_ops_update, existing_cf_lists + ) + + # 5. Update/Create Gateway DNS Policy + logger.info(f"Updating policy {POLICY_NAME} to reference {len(all_current_list_ids)} lists.") + cf_manager.create_or_update_zt_gateway_dns_policy( + POLICY_NAME, + POLICY_DESCRIPTION, + all_current_list_ids, + DNS_LOCATION_ID, + existing_adblock_policy_details + ) + + # 6. Delete orphaned lists + _delete_orphaned_lists(cf_manager, list_ops_delete) + + logger.info(f"{Colors.GREEN}Cloudflare Adblock Management Script finished.{Colors.RESET}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Manage Cloudflare Zero Trust adblock lists and policy.") + parser.add_argument( + "max_list_items", + type=int, + help="Maximum number of items (domains) allowed per Cloudflare list." + ) + parser.add_argument( + "max_total_lists", + type=int, + help="Maximum number of lists to create/manage, typically constrained by Cloudflare free tier limits (e.g., 95)." 
+ ) + args = parser.parse_args() + + main(args.max_list_items, args.max_total_lists) diff --git a/tofu/cf-adblock/providers.tofu b/tofu/cf-adblock/providers.tofu new file mode 100644 index 000000000..4a4de7c6a --- /dev/null +++ b/tofu/cf-adblock/providers.tofu @@ -0,0 +1,32 @@ +terraform { + required_providers { + http = { + source = "hashicorp/http" + version = ">=3.5.0" + } + cloudflare = { + source = "cloudflare/cloudflare" + version = ">= 5.3.0" + } + } + encryption { + key_provider "pbkdf2" "my_passphrase" { + passphrase = var.tofu_encryption_passphrase + } + method "aes_gcm" "my_method" { + keys = key_provider.pbkdf2.my_passphrase + } + state { + method = method.aes_gcm.my_method + enforced = true + } + plan { + method = method.aes_gcm.my_method + enforced = true + } + } +} + +provider "cloudflare" { + api_token = var.cloudflare_secondary_api_token +} diff --git a/tofu/cf-adblock/variables.tofu b/tofu/cf-adblock/variables.tofu new file mode 100644 index 000000000..2cf1df972 --- /dev/null +++ b/tofu/cf-adblock/variables.tofu @@ -0,0 +1,21 @@ +variable "tofu_encryption_passphrase" { + description = "State encryption passphrase" + type = string + sensitive = true + default = null +} + +variable "cloudflare_secondary_account_id" { + type = string +} + +variable "cloudflare_secondary_api_token" { + type = string + sensitive = true +} + +# Define a variable for the GCS bucket name +variable "bucket_name" { + description = "The globally unique name for the GCS bucket." 
+ type = string +} From 574668154abd7e37d8e63f4099ae628335805431 Mon Sep 17 00:00:00 2001 From: Karteek <120569182+karteekiitg@users.noreply.github.com> Date: Sat, 12 Jul 2025 02:48:38 +0530 Subject: [PATCH 2/4] feat: add a clean way to generate cloudflare scoped account tokens for each separate project/module feat: add email alias to utilise cloudflare email routing and combat email spam fix: move adblock location to cloudflare dir fix: few other minor things Signed-off-by: Karteek <120569182+karteekiitg@users.noreply.github.com> --- .github/workflows/cf_adblock.yaml | 17 ++-- tofu/README.md | 45 ++++++++++ tofu/cf-adblock/adblock_urls.txt | 6 -- tofu/cf-adblock/backend.tofu | 5 -- tofu/cf-adblock/variables.tofu | 21 ----- tofu/cloudflare/account-tokens/DOCS.md | 49 ++++++++++ tofu/cloudflare/account-tokens/README.md | 14 +++ tofu/cloudflare/account-tokens/backend.tofu | 18 ++++ tofu/cloudflare/account-tokens/cf.tofu | 90 +++++++++++++++++++ tofu/cloudflare/account-tokens/infisical.tofu | 29 ++++++ .../account-tokens/infisical_variables.tofu | 30 +++++++ tofu/cloudflare/account-tokens/providers.tofu | 33 +++++++ tofu/cloudflare/account-tokens/variables.tofu | 56 ++++++++++++ .../adblock}/DOCS.md | 37 +++----- .../adblock}/README.md | 8 +- tofu/cloudflare/adblock/adblock_urls.txt | 7 ++ tofu/cloudflare/adblock/backend.tofu | 18 ++++ .../adblock}/chunk_adblock_lists.sh | 0 .../cloudflare_zero_trust_dns_location.tofu | 2 +- .../cloudflare_zero_trust_gateway_policy.tofu | 2 +- .../adblock}/manage_cloudflare_adblock.py | 4 +- .../adblock}/providers.tofu | 6 +- tofu/cloudflare/adblock/variables.tofu | 48 ++++++++++ tofu/cloudflare/email-alias/DOCS.md | 67 ++++++++++++++ tofu/cloudflare/email-alias/README.md | 19 ++++ tofu/cloudflare/email-alias/backend.tofu | 18 ++++ tofu/cloudflare/email-alias/email.tofu | 40 +++++++++ tofu/cloudflare/email-alias/email_vars.tofu | 50 +++++++++++ .../email-alias/latest_release.tofu | 14 +++ tofu/cloudflare/email-alias/providers.tofu | 
33 +++++++ tofu/cloudflare/email-alias/variables.tofu | 56 ++++++++++++ tofu/cloudflare/email-alias/workers.tofu | 32 +++++++ 32 files changed, 796 insertions(+), 78 deletions(-) create mode 100644 tofu/README.md delete mode 100644 tofu/cf-adblock/adblock_urls.txt delete mode 100644 tofu/cf-adblock/backend.tofu delete mode 100644 tofu/cf-adblock/variables.tofu create mode 100644 tofu/cloudflare/account-tokens/DOCS.md create mode 100644 tofu/cloudflare/account-tokens/README.md create mode 100644 tofu/cloudflare/account-tokens/backend.tofu create mode 100644 tofu/cloudflare/account-tokens/cf.tofu create mode 100644 tofu/cloudflare/account-tokens/infisical.tofu create mode 100644 tofu/cloudflare/account-tokens/infisical_variables.tofu create mode 100644 tofu/cloudflare/account-tokens/providers.tofu create mode 100644 tofu/cloudflare/account-tokens/variables.tofu rename tofu/{cf-adblock => cloudflare/adblock}/DOCS.md (82%) rename tofu/{cf-adblock => cloudflare/adblock}/README.md (84%) create mode 100644 tofu/cloudflare/adblock/adblock_urls.txt create mode 100644 tofu/cloudflare/adblock/backend.tofu rename tofu/{cf-adblock => cloudflare/adblock}/chunk_adblock_lists.sh (100%) rename tofu/{cf-adblock => cloudflare/adblock}/cloudflare_zero_trust_dns_location.tofu (97%) rename tofu/{cf-adblock => cloudflare/adblock}/cloudflare_zero_trust_gateway_policy.tofu (89%) rename tofu/{cf-adblock => cloudflare/adblock}/manage_cloudflare_adblock.py (99%) rename tofu/{cf-adblock => cloudflare/adblock}/providers.tofu (79%) create mode 100644 tofu/cloudflare/adblock/variables.tofu create mode 100644 tofu/cloudflare/email-alias/DOCS.md create mode 100644 tofu/cloudflare/email-alias/README.md create mode 100644 tofu/cloudflare/email-alias/backend.tofu create mode 100644 tofu/cloudflare/email-alias/email.tofu create mode 100644 tofu/cloudflare/email-alias/email_vars.tofu create mode 100644 tofu/cloudflare/email-alias/latest_release.tofu create mode 100644 
tofu/cloudflare/email-alias/providers.tofu create mode 100644 tofu/cloudflare/email-alias/variables.tofu create mode 100644 tofu/cloudflare/email-alias/workers.tofu diff --git a/.github/workflows/cf_adblock.yaml b/.github/workflows/cf_adblock.yaml index 2fe8f1d6c..6d07ab72e 100644 --- a/.github/workflows/cf_adblock.yaml +++ b/.github/workflows/cf_adblock.yaml @@ -6,7 +6,7 @@ on: - cron: "0 0 1 * *" # Runs at 00:00 UTC on the 1st day of every month env: - TF_VAR_gcs_env: prod + TF_VAR_branch_env: prod permissions: contents: read @@ -87,26 +87,19 @@ jobs: fi echo "Secrets loaded and temporary file removed." - - name: Authenticate to Google Cloud - id: google-auth - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 - with: - workload_identity_provider: ${{ env.GCP_WORKLOAD_IDENTITY_PROVIDER }} # Now from Infisical via env - service_account: ${{ env.GCP_SERVICE_ACCOUNT_EMAIL }} # Now from Infisical via env - - name: Run Adblock List Chunking Script run: bash chunk_adblock_lists.sh 1000 90 - working-directory: ./tofu/cf-adblock # Ensures script is run in the correct context + working-directory: ./tofu/cloudflare/adblock - name: OpenTofu Init for cf-adblock run: tofu init - working-directory: ./tofu/cf-adblock + working-directory: ./tofu/cloudflare/adblock - name: OpenTofu Apply for cf-adblock id: apply_cf_adblock shell: bash run: tofu apply -auto-approve - working-directory: ./tofu/cf-adblock + working-directory: ./tofu/cloudflare/adblock - name: Install Python dependencies shell: bash @@ -119,4 +112,4 @@ jobs: run: | echo "Running Python script manage_cloudflare_adblock.py..." 
python3 manage_cloudflare_adblock.py 1000 90 - working-directory: ./tofu/cf-adblock # Runs Python script from the same dir as chunker & TF + working-directory: ./tofu/cloudflare/adblock # Runs Python script from the same dir as chunker & TF diff --git a/tofu/README.md b/tofu/README.md new file mode 100644 index 000000000..53fa6e484 --- /dev/null +++ b/tofu/README.md @@ -0,0 +1,45 @@ +# OpenTofu Infrastructure + +This directory contains all the OpenTofu modules for managing the infrastructure of this homelab. The modules are designed to be applied in a specific order to ensure dependencies are met. + +## Execution Order & Workflow + +It is crucial to apply these modules in the following sequence: + +### 1. Remote State Backend (Optional but Recommended) + +Setting up a remote backend is the first step for managing state collaboratively and securely. If you do not set up a remote backend, OpenTofu will use a local state file by default. The `kubernetes` module contains samples for local (default), GCS, and R2 backends that can be adapted for other modules. + +Choose one of the following options if you want to use remote state: + +- **Cloudflare R2**: See the instructions in [`./remote-state/cf/README.md`](./remote-state/cf/README.md) +- **Google Cloud Storage (GCS)**: See the instructions in [`./remote-state/gcs/README.md`](./remote-state/gcs/README.md) + +### 2. Cloudflare Account Tokens + +**This is a prerequisite for all other Cloudflare modules.** + +This module creates the scoped API tokens that are required to authenticate and authorize the other Cloudflare-related modules. + +- **Instructions**: See [`./cloudflare/account-tokens/README.md`](./cloudflare/account-tokens/README.md) + +### 3. Other Cloudflare Modules + +Once the prerequisites are met, you can apply the other modules as needed. By default, all modules below are configured to use the Cloudflare R2 remote state backend.
If you want to use a different remote backend, you will need to adjust the `backend.tofu` file in each module accordingly (for reference check [`Kubernetes`](./kubernetes/README.md)). + +- **Cloudflare Adblock**: Manages Cloudflare Zero Trust Gateway DNS policies for ad and malware blocking. + - **Instructions**: See [`./cloudflare/adblock/README.md`](./cloudflare/adblock/README.md) + +- **Cloudflare Email Alias**: Configures a powerful and secure email forwarding system using Cloudflare Email Routing and a custom worker. + - **Instructions**: See [`./cloudflare/email-alias/README.md`](./cloudflare/email-alias/README.md) + +### 4. Kubernetes + +- **Kubernetes**: Provisions the Kubernetes cluster on Proxmox using Talos. This module is flexible and supports local state, as well as GCS and R2 remote backends. You can find sample backend configurations within its directory. + - **Instructions**: See [`./kubernetes/README.md`](./kubernetes/README.md) + +### Suggestions (Opinionated) + +I suggest using the R2 remote state backend. It is the default for all new modules, except Kubernetes. As most of us use Cloudflare anyway, and R2 has a generous 10GB free tier, I feel it's a good default. + +I also suggest using Infisical. It is open source and has a very generous free tier. All new modules, except Kubernetes, are configured to use Infisical for secrets management by default. For the Kubernetes module, Infisical is optional, but recommended.
diff --git a/tofu/cf-adblock/adblock_urls.txt b/tofu/cf-adblock/adblock_urls.txt deleted file mode 100644 index 757c6c3f9..000000000 --- a/tofu/cf-adblock/adblock_urls.txt +++ /dev/null @@ -1,6 +0,0 @@ -terraform { - backend "gcs" { - bucket = var.bucket_name - prefix = "cf-adblock/prod" - } -} diff --git a/tofu/cf-adblock/backend.tofu b/tofu/cf-adblock/backend.tofu deleted file mode 100644 index b8da2677f..000000000 --- a/tofu/cf-adblock/backend.tofu +++ /dev/null @@ -1,5 +0,0 @@ -terraform { - backend "gcs" { - bucket = var.bucket_name - } -} diff --git a/tofu/cf-adblock/variables.tofu b/tofu/cf-adblock/variables.tofu deleted file mode 100644 index 2cf1df972..000000000 --- a/tofu/cf-adblock/variables.tofu +++ /dev/null @@ -1,21 +0,0 @@ -variable "tofu_encryption_passphrase" { - description = "State encryption passphrase" - type = string - sensitive = true - default = null -} - -variable "cloudflare_secondary_account_id" { - type = string -} - -variable "cloudflare_secondary_api_token" { - type = string - sensitive = true -} - -# Define a variable for the GCS bucket name -variable "bucket_name" { - description = "The globally unique name for the GCS bucket." - type = string -} diff --git a/tofu/cloudflare/account-tokens/DOCS.md b/tofu/cloudflare/account-tokens/DOCS.md new file mode 100644 index 000000000..e6b6df51a --- /dev/null +++ b/tofu/cloudflare/account-tokens/DOCS.md @@ -0,0 +1,49 @@ +## Overview + +This OpenTofu module automates the creation of scoped Cloudflare Account API Tokens. By generating specific, permission-limited tokens, it enhances security by adhering to the principle of least privilege. Instead of using a master API token for all operations, other OpenTofu modules or CI/CD pipelines can use these tokens which only have the permissions necessary for their specific tasks. + +## Key Resources + +- **`cloudflare_account_token`**: This is the primary resource used to create the API tokens. 
Two distinct tokens are generated: + - **`zero_trust_tofu_token`**: This token is granted a combination of permissions that allow it to manage Cloudflare Zero Trust configurations and DNS records. This is ideal for tasks like updating ad-block lists or other DNS-based filtering. + - **`email_tofu_token`**: This token is configured with permissions to manage email routing rules and associated Workers. This is useful for automating email forwarding or processing. + +- **`infisical_secret`**: For each token created, a corresponding secret is created in Infisical. This allows for the secure storage and retrieval of the token values. The secrets are named with a `TF_VAR_` prefix, making them easily consumable as environment variables in other OpenTofu configurations or scripts. + +- **`cloudflare_account_api_token_permission_groups_list` data source**: This data source is used to dynamically fetch the available permission groups from the Cloudflare API. This avoids hardcoding permission group IDs, making the configuration more robust and adaptable to changes in the Cloudflare API. + +## Instructions + +### Prerequisites + +Before applying this module, you must have the following in place: + +1. **Configured Remote State**: A remote backend (in this case, Cloudflare R2) must be fully configured and operational. The `backend.tofu` file in this module is already configured to use the R2 bucket. +2. **Infisical Project**: An Infisical project must exist, and you must have the necessary credentials (client ID, client secret, project ID) to authenticate and write secrets. +3.
**Environment Variables**: The following environment variables must be set in your execution environment (e.g., your devcontainer's `.env` file): + - `TF_VAR_cloudflare_account_id` - set it in Infisical manually + - `TF_VAR_cloudflare_master_account_api_token` - set it in Infisical manually + - `TF_VAR_cloudflare_r2_tofu_access_key` - automatically set in the devcontainer by [cloudflare remote state](../../remote-state/cf/README.md). + - `TF_VAR_cloudflare_r2_tofu_access_secret` - automatically set in the devcontainer by [cloudflare remote state](../../remote-state/cf/README.md). + - `TF_VAR_bucket_name` - automatically set in the devcontainer when set in the `.env` file in the root folder. + - `TF_VAR_branch_env` - automatically set in the devcontainer based on the current branch. + - `TF_VAR_tofu_encryption_passphrase` - set it in Infisical manually + - `TF_VAR_infisical_domain` - automatically set in the devcontainer when set in the `.env` file in the root folder. + - `TF_VAR_infisical_client_id` - automatically set in the devcontainer when set in the `.env` file in the root folder. + - `TF_VAR_infisical_client_secret` - set it in [devcontainer](/.devcontainer/README.md) manually. + - `TF_VAR_infisical_project_id` - automatically set in the devcontainer when set in the `.env` file in the root folder. + - Note: You might need to run `source ~/.zshrc` in your devcontainer to ensure the environment variables are loaded correctly after they are automatically set up in Infisical for the first time by remote state. + +### Execution + +Once the prerequisites are met, you can apply the configuration: + +```bash +# Initialize tofu +tofu init + +# Run tofu apply to create the tokens and secrets +tofu apply +``` + +After a successful apply, the generated tokens will be securely stored in your Infisical project under the path specified by `var.infisical_rw_secrets_path`.
diff --git a/tofu/cloudflare/account-tokens/README.md b/tofu/cloudflare/account-tokens/README.md new file mode 100644 index 000000000..701d4cb11 --- /dev/null +++ b/tofu/cloudflare/account-tokens/README.md @@ -0,0 +1,14 @@ +## Overview + +This OpenTofu module is responsible for creating and managing specific, scoped Cloudflare Account API Tokens. These tokens are designed for use in other OpenTofu modules and CI/CD pipelines to perform automated tasks within the Cloudflare ecosystem. + +## Key Resources + +- **Scoped API Tokens**: Creates dedicated tokens for: + - **Zero Trust & DNS**: For programmatically managing Zero Trust policies, lists, and DNS records. + - **Email & Workers**: For automating email routing rules and related Worker scripts. +- **Secrets Management with Infisical**: Securely stores the generated API tokens in a specified Infisical project and path for other modules and services to consume. + +## Instructions + +This module assumes a pre-existing and configured OpenTofu remote state backend. For detailed prerequisites and step-by-step instructions, please refer to [DOCS.md](./DOCS.md). 
diff --git a/tofu/cloudflare/account-tokens/backend.tofu b/tofu/cloudflare/account-tokens/backend.tofu new file mode 100644 index 000000000..b189d6adc --- /dev/null +++ b/tofu/cloudflare/account-tokens/backend.tofu @@ -0,0 +1,18 @@ +terraform { + backend "s3" { + bucket = var.bucket_name + key = "cloudflare/account-tokens/${var.branch_env}/terraform.tfstate" + region = "auto" + skip_credentials_validation = true + skip_metadata_api_check = true + skip_region_validation = true + skip_requesting_account_id = true + skip_s3_checksum = true + use_path_style = true + endpoints = { + s3 = "https://${var.cloudflare_account_id}.r2.cloudflarestorage.com" + } + access_key = var.cloudflare_r2_tofu_access_key + secret_key = var.cloudflare_r2_tofu_access_secret + } +} diff --git a/tofu/cloudflare/account-tokens/cf.tofu b/tofu/cloudflare/account-tokens/cf.tofu new file mode 100644 index 000000000..bdf310aab --- /dev/null +++ b/tofu/cloudflare/account-tokens/cf.tofu @@ -0,0 +1,90 @@ +locals { + # Create a nested map: scope -> permission_name -> id + api_token_permission_groups_map = { + account = { + for perm in data.cloudflare_account_api_token_permission_groups_list.all.result : + perm.name => perm.id + if contains(perm.scopes, "com.cloudflare.api.account") + } + zone = { + for perm in data.cloudflare_account_api_token_permission_groups_list.all.result : + perm.name => perm.id + if contains(perm.scopes, "com.cloudflare.api.account.zone") + } + # Add R2 scope mapping + r2 = { + for perm in data.cloudflare_account_api_token_permission_groups_list.all.result : + perm.name => perm.id + if contains(perm.scopes, "com.cloudflare.edge.r2.bucket") + } + } +} + +# Get API token permission groups data +data "cloudflare_account_api_token_permission_groups_list" "all" { + account_id = var.cloudflare_account_id +} + +# Create Account token for Zero Trust access with proper permissions +resource "cloudflare_account_token" "zero_trust_tofu_token" { + name = "Zero Trust Tofu Token" + account_id 
= var.cloudflare_account_id + + policies = [{ + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.account["Zero Trust Write"] + }] + resources = { + "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" + } + }, { + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.account["Cloudflare Zero Trust Secure DNS Locations Write"] + }] + resources = { + "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" + } + }, { + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.zone["DNS Write"] + }] + resources = { + "com.cloudflare.api.account.zone.${var.cloudflare_zone_id}" = "*" + } + }] +} + +# Create Account token for Email Routing and Workers access with proper permissions +resource "cloudflare_account_token" "email_tofu_token" { + name = "Email Tofu Token" + account_id = var.cloudflare_account_id + + policies = [{ + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.account["Email Routing Addresses Write"] + }] + resources = { + "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" + } + }, { + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.zone["Email Routing Rules Write"] + }] + resources = { + "com.cloudflare.api.account.zone.${var.cloudflare_zone_id}" = "*" + } + }, { + effect = "allow" + permission_groups = [{ + id = local.api_token_permission_groups_map.account["Workers Scripts Write"] + }] + resources = { + "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" + } + }] +} diff --git a/tofu/cloudflare/account-tokens/infisical.tofu b/tofu/cloudflare/account-tokens/infisical.tofu new file mode 100644 index 000000000..9ac4b3a5f --- /dev/null +++ b/tofu/cloudflare/account-tokens/infisical.tofu @@ -0,0 +1,29 @@ +provider "infisical" { + host = var.infisical_domain + auth = { + universal = { + client_id = var.infisical_client_id + client_secret =
var.infisical_client_secret + } + } +} + +# Store Zero Trust Account Token +resource "infisical_secret" "cloudflare_zero_trust_tofu_token" { + name = "TF_VAR_cloudflare_zero_trust_tofu_token" + value = cloudflare_account_token.zero_trust_tofu_token.value + + env_slug = var.branch_env + folder_path = var.infisical_rw_secrets_path + workspace_id = var.infisical_project_id +} + +# Store Email Account Token +resource "infisical_secret" "cloudflare_email_tofu_token" { + name = "TF_VAR_cloudflare_email_tofu_token" + value = cloudflare_account_token.email_tofu_token.value + + env_slug = var.branch_env + folder_path = var.infisical_rw_secrets_path + workspace_id = var.infisical_project_id +} diff --git a/tofu/cloudflare/account-tokens/infisical_variables.tofu b/tofu/cloudflare/account-tokens/infisical_variables.tofu new file mode 100644 index 000000000..d2e290c1d --- /dev/null +++ b/tofu/cloudflare/account-tokens/infisical_variables.tofu @@ -0,0 +1,30 @@ +variable "infisical_domain" { + description = "Infisical Domain" + type = string + default = "https://app.infisical.com" +} + +variable "infisical_client_id" { + description = "Infisical Client ID" + type = string + default = null +} + +variable "infisical_project_id" { + description = "Infisical Project ID" + type = string + default = null +} + +variable "infisical_rw_secrets_path" { + description = "Infisical folder path where the generated read/write secrets are stored" + type = string + default = "/tofu_rw" +} + +variable "infisical_client_secret" { + description = "Infisical Client Secret" + type = string + sensitive = true + default = null +} diff --git a/tofu/cloudflare/account-tokens/providers.tofu b/tofu/cloudflare/account-tokens/providers.tofu new file mode 100644 index 000000000..0e0d8a54e --- /dev/null +++ b/tofu/cloudflare/account-tokens/providers.tofu @@ -0,0 +1,33 @@ + +terraform { + required_providers { + cloudflare = { + source = "cloudflare/cloudflare" + version = ">= 5.6.0" + } + infisical = { + source = "infisical/infisical" + version = ">= 0.15.21"
+ } + } + encryption { + key_provider "pbkdf2" "my_passphrase" { + passphrase = var.tofu_encryption_passphrase + } + method "aes_gcm" "my_method" { + keys = key_provider.pbkdf2.my_passphrase + } + state { + method = method.aes_gcm.my_method + enforced = true + } + plan { + method = method.aes_gcm.my_method + enforced = true + } + } +} + +provider "cloudflare" { + api_token = var.cloudflare_master_account_api_token +} diff --git a/tofu/cloudflare/account-tokens/variables.tofu b/tofu/cloudflare/account-tokens/variables.tofu new file mode 100644 index 000000000..b0b1b3118 --- /dev/null +++ b/tofu/cloudflare/account-tokens/variables.tofu @@ -0,0 +1,56 @@ + +variable "cloudflare_account_id" { + type = string + description = "The Cloudflare account ID." +} + +variable "cloudflare_zone_id" { + description = "The Zone ID to apply zone-level permissions to." + type = string +} + +variable "cloudflare_master_account_api_token" { + type = string + description = "The Cloudflare Account API token for creating access creds." + sensitive = true +} + +variable "cloudflare_r2_tofu_access_key" { + type = string + description = "The Cloudflare R2 access key for tofu remote state." +} + +variable "cloudflare_r2_tofu_access_secret" { + type = string + description = "The Cloudflare R2 access secret for tofu remote state." +} + +# Define a variable for the bucket name. +# Set this in .env file in root, which should automatically set it in devcontainer env. +variable "bucket_name" { + description = "The globally unique name for the bucket." + type = string + + validation { + condition = can(regex("^[a-z0-9][a-z0-9-]*[a-z0-9]$", var.bucket_name)) + error_message = "Bucket name must be lowercase, alphanumeric, and hyphens only." + } + + validation { + condition = length(var.bucket_name) >= 3 && length(var.bucket_name) <= 63 + error_message = "Bucket name must be between 3 and 63 characters." + } +} + +# Define a variable for part of bucket prefix. 
+# This should be set automatically based on branch logic in devcontainer. +variable "branch_env" { + description = "Part of bucket prefix." + type = string +} + +variable "tofu_encryption_passphrase" { + description = "The encryption passphrase for tofu state encryption." + type = string + sensitive = true +} diff --git a/tofu/cf-adblock/DOCS.md b/tofu/cloudflare/adblock/DOCS.md similarity index 82% rename from tofu/cf-adblock/DOCS.md rename to tofu/cloudflare/adblock/DOCS.md index 02167e8d4..cae8a60f2 100644 --- a/tofu/cf-adblock/DOCS.md +++ b/tofu/cloudflare/adblock/DOCS.md @@ -9,20 +9,17 @@ This project enhances network security and user experience by filtering unwanted ## Key Components & Functionality 1. **`adblock_urls.txt`**: - - Contains URLs to external ad/malware domain lists (e.g., Hagezi). Each line should be a single URL. Lines starting with `#` are treated as comments and ignored. Empty lines are also ignored. This file is the primary source for defining which external lists are used to build the Cloudflare adblock lists. You can add or remove list URLs here to change the sources. 2. **`chunk_adblock_lists.sh` (Shell Script)**: - - **Purpose**: This script automates the process of downloading, consolidating, cleaning, sorting, and chunking the domain lists from the URLs specified in `adblock_urls.txt`. It handles potential duplicates by creating a unique sorted list before splitting. It splits the large list into smaller files (e.g., `adblock_chunk_000.txt`, `adblock_chunk_001.txt`, etc.) in the `./processed_adblock_chunks/` directory. This chunking is essential to comply with Cloudflare Zero Trust list item limits (currently 1000 items per list on the free tier). The script uses hashing and deterministic spillover to ensure that the mapping of domains to chunk files remains consistent between runs, even if the source lists have minor changes, minimizing unnecessary updates to Cloudflare lists. 
- **Usage**: This script is executed by the GitHub Action before the Python management script runs. It can also be run manually from the `tofu/cf-adblock/` directory (`bash ./chunk_adblock_lists.sh `) to prepare the domain data locally. 3. **OpenTofu Configuration (`.tofu` files in `./tofu/cf-adblock/`)**: - - Manages the core Cloudflare Zero Trust infrastructure resources that are relatively static, have complex interdependencies best defined declaratively with Infrastructure as Code, or manage stateful components like the GCS backend. - **`backend.tofu`**: Configures the GCS backend for OpenTofu state management. This stores the state file (`tofu.tfstate`) in a Google Cloud Storage bucket, allowing multiple users or automated processes (like GitHub Actions) to work with the same infrastructure state securely (especially when combined with state encryption). The `prefix` (`cf-adblock/prod`) helps organize state files within the bucket. - **`providers.tofu`**: Defines the required external providers for OpenTofu to interact with Cloudflare and potentially other services like HTTP. It specifies the source (`cloudflare/cloudflare`, `hashicorp/http`) and acceptable version constraints (`>= 5.3.0`, `>=3.5.0`). It also configures state and plan encryption using PBKDF2 and AES-GCM methods, requiring a passphrase variable (`var.tofu_encryption_passphrase`). - - **`variables.tofu`**: Defines input variables used by the OpenTofu configuration. These include sensitive variables for Cloudflare authentication (`cloudflare_secondary_account_id`, `cloudflare_secondary_api_token`, `tofu_encryption_passphrase`) and the GCS bucket name (`bucket_name`). Variable definitions specify type, description, and whether they are sensitive. Values are typically provided via environment variables (prefixed with `TF_VAR_`) or other OpenTofu input methods. + - **`variables.tofu`**: Defines input variables used by the OpenTofu configuration. 
These include sensitive variables for Cloudflare authentication (`cloudflare_account_id`, `cloudflare_zero_trust_tofu_token`, `tofu_encryption_passphrase`) and the GCS bucket name (`bucket_name`). Variable definitions specify type, description, and whether they are sensitive. Values are typically provided via environment variables (prefixed with `TF_VAR_`) or other OpenTofu input methods. - **`cloudflare_zero_trust_gateway_policy.tofu`**: Defines a specific DNS Gateway policy resource named `block_malware`. This policy is configured to block known threats based on Cloudflare's predefined security categories using a `traffic` expression (`any(dns.security_category[*] in {...})`). This policy is distinct from the ad-blocking policy, which is managed dynamically by the Python script. - **`cloudflare_zero_trust_dns_location.tofu`**: Sets up a custom DNS location resource (named "HomeLab") within Cloudflare Zero Trust. This resource defines the endpoints (DoH, DoT, IPv4, IPv6) that Cloudflare will provide for this location. It includes outputs (`dns_location_homelab`, `dns_location_homelab_id`) to make the dynamically assigned DNS endpoint details and the location's unique ID available after OpenTofu apply. This ID is then used by the Python script to associate the dynamically managed adblock policy with this specific location. @@ -54,28 +51,28 @@ The [GitHub Action](/.github/workflows/cf_adblock.yaml) workflow automates the p Here is a breakdown of the steps in the workflow: 1. **Triggers**: The workflow is configured to run on a monthly schedule (`cron: "0 0 1 * *"`, meaning at 00:00 UTC on the 1st day of every month) and can also be manually triggered via the GitHub Actions UI (`workflow_dispatch`). -2. **Environment Variables**: Sets the `TF_VAR_gcs_env` environment variable to `prod`, used by the OpenTofu backend configuration. +2. **Environment Variables**: Sets the `TF_VAR_branch_env` environment variable to `prod`, used by the OpenTofu backend configuration. 3. 
**Permissions**: Grants necessary permissions for checking out the code (`contents: read`) and authenticating to Google Cloud using Workload Identity Federation (`id-token: write`). 4. **Checkout repository**: Uses the `actions/checkout` action to clone the repository code onto the runner. 5. **Load .env file to environment**: (Assumes a local `.env` file might be present for local devcontainer setup, although typically secrets are handled via Infisical in the action). This step sources environment variables from a `.env` file at the root of the repository, if it exists, and adds them to the GitHub Actions environment. -6. **Load secrets to environment**: This crucial step authenticates to Infisical using a client secret (`secrets.INFISICAL_CLIENT_SECRET`) and runs a setup script (`./.devcontainer/setup_infisical.sh`). This script is responsible for fetching secrets stored in Infisical (including `TF_VAR_cloudflare_secondary_account_id`, `TF_VAR_cloudflare_secondary_api_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`, `GCP_WORKLOAD_IDENTITY_PROVIDER`, and `GCP_SERVICE_ACCOUNT_EMAIL`) and exporting them to a file. The workflow then reads this file, parses the `KEY=VALUE` lines, cleans up quotes, and adds these secrets as environment variables to the GitHub Actions runner for subsequent steps to use. The temporary file containing secrets is then removed. -7. **Authenticate to Google Cloud**: Uses the `google-github-actions/auth` action to authenticate the workflow to Google Cloud using Workload Identity Federation. It uses the GCP Workload Identity Provider ID and Service Account email fetched from Infisical secrets. This step sets up credentials that allow OpenTofu and other GCP tools to interact with GCP resources, specifically the GCS bucket for state. -8. **Run Adblock List Chunking Script**: Executes the `chunk_adblock_lists.sh` script with arguments (e.g., `1000 90`) from within the `./tofu/cf-adblock/` directory. 
This script downloads the domain lists, processes them, and generates the chunk files in `./processed_adblock_chunks/`. -9. **OpenTofu Init for cf-adblock**: Runs `tofu init` from the `./tofu/cf-adblock/` directory. This initializes the OpenTofu working directory, downloads necessary providers (Cloudflare, HTTP), and configures the GCS backend based on the `backend.tofu` file and the environment variables sourced from secrets. -10. **OpenTofu Apply for cf-adblock**: Runs `tofu apply -auto-approve` from the `./tofu/cf-adblock/` directory. This applies the OpenTofu configuration, creating or updating the static resources defined in the `.tofu` files (providers, backend state, variables, malware policy, DNS location). The `-auto-approve` flag bypasses interactive approval, suitable for automation. -11. **Install Python dependencies**: Installs the required Python libraries for the management script using `pip3 install cloudflare`. -12. **Run Cloudflare Adblock Management Script**: (Note: This step appears commented out (`#- name:`) in the provided workflow file, but it is the intended final step to complete the update process). This step executes the `manage_cloudflare_adblock.py` script with necessary arguments (e.g., `1000 90`) from the `./tofu/cf-adblock/` directory. The script uses the `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` environment variables (which were sourced from secrets and OpenTofu output) to authenticate to Cloudflare and manage the adblock lists and policy. +6. **Load secrets to environment**: This crucial step authenticates to Infisical using a client secret (`secrets.INFISICAL_CLIENT_SECRET`) and runs a setup script (`./.devcontainer/setup_infisical.sh`). This script is responsible for fetching secrets stored in Infisical (including `TF_VAR_cloudflare_account_id`, `TF_VAR_cloudflare_zero_trust_tofu_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`) and exporting them to a file. 
The workflow then reads this file, parses the `KEY=VALUE` lines, cleans up quotes, and adds these secrets as environment variables to the GitHub Actions runner for subsequent steps to use. The temporary file containing secrets is then removed. +7. **Run Adblock List Chunking Script**: Executes the `chunk_adblock_lists.sh` script with arguments (e.g., `1000 90`) from within the `./tofu/cloudflare/adblock/` directory. This script downloads the domain lists, processes them, and generates the chunk files in `./processed_adblock_chunks/`. +8. **OpenTofu Init for cf-adblock**: Runs `tofu init` from the `./tofu/cloudflare/adblock/` directory. This initializes the OpenTofu working directory, downloads necessary providers (Cloudflare, HTTP), and configures the Cloudflare R2 (S3-compatible) backend based on the `backend.tofu` file and the environment variables sourced from secrets. +9. **OpenTofu Apply for cf-adblock**: Runs `tofu apply -auto-approve` from the `./tofu/cloudflare/adblock/` directory. This applies the OpenTofu configuration, creating or updating the static resources defined in the `.tofu` files (providers, backend state, variables, malware policy, DNS location). The `-auto-approve` flag bypasses interactive approval, suitable for automation. +10. **Install Python dependencies**: Installs the required Python libraries for the management script using `pip3 install cloudflare`. +11. **Run Cloudflare Adblock Management Script**: (Note: This step appears commented out (`#- name:`) in the provided workflow file, but it is the intended final step to complete the update process). This step executes the `manage_cloudflare_adblock.py` script with necessary arguments (e.g., `1000 90`) from the `./tofu/cloudflare/adblock/` directory. The script uses the `CLOUDFLARE_ACCOUNT_ID` and `CLOUDFLARE_API_TOKEN` environment variables (which were sourced from secrets and OpenTofu output) to authenticate to Cloudflare and manage the adblock lists and policy.
## Required Inputs (Variables & Secrets) To successfully run this setup, both OpenTofu and the Python script require certain configuration inputs. These should be managed securely, ideally via a secrets management system like Infisical, and surfaced as environment variables for the workflow and manual execution. -- `TF_VAR_cloudflare_secondary_account_id`: Your Cloudflare Account ID where the Zero Trust configurations (lists, policies, locations) will be managed. This is used by both OpenTofu (for resources like the malware policy and DNS location) and the Python script (for list and policy management). -- `TF_VAR_cloudflare_secondary_api_token`: A Cloudflare API Token with the necessary permissions to manage Zero Trust Gateway lists, policies, and locations. This is a **sensitive secret** and must be kept secure. It is used by both OpenTofu and the Python script for authenticating with the Cloudflare API. +- `TF_VAR_cloudflare_account_id`: Your Cloudflare Account ID where the Zero Trust configurations (lists, policies, locations) will be managed. This is used by both OpenTofu (for resources like the malware policy and DNS location) and the Python script (for list and policy management). +- `TF_VAR_cloudflare_zero_trust_tofu_token`: A Cloudflare API Token with the necessary permissions to manage Zero Trust Gateway lists, policies, and locations. This is a **sensitive secret** and must be kept secure. It is used by both OpenTofu and the Python script for authenticating with the Cloudflare API. Note: You might need to run `source ~/.zshrc` in your devcontainer to ensure the environment variables are loaded correctly after they are automatically set up in Infisical for the first time by account-tokens. - `TF_VAR_bucket_name`: The globally unique name of the Google Cloud Storage bucket used for storing the OpenTofu state file. This is used by the OpenTofu backend configuration. 
- `TF_VAR_tofu_encryption_passphrase`: A passphrase used to encrypt the OpenTofu state file stored in GCS. This is a **sensitive secret** and must be kept secure. Used by the OpenTofu `encryption` block. -- `GCP_WORKLOAD_IDENTITY_PROVIDER`: (Used by GitHub Action) The full name of the GCP Workload Identity Provider configured for GitHub Actions. This is required for the GitHub Action to authenticate to Google Cloud using Workload Identity Federation. -- `GCP_SERVICE_ACCOUNT_EMAIL`: (Used by GitHub Action) The email address of the GCP Service Account that the GitHub Action will impersonate using Workload Identity Federation. This service account must have permissions to read/write objects in the GCS state bucket. +- `TF_VAR_cloudflare_r2_tofu_access_key`: R2 remote state key, setup in infisical automatically by [cloudflare remote state](../../remote-state/cf/README.md). +- `TF_VAR_cloudflare_r2_tofu_access_secret`: R2 remote state secret, setup in infisical automatically by [cloudflare remote state](../../remote-state/cf/README.md). +- Note: You might need to run `source ~/.zshrc` in your devcontainer to ensure the environment variables are loaded correctly after they are automatically set up in Infisical for the first time by remote state. ## Manual Setup & Execution (Local Environment) @@ -86,16 +83,13 @@ Note: By default, every month, the lists and policy are updated automatically vi To run manually: 1. **Prerequisites**: - - Ensure you have OpenTofu (or Terraform) installed. - Ensure you have Python 3 and `pip` installed. - Ensure you have `curl` and `grep` installed (usually available on Linux/macOS). - - Ensure you have authenticated to Google Cloud and have the necessary permissions to access the GCS state bucket. If using a devcontainer, follow instructions in the [devcontainer README](../../.devcontainer/README.md) on the steps to set up the devcontainer environment, including Infisical and GCP authentication. 
- - Ensure required environment variables (`TF_VAR_cloudflare_secondary_account_id`, `TF_VAR_cloudflare_secondary_api_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`) are set in your local environment. If using Infisical, run the setup/export process to populate these variables in your shell session. + - Ensure required environment variables (`TF_VAR_cloudflare_account_id`, `TF_VAR_cloudflare_zero_trust_tofu_token`, `TF_VAR_bucket_name`, `TF_VAR_tofu_encryption_passphrase`) are set in your local environment. If using Infisical, run the setup/export process to populate these variables in your shell session. - Navigate to the OpenTofu directory: `cd starter12/tofu/cf-adblock`. 2. **Prepare Domain Lists**: - - Run the chunking script to download, process, and split the domain lists. Replace `` and `` with your desired limits (e.g., 1000 and 90). ```bash @@ -105,7 +99,6 @@ To run manually: - Verify that chunk files (`adblock_chunk_*.txt`) have been created in the `./processed_adblock_chunks/` directory. 3. **Initialize OpenTofu**: - - Initialize the OpenTofu working directory. This sets up the backend configuration and downloads the required provider plugins based on your `providers.tofu` and `backend.tofu` files and environment variables. ```bash @@ -113,7 +106,6 @@ To run manually: ``` 4. **Apply Static OpenTofu Resources**: - - Run `tofu apply` to create or update the OpenTofu-managed resources (GCS backend state setup, providers configuration, variables, the malware policy, and the DNS location). Review the plan shown by OpenTofu carefully before confirming the apply. ```bash @@ -121,7 +113,6 @@ To run manually: ``` 5. **Install Python dependencies**: - - Install the necessary Python library for interacting with the Cloudflare API. 
```bash diff --git a/tofu/cf-adblock/README.md b/tofu/cloudflare/adblock/README.md similarity index 84% rename from tofu/cf-adblock/README.md rename to tofu/cloudflare/adblock/README.md index 637c3bcfe..d6df0c661 100644 --- a/tofu/cf-adblock/README.md +++ b/tofu/cloudflare/adblock/README.md @@ -40,12 +40,12 @@ For a detailed breakdown of the workflow steps, see [DOCS.md](./DOCS.md#github-a Configure these securely. The GitHub Action fetches them via Infisical secrets automatically (surfaced as `TF_VAR_...` or regular environment variables). They must also be present in devcontainer. -- `TF_VAR_cloudflare_secondary_account_id`: Your Cloudflare Account ID for Zero Trust configurations (used by OpenTofu and the Python script). -- `TF_VAR_cloudflare_secondary_api_token`: Cloudflare API Token with necessary permissions for Zero Trust management (used by OpenTofu and the Python script). **Sensitive secret.** +- `TF_VAR_cloudflare_account_id`: Your Cloudflare Account ID for Zero Trust configurations (used by OpenTofu and the Python script). +- `TF_VAR_cloudflare_zero_trust_tofu_token`: Cloudflare API Token with necessary permissions for Zero Trust management (used by OpenTofu and the Python script). **Sensitive secret.** - Generated automatically via [account-tokens](../account-tokens/README.md) - `TF_VAR_bucket_name`: GCS bucket name for OpenTofu remote state. - `TF_VAR_tofu_encryption_passphrase`: Passphrase for OpenTofu state encryption. **Sensitive secret.** -- `GCP_WORKLOAD_IDENTITY_PROVIDER`: GCP Workload Identity Provider ID for GitHub Actions authentication. -- `GCP_SERVICE_ACCOUNT_EMAIL`: GCP Service Account email for GCS access. +- `TF_VAR_cloudflare_r2_tofu_access_key`: Cloudflare R2 access key for remote state. **Sensitive secret.** - Generated automatically via [remote-state/cf](../../remote-state/cf/README.md) +- `TF_VAR_cloudflare_r2_tofu_access_secret`: Cloudflare R2 access secret for remote state. 
**Sensitive secret.** - Generated automatically via [remote-state/cf](../../remote-state/cf/README.md) ## Manual Setup & Execution (Local Environment) diff --git a/tofu/cloudflare/adblock/adblock_urls.txt b/tofu/cloudflare/adblock/adblock_urls.txt new file mode 100644 index 000000000..8eadeed75 --- /dev/null +++ b/tofu/cloudflare/adblock/adblock_urls.txt @@ -0,0 +1,7 @@ +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.amazon.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.apple.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.samsung.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.lgwebos.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.oppo-realme.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/domains/native.xiaomi.txt +https://raw.githubusercontent.com/hagezi/dns-blocklists/main/wildcard/pro.mini-onlydomains.txt diff --git a/tofu/cloudflare/adblock/backend.tofu b/tofu/cloudflare/adblock/backend.tofu new file mode 100644 index 000000000..e9b0f0eb2 --- /dev/null +++ b/tofu/cloudflare/adblock/backend.tofu @@ -0,0 +1,18 @@ +terraform { + backend "s3" { + bucket = var.bucket_name + key = "cloudflare/adblock/${var.branch_env}/terraform.tfstate" + region = "auto" + skip_credentials_validation = true + skip_metadata_api_check = true + skip_region_validation = true + skip_requesting_account_id = true + skip_s3_checksum = true + use_path_style = true + endpoints = { + s3 = "https://${var.cloudflare_account_id}.r2.cloudflarestorage.com" + } + access_key = var.cloudflare_r2_tofu_access_key + secret_key = var.cloudflare_r2_tofu_access_secret + } +} diff --git a/tofu/cf-adblock/chunk_adblock_lists.sh b/tofu/cloudflare/adblock/chunk_adblock_lists.sh similarity index 100% rename from tofu/cf-adblock/chunk_adblock_lists.sh rename to tofu/cloudflare/adblock/chunk_adblock_lists.sh diff --git 
a/tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu b/tofu/cloudflare/adblock/cloudflare_zero_trust_dns_location.tofu similarity index 97% rename from tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu rename to tofu/cloudflare/adblock/cloudflare_zero_trust_dns_location.tofu index 1609305a4..d64a6a180 100644 --- a/tofu/cf-adblock/cloudflare_zero_trust_dns_location.tofu +++ b/tofu/cloudflare/adblock/cloudflare_zero_trust_dns_location.tofu @@ -1,5 +1,5 @@ resource "cloudflare_zero_trust_dns_location" "homelab" { - account_id = var.cloudflare_secondary_account_id + account_id = var.cloudflare_account_id name = "HomeLab" # This will be the name in the Cloudflare dashboard client_default = true # Set to true if this should be the default location for WARP clients ecs_support = false diff --git a/tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu b/tofu/cloudflare/adblock/cloudflare_zero_trust_gateway_policy.tofu similarity index 89% rename from tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu rename to tofu/cloudflare/adblock/cloudflare_zero_trust_gateway_policy.tofu index ab6cb8746..887f17306 100644 --- a/tofu/cf-adblock/cloudflare_zero_trust_gateway_policy.tofu +++ b/tofu/cloudflare/adblock/cloudflare_zero_trust_gateway_policy.tofu @@ -1,5 +1,5 @@ resource "cloudflare_zero_trust_gateway_policy" "block_malware" { - account_id = var.cloudflare_secondary_account_id + account_id = var.cloudflare_account_id name = "Block malware" description = "Block known threats based on Cloudflare's threat intelligence" diff --git a/tofu/cf-adblock/manage_cloudflare_adblock.py b/tofu/cloudflare/adblock/manage_cloudflare_adblock.py similarity index 99% rename from tofu/cf-adblock/manage_cloudflare_adblock.py rename to tofu/cloudflare/adblock/manage_cloudflare_adblock.py index 6ae67fe56..fe013d7ed 100644 --- a/tofu/cf-adblock/manage_cloudflare_adblock.py +++ b/tofu/cloudflare/adblock/manage_cloudflare_adblock.py @@ -14,8 +14,8 @@ # --- Configuration --- # These 
would ideally be fetched from environment variables or a config file # For GitHub Actions, CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN will be set as env vars -CLOUDFLARE_ACCOUNT_ID = os.environ.get("TF_VAR_cloudflare_secondary_account_id") -CLOUDFLARE_API_TOKEN = os.environ.get("TF_VAR_cloudflare_secondary_api_token") +CLOUDFLARE_ACCOUNT_ID = os.environ.get("TF_VAR_cloudflare_account_id") +CLOUDFLARE_API_TOKEN = os.environ.get("TF_VAR_cloudflare_zero_trust_tofu_token") # The DNS Location ID will be passed as an argument or fetched from Terraform output # For now, let's assume it's passed as an environment variable by the GitHub Action DNS_LOCATION_ID = os.environ.get("DNS_LOCATION_ID") diff --git a/tofu/cf-adblock/providers.tofu b/tofu/cloudflare/adblock/providers.tofu similarity index 79% rename from tofu/cf-adblock/providers.tofu rename to tofu/cloudflare/adblock/providers.tofu index 4a4de7c6a..994389658 100644 --- a/tofu/cf-adblock/providers.tofu +++ b/tofu/cloudflare/adblock/providers.tofu @@ -17,16 +17,16 @@ terraform { keys = key_provider.pbkdf2.my_passphrase } state { - method = method.aes_gcm.my_method + method = method.aes_gcm.my_method enforced = true } plan { - method = method.aes_gcm.my_method + method = method.aes_gcm.my_method enforced = true } } } provider "cloudflare" { - api_token = var.cloudflare_secondary_api_token + api_token = var.cloudflare_zero_trust_tofu_token } diff --git a/tofu/cloudflare/adblock/variables.tofu b/tofu/cloudflare/adblock/variables.tofu new file mode 100644 index 000000000..c8485f322 --- /dev/null +++ b/tofu/cloudflare/adblock/variables.tofu @@ -0,0 +1,48 @@ +variable "tofu_encryption_passphrase" { + description = "State encryption passphrase" + type = string + sensitive = true + default = null +} + +variable "cloudflare_account_id" { + type = string +} + +variable "cloudflare_zero_trust_tofu_token" { + type = string + sensitive = true +} + +variable "cloudflare_r2_tofu_access_key" { + type = string + description = "The 
Cloudflare R2 access key for tofu remote state." +} + +variable "cloudflare_r2_tofu_access_secret" { + type = string + description = "The Cloudflare R2 access secret for tofu remote state." +} + +# Define a variable for the bucket name +variable "bucket_name" { + description = "The globally unique name for the bucket." + type = string + + validation { + condition = can(regex("^[a-z0-9][a-z0-9-]*[a-z0-9]$", var.bucket_name)) + error_message = "Bucket name must be lowercase, alphanumeric, and hyphens only." + } + + validation { + condition = length(var.bucket_name) >= 3 && length(var.bucket_name) <= 63 + error_message = "Bucket name must be between 3 and 63 characters." + } +} + +# Define a variable for part of bucket prefix. +# This should be set automatically based on branch logic in devcontainer. +variable "branch_env" { + description = "Part of bucket prefix." + type = string +} diff --git a/tofu/cloudflare/email-alias/DOCS.md b/tofu/cloudflare/email-alias/DOCS.md new file mode 100644 index 000000000..2de270984 --- /dev/null +++ b/tofu/cloudflare/email-alias/DOCS.md @@ -0,0 +1,67 @@ +## Overview + +This OpenTofu module automates the setup of a sophisticated email forwarding system using Cloudflare Email Routing and the open-source [email-gateway-cloudflare](https://github.com/CutTheCrapTech/email-gateway-cloudflare) worker. This setup provides a robust solution for managing both simple email forwarding and advanced, secure email aliasing. + +## The `email-gateway-cloudflare` Worker + +The core of the dynamic email forwarding is the `email-gateway-cloudflare` worker. This worker provides several key features: + +- **Secure Email Forwarding**: The worker uses HMAC-based email aliases to cryptographically verify incoming emails. This prevents spammers and unauthorized senders from using your domain and flooding your inbox, a common problem with traditional catch-all addresses. 
+- **Dynamic Alias Generation**: You can generate secure, private email aliases on the fly for different services. This helps protect your real email address and track where spam is coming from. +- **Easy Alias Creation**: The open-source [browser extensions](https://github.com/CutTheCrapTech/email-alias-extensions) make it easy to generate these secure aliases directly in your browser. +- **Future Improvements**: The project aims to integrate the [email-sanitizer](https://github.com/CutTheCrapTech/email-scrubber-core) library to strip tracking pixels and clean URLs from incoming emails. This feature is pending support for email body modification in Cloudflare Workers. + +## Key Resources + +- **`cloudflare_email_routing_address`**: Creates the destination email addresses where emails will be forwarded. **Note:** These addresses must be manually verified in the Cloudflare dashboard before they can be used. +- **`cloudflare_email_routing_rule`**: Defines static forwarding rules that map a source email address to a destination address. +- **`cloudflare_email_routing_catch_all`**: Configures a catch-all rule that forwards any email that doesn't match a static rule to the `email-gateway` worker. +- **`cloudflare_workers_script`**: Deploys the `email-gateway` worker, which enables dynamic and private email forwarding. The worker is configured with environment variables and secrets for customization. +- **`http` data source**: Dynamically fetches the latest version of the `email-gateway` worker script from the official GitHub repository, ensuring you are always running the latest version. + +## Prerequisites + +Before applying this module, you must complete the following steps in your Cloudflare dashboard: + +1. **Configure DNS for Email Routing**: Navigate to your zone's "Email Routing" settings and follow the instructions to add the required MX and TXT records to your DNS. This is a one-time setup that enables Cloudflare to handle your domain's email. +2.
**Verify Destination Addresses**: Any email address you intend to use as a destination for forwarding must be manually verified. You can do this in the "Email Routing" settings of your Cloudflare dashboard. + +## Instructions + +### Environment Variables + +Ensure the following environment variables are set in your execution environment: + +- `TF_VAR_cloudflare_account_id` - set it in Infisical manually +- `TF_VAR_cloudflare_zone_id` - set it in Infisical manually +- `TF_VAR_cloudflare_email_tofu_token` - automatically set in the devcontainer by [cloudflare account tokens](../account-tokens/cf/README.md). +- `TF_VAR_cloudflare_r2_tofu_access_key` - automatically set in the devcontainer by [cloudflare remote state](../../remote-state/cf/README.md). +- `TF_VAR_cloudflare_r2_tofu_access_secret` - automatically set in the devcontainer by [cloudflare remote state](../../remote-state/cf/README.md). +- `TF_VAR_bucket_name` - automatically set in the devcontainer when set in the `.env` file in the root folder. +- `TF_VAR_branch_env` - automatically set in the devcontainer based on the current branch. +- `TF_VAR_tofu_encryption_passphrase` - set it in Infisical manually +- `TF_VAR_email_options` - Detailed docs on how to set this variable can be found in the [email-gateway-cloudflare](https://github.com/CutTheCrapTech/email-gateway-cloudflare) repository. +- `TF_VAR_email_secret_mapping` - Detailed docs on how to set this variable can be found in the [email-gateway-cloudflare](https://github.com/CutTheCrapTech/email-gateway-cloudflare) repository.
+- `TF_VAR_email_routing_addresses` - destination addresses - example: `["x@gmail.com", "y@gmail.com"]` +- `TF_VAR_email_routing_rules` - routing rules for non-catch-all forwarding - example: `{"a@your-domain.com": "x@gmail.com", "b@your-domain.com": "y@gmail.com"}` +- Note: You might need to run `source ~/.zshrc` in your devcontainer to ensure some of the environment variables are loaded correctly after they are automatically set up in Infisical for the first time by remote state / account tokens. + +### Execution + +Once the prerequisites are met and the environment variables are set, you can apply the configuration: + +```bash +# Initialize tofu +tofu init + +# Run tofu apply to create the email routing rules and worker +tofu apply +``` + +## Known Issues + +### Perpetual Diff in Worker Bindings + +Due to the way the Cloudflare Terraform provider handles `secret_text` bindings for workers, you may notice a perpetual "in-place update" for the `cloudflare_workers_script.email_gateway_worker` resource in your `tofu plan` output. The provider cannot read the secret's value back from Cloudflare, so it conservatively proposes an update on every plan to ensure the secret is correctly set. + +This is expected and harmless. The plan will simply re-apply the same secret value. It is a known inconvenience of the provider's design, and you can safely proceed with the apply. diff --git a/tofu/cloudflare/email-alias/README.md b/tofu/cloudflare/email-alias/README.md new file mode 100644 index 000000000..eed675d1d --- /dev/null +++ b/tofu/cloudflare/email-alias/README.md @@ -0,0 +1,19 @@ +## Overview + +This OpenTofu module configures a powerful and secure email forwarding system using Cloudflare Email Routing and the open-source [email-gateway-cloudflare](https://github.com/CutTheCrapTech/email-gateway-cloudflare) worker. It enables both standard email forwarding and dynamic, secure, and private email alias generation.
+ +## Key Features + +- **Standard Email Forwarding**: Create simple, static rules to forward emails bound to custom addresses to designated destinations, using a mapping. +- **Secure Catch-all Worker**: Deploys a worker that acts as a catch-all, processing all emails that don't match a static rule. This worker uses HMAC-based email aliases to prevent spam and unauthorized use of your domain. +- **Dynamic Worker Versioning**: Automatically fetches and deploys the latest version of the email gateway worker from GitHub, ensuring you always have the latest features and security updates. +- **Easy Alias Generation**: Secure email aliases can be easily generated using the open-source [browser extensions](https://github.com/CutTheCrapTech/email-alias-extensions) for both [chrome](https://chromewebstore.google.com/detail/email-alias-generator/ghhkompkfhenihpidldalcocbfplkdgm) and firefox. + +## Prerequisites + +- **DNS Configuration**: Before using this module, you must configure the DNS records for email routing in your Cloudflare dashboard. Follow the instructions in the Cloudflare dashboard to add the required MX and TXT records. +- **Destination Email Verification**: You must manually verify any destination email addresses in the Cloudflare dashboard before they can be used in routing rules. + +## Instructions + +For detailed prerequisites and step-by-step instructions, please refer to [DOCS.md](./DOCS.md). 
diff --git a/tofu/cloudflare/email-alias/backend.tofu b/tofu/cloudflare/email-alias/backend.tofu new file mode 100644 index 000000000..c6cc0bf59 --- /dev/null +++ b/tofu/cloudflare/email-alias/backend.tofu @@ -0,0 +1,18 @@ +terraform { + backend "s3" { + bucket = var.bucket_name + key = "cloudflare/email-alias/${var.branch_env}/terraform.tfstate" + region = "auto" + skip_credentials_validation = true + skip_metadata_api_check = true + skip_region_validation = true + skip_requesting_account_id = true + skip_s3_checksum = true + use_path_style = true + endpoints = { + s3 = "https://${var.cloudflare_account_id}.r2.cloudflarestorage.com" + } + access_key = var.cloudflare_r2_tofu_access_key + secret_key = var.cloudflare_r2_tofu_access_secret + } +} diff --git a/tofu/cloudflare/email-alias/email.tofu b/tofu/cloudflare/email-alias/email.tofu new file mode 100644 index 000000000..ad43aeae6 --- /dev/null +++ b/tofu/cloudflare/email-alias/email.tofu @@ -0,0 +1,40 @@ +resource "cloudflare_email_routing_address" "email_routing_addresses" { + for_each = toset(var.email_routing_addresses) + account_id = var.cloudflare_account_id + email = each.value +} + +resource "cloudflare_email_routing_rule" "email_routing_rules" { + for_each = var.email_routing_rules + zone_id = var.cloudflare_zone_id + + actions = [{ + type = "forward" + value = [each.value] + }] + + matchers = [{ + type = "literal" + field = "to" + value = each.key + }] + + enabled = true + name = "EmailRule_${substr(md5(each.key), 0, 8)}" + priority = 0 +} + +resource "cloudflare_email_routing_catch_all" "email_routing_catch_all" { + zone_id = var.cloudflare_zone_id + actions = [{ + type = "worker" + value = ["email-gateway-${var.branch_env}"] + }] + matchers = [{ + type = "all" + }] + enabled = true + name = "Catch-all rule." 
+ + depends_on = [cloudflare_workers_script.email_gateway_worker] +} diff --git a/tofu/cloudflare/email-alias/email_vars.tofu b/tofu/cloudflare/email-alias/email_vars.tofu new file mode 100644 index 000000000..ba343f8ac --- /dev/null +++ b/tofu/cloudflare/email-alias/email_vars.tofu @@ -0,0 +1,50 @@ +variable "email_routing_addresses" { + description = "List of destination email addresses for routing" + type = list(string) + default = [] +} + +variable "email_routing_rules" { + description = "Map of source email addresses to destination email addresses" + type = map(string) + default = {} + validation { + condition = alltrue([ + for dest_email in values(var.email_routing_rules) : + contains(var.email_routing_addresses, dest_email) + ]) + error_message = "All destination emails in 'email_routing_rules' must be present in the 'email_routing_addresses' list." + } +} + +variable "email_options" { + description = "Email options" + type = string + validation { + condition = ( + can(jsondecode(var.email_options)) && + ( + try(jsondecode(var.email_options).default_email_address, null) == null || + contains(var.email_routing_addresses, jsondecode(var.email_options).default_email_address) + ) + ) + error_message = "The 'email_options' must be a valid JSON string. If it contains a 'default_email_address', that address must be listed in 'email_routing_addresses'." + } +} + +variable "email_secret_mapping" { + description = "Email secret mapping" + type = string + validation { + condition = ( + # First, ensure the variable is a valid JSON object. + can(jsondecode(var.email_secret_mapping)) && + # If it is, then perform the validation on the decoded map. + alltrue([ + for dest_email in values(jsondecode(var.email_secret_mapping)) : + contains(var.email_routing_addresses, dest_email) + ]) + ) + error_message = "The 'email_secret_mapping' must be a valid JSON string, and all its destination emails must be present in the 'email_routing_addresses' list." 
+ } +} diff --git a/tofu/cloudflare/email-alias/latest_release.tofu b/tofu/cloudflare/email-alias/latest_release.tofu new file mode 100644 index 000000000..c17e657ae --- /dev/null +++ b/tofu/cloudflare/email-alias/latest_release.tofu @@ -0,0 +1,14 @@ +# 1. Fetch latest release data from GitHub API +data "http" "github_latest_release" { + url = "https://api.github.com/repos/CutTheCrapTech/email-gateway-cloudflare/releases/latest" + + # GitHub API may require this header + request_headers = { + Accept = "application/vnd.github.v3+json" + } +} + +# 2. Parse the JSON response and extract the tag name (version) +locals { + latest_version = jsondecode(data.http.github_latest_release.response_body).tag_name +} diff --git a/tofu/cloudflare/email-alias/providers.tofu b/tofu/cloudflare/email-alias/providers.tofu new file mode 100644 index 000000000..30ade08a1 --- /dev/null +++ b/tofu/cloudflare/email-alias/providers.tofu @@ -0,0 +1,33 @@ + +terraform { + required_providers { + cloudflare = { + source = "cloudflare/cloudflare" + version = ">= 5.6.0" + } + http = { + source = "hashicorp/http" + version = ">= 3.5.0" + } + } + encryption { + key_provider "pbkdf2" "my_passphrase" { + passphrase = var.tofu_encryption_passphrase + } + method "aes_gcm" "my_method" { + keys = key_provider.pbkdf2.my_passphrase + } + state { + method = method.aes_gcm.my_method + enforced = true + } + plan { + method = method.aes_gcm.my_method + enforced = true + } + } +} + +provider "cloudflare" { + api_token = var.cloudflare_email_tofu_token +} diff --git a/tofu/cloudflare/email-alias/variables.tofu b/tofu/cloudflare/email-alias/variables.tofu new file mode 100644 index 000000000..0baf2550c --- /dev/null +++ b/tofu/cloudflare/email-alias/variables.tofu @@ -0,0 +1,56 @@ + +variable "cloudflare_account_id" { + type = string + description = "The Cloudflare account ID." +} + +variable "cloudflare_zone_id" { + type = string + description = "The Cloudflare zone ID." 
+} + +variable "cloudflare_email_tofu_token" { + type = string + description = "The Cloudflare Account API token for Email." + sensitive = true +} + +variable "cloudflare_r2_tofu_access_key" { + type = string + description = "The Cloudflare R2 access key for tofu remote state." +} + +variable "cloudflare_r2_tofu_access_secret" { + type = string + description = "The Cloudflare R2 access secret for tofu remote state." +} + +# Define a variable for the bucket name. +# Set this in .env file in root, which should automatically set it in devcontainer env. +variable "bucket_name" { + description = "The globally unique name for the bucket." + type = string + + validation { + condition = can(regex("^[a-z0-9][a-z0-9-]*[a-z0-9]$", var.bucket_name)) + error_message = "Bucket name must be lowercase, alphanumeric, and hyphens only." + } + + validation { + condition = length(var.bucket_name) >= 3 && length(var.bucket_name) <= 63 + error_message = "Bucket name must be between 3 and 63 characters." + } +} + +# Define a variable for part of bucket prefix. +# This should be set automatically based on branch logic in devcontainer. +variable "branch_env" { + description = "Part of bucket prefix." + type = string +} + +variable "tofu_encryption_passphrase" { + description = "The encryption passphrase for tofu state encryption." 
+ type = string + sensitive = true +} diff --git a/tofu/cloudflare/email-alias/workers.tofu b/tofu/cloudflare/email-alias/workers.tofu new file mode 100644 index 000000000..e780bc576 --- /dev/null +++ b/tofu/cloudflare/email-alias/workers.tofu @@ -0,0 +1,32 @@ +# Fetch the worker script content from the URL +data "http" "email_gateway_worker_script" { + url = "https://github.com/CutTheCrapTech/email-gateway-cloudflare/releases/download/${local.latest_version}/worker.js" +} + +# Create the Cloudflare Worker script +resource "cloudflare_workers_script" "email_gateway_worker" { + account_id = var.cloudflare_account_id + script_name = "email-gateway-${var.branch_env}" + content = data.http.email_gateway_worker_script.response_body + main_module = "worker.js" + compatibility_date = "2025-07-10" + + bindings = [ + # Bindings are ordered alphabetically to ensure a stable plan. + { + name = "EMAIL_OPTIONS" + type = "plain_text" + text = var.email_options + }, + { + name = "EMAIL_SECRET_MAPPING" + type = "secret_text" + text = var.email_secret_mapping + } + ] + + observability = { + enabled = true + head_sampling_rate = 1 + } +} From f232ec488520cfe2805e5d1d42fd5e8e8facd58d Mon Sep 17 00:00:00 2001 From: Karteek <120569182+karteekiitg@users.noreply.github.com> Date: Sun, 20 Jul 2025 14:47:59 +0530 Subject: [PATCH 3/4] fix: Correct worker.js release after moving to monorepo --- tofu/cloudflare/email-alias/latest_release.tofu | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tofu/cloudflare/email-alias/latest_release.tofu b/tofu/cloudflare/email-alias/latest_release.tofu index c17e657ae..60f177781 100644 --- a/tofu/cloudflare/email-alias/latest_release.tofu +++ b/tofu/cloudflare/email-alias/latest_release.tofu @@ -1,6 +1,6 @@ # 1. 
Fetch latest release data from GitHub API
-data "http" "github_latest_release" {
-  url = "https://api.github.com/repos/CutTheCrapTech/email-gateway-cloudflare/releases/latest"
+data "http" "github_releases" {
+  url = "https://api.github.com/repos/CutTheCrapTech/email-gateway-cloudflare/releases?per_page=100" # the list endpoint is paginated (30 per page by default); request the maximum of 100 so other packages' releases are less likely to push the worker release off the page
 
   # GitHub API may require this header
   request_headers = {
@@ -8,7 +8,10 @@ data "http" "github_latest_release" {
   }
 }
 
-# 2. Parse the JSON response and extract the tag name (version)
+# 2. Decode the release list and take the tag name (version) of the first release tagged "<package_name>@..."
 locals {
-  latest_version = jsondecode(data.http.github_latest_release.response_body).tag_name
+  package_name   = "@email-gateway/cloudflare-worker"
+  releases       = jsondecode(data.http.github_releases.response_body)
+  worker_release = [for r in local.releases : r if startswith(r.tag_name, "${local.package_name}@")][0] # fails with an invalid-index error if no release on the fetched page matches; assumes the API lists newest releases first - TODO confirm
+  latest_version = local.worker_release.tag_name
 }
From 5156297972e4210e67ba2ff54f1e71d221a2781e Mon Sep 17 00:00:00 2001
From: Karteek <120569182+karteekiitg@users.noreply.github.com>
Date: Thu, 24 Jul 2025 04:50:07 +0530
Subject: [PATCH 4/4] fix: Add github actions workers cf acc token

---
 tofu/cloudflare/account-tokens/cf.tofu        | 50 ++++++++++++++++++-
 tofu/cloudflare/account-tokens/infisical.tofu | 10 ++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/tofu/cloudflare/account-tokens/cf.tofu b/tofu/cloudflare/account-tokens/cf.tofu
index bdf310aab..2ef0bd53e 100644
--- a/tofu/cloudflare/account-tokens/cf.tofu
+++ b/tofu/cloudflare/account-tokens/cf.tofu
@@ -57,7 +57,7 @@ resource "cloudflare_account_token" "zero_trust_tofu_token" {
   }]
 }
 
-# Create Account token for Zero Trust access with proper permissions
+# Create Account token for Email and Workers access with proper permissions
 resource "cloudflare_account_token" "email_tofu_token" {
   name       = "Email Tofu Token"
   account_id = var.cloudflare_account_id
@@ -88,3 +88,51 @@ resource "cloudflare_account_token" "email_tofu_token" {
     }
   }]
 }
+
+# Create the account token used by GitHub Actions for Workers deployments; each policy below grants one permission group
+resource "cloudflare_account_token" "gha_workers_deployment" {
+  name       = "GitHub Actions Workers Deployment Token"
+  account_id = var.cloudflare_account_id
+
+  policies = [{
+    effect = "allow"
+    permission_groups = [{
+      id = local.api_token_permission_groups_map.account["Workers Scripts Write"]
+    }]
+    resources = {
+      "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" # account-scoped resource key
+    }
+    }, {
+    effect = "allow"
+    permission_groups = [{
+      id = local.api_token_permission_groups_map.account["Account Settings Read"]
+    }]
+    resources = {
+      "com.cloudflare.api.account.${var.cloudflare_account_id}" = "*" # account-scoped resource key
+    }
+    }, {
+    effect = "allow"
+    permission_groups = [{
+      id = local.api_token_permission_groups_map.zone["Workers Routes Write"] # zone-level permission group, hence the zone-scoped resource key below
+    }]
+    resources = {
+      "com.cloudflare.api.account.zone.${var.cloudflare_zone_id}" = "*" # zone-scoped resource key
+    }
+    }, {
+    effect = "allow"
+    permission_groups = [{
+      id = local.api_token_permission_groups_map.zone["Zone Read"]
+    }]
+    resources = {
+      "com.cloudflare.api.account.zone.${var.cloudflare_zone_id}" = "*" # zone-scoped resource key
+    }
+    }, {
+    effect = "allow"
+    permission_groups = [{
+      id = local.api_token_permission_groups_map.zone["Zone Settings Read"]
+    }]
+    resources = {
+      "com.cloudflare.api.account.zone.${var.cloudflare_zone_id}" = "*" # zone-scoped resource key
+    }
+  }]
+}
diff --git a/tofu/cloudflare/account-tokens/infisical.tofu b/tofu/cloudflare/account-tokens/infisical.tofu
index 9ac4b3a5f..ed1ae7cdf 100644
--- a/tofu/cloudflare/account-tokens/infisical.tofu
+++ b/tofu/cloudflare/account-tokens/infisical.tofu
@@ -27,3 +27,13 @@ resource "infisical_secret" "cloudflare_email_tofu_token" {
   folder_path  = var.infisical_rw_secrets_path
   workspace_id = var.infisical_project_id
 }
+
+# Store the GitHub Actions Workers deployment token as an Infisical secret
+resource "infisical_secret" "cloudflare_gha_workers_deployment" {
+  name  = "TF_VAR_cloudflare_gha_workers_deployment" # presumably named so it can be exported as a tofu input variable - confirm against consumers
+  value = cloudflare_account_token.gha_workers_deployment.value # the token value produced by the cloudflare_account_token resource of the same name
+
+  env_slug     = var.branch_env
+  folder_path  = var.infisical_rw_secrets_path
+  workspace_id = var.infisical_project_id
+}