diff --git a/CLAUDE.md b/CLAUDE.md index a9f005c..c460e47 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # attestation-server -A Go HTTP server for serving TEE (Trusted Execution Environment) attestation documents. The server runs behind an Envoy reverse proxy that terminates TLS — Envoy uses the private certificate for service-to-service mTLS (setting the XFCC header with the client cert hash) and optionally the public certificate for Internet-facing ingress without client certificates. The private certificate is only required when dependency endpoints are configured (mTLS for TEE-to-TEE communication) or when no public certificate is set; a TEE with only a public certificate can serve attestation reports to external clients without maintaining private TLS infrastructure. +A Go HTTP server for serving TEE (Trusted Execution Environment) attestation documents. The server runs behind an Envoy reverse proxy that terminates TLS — Envoy uses the private certificate for mTLS (forwarding the client cert hash in the XFCC header) and optionally the public certificate for Internet-facing ingress. The private certificate is only required when dependency endpoints are configured or when no public certificate is set. 
## Tech stack @@ -12,468 +12,209 @@ A Go HTTP server for serving TEE (Trusted Execution Environment) attestation doc ``` main.go # entry point -cmd/root.go # cobra root command; initializes config, logger, and starts server +cmd/root.go # cobra root command; initializes config, logger, starts server internal/attestation.go # GET /api/v1/attestation handler and helpers (package app) internal/config.go # Config struct and LoadConfig() (package app) -internal/dependencies.go # Transitive dependency attestation: parallel fetch, verify, cycle detection, server cert validation (package app) -internal/cosign.go # Cosign signature verification: bundle fetch, Sigstore/Rekor verification, Fulcio OID extraction + validation (package app) -internal/endorsements.go # Endorsement document fetching, DNSSEC, measurement validation, cosign integration (package app) -internal/fetch.go # Generic HTTP fetch with retry, per-attempt WARN logging, cache (ristretto), TTL parsing, cachedHTTPSGetter for TDX collateral — shared by endorsements, cosign, and TDX (package app) +internal/dependencies.go # Transitive dependency attestation (package app) +internal/cosign.go # Cosign signature verification (package app) +internal/endorsements.go # Endorsement fetching, validation, cosign integration (package app) +internal/fetch.go # HTTP fetch with retry, cache (ristretto), TTL parsing (package app) +internal/health.go # /healthz/live and /healthz/ready handlers (package app) internal/logging.go # NewLogger() (package app) internal/server.go # Server, NewServer(), Run() (package app) -internal/tls.go # TLS certificate/CA loading, verification, and hot-reload (package app) -internal/types.go # BuildInfo, AttestationReport, AttestationReportData, and other shared structs (package app) -pkg/dnssec/dnssec.go # DNSSEC chain-of-trust validation: walks delegation chain from zone to root, verifies RRSIG signatures, embedded IANA root KSK trust anchors (package dnssec) -pkg/hexbytes/hexbytes.go # Shared 
HexBytes type: []byte that JSON-serializes as hex string (package hexbytes) -pkg/nitro/nitro.go # Shared Nitro attestation: COSE_Sign1 verification, cert chain validation, embedded AWS Nitro root CA (package nitro) -pkg/nitro/nsm.go # NSM device access and attestation via /dev/nsm (package nitro) -pkg/nitro/tpm.go # NitroTPM device access and attestation via raw TPM2 protocol over /dev/tpm0 (package nitro) -pkg/sevsnp/sevsnp.go # SEV-SNP device access, attestation via go-sev-guest, signature verification, report parsing (package sevsnp) -pkg/tdx/tdx.go # Intel TDX device access, attestation via go-tdx-guest, quote verification, report parsing (package tdx) -pkg/tpm/tpm.go # Generic TPM PCR reading via google/go-tpm over /dev/tpmrm0 (package tpm) +internal/tls.go # TLS cert/CA loading, verification, hot-reload (package app) +internal/types.go # BuildInfo, AttestationReport, shared structs (package app) +pkg/dnssec/dnssec.go # DNSSEC chain-of-trust validation (package dnssec) +pkg/hexbytes/hexbytes.go # HexBytes type: []byte with hex JSON serialization (package hexbytes) +pkg/nitro/nitro.go # Nitro COSE_Sign1 verification, cert chain validation (package nitro) +pkg/nitro/nsm.go # NSM device access via /dev/nsm (package nitro) +pkg/nitro/tpm.go # NitroTPM via raw TPM2 protocol over /dev/tpm0 (package nitro) +pkg/sevsnp/sevsnp.go # SEV-SNP attestation via go-sev-guest (package sevsnp) +pkg/tdx/tdx.go # Intel TDX attestation via go-tdx-guest (package tdx) +pkg/tpm/tpm.go # Generic TPM PCR reading via /dev/tpmrm0 (package tpm) config/config.toml # default configuration file -flake.nix # Nix flake: reproducible hermetic build of the server binary and Docker image -flake.lock # pinned Nix input revisions (nixpkgs, flake-utils) -.github/workflows/ci.yml # CI: go fmt, go test (with DNSSEC live tests), go vet, go build on pushes to non-main branches -.github/workflows/nix-build.yml # Nix build: nix build .#docker-image (with offline tests) on PRs to main 
-.github/workflows/release.yml # Release: Nix build → Release Please → Docker push to GHCR + cosign -release-please-config.json # Release Please configuration (changelog sections, versioning) -.release-please-manifest.json # Release Please version manifest +flake.nix # Nix flake: hermetic build of binary and Docker image +flake.lock # pinned Nix input revisions +.github/workflows/ci.yml # CI: fmt, test, vet, build on non-main branches +.github/workflows/nix-build.yml # Nix build on PRs to main (flake/dep changes only) +.github/workflows/release.yml # Release: Nix build -> Release Please -> GHCR + cosign +release-please-config.json # Release Please configuration +.release-please-manifest.json # Release Please version manifest ``` ## Configuration -Configuration is loaded via a TOML config file, environment variables, and CLI flags. Priority (highest to lowest): CLI flags > env vars > config file > defaults. `LoadConfig` validates all values at startup: duration fields reject negative values (`parseDuration`), timeout and interval fields (`endorsements.client.timeout`, `revocation.refresh_interval`, `ratelimit.stall_timeout`) additionally reject zero, byte-size fields use `dustin/go-humanize` for parsing with int64 overflow protection, and invalid durations or byte sizes fail the startup. +Loaded via TOML config file, environment variables, and CLI flags. Priority: CLI flags > env vars > config file > defaults. `LoadConfig` validates at startup: duration fields reject negative values (`parseDuration`), timeout/interval fields additionally reject zero, byte-size fields use `dustin/go-humanize` with int64 overflow protection. ### Config file -The config file is resolved in order: +Resolved in order: 1. `--config-file` / `-c` flag 2. `ATTESTATION_SERVER_CONFIG_FILE` env var 3. `./config/config.toml` (fallback) 4. 
`./config.toml` (fallback) -See `config/config.toml` for the full structure: - -```toml -[log] -format = "json" -level = "info" - -[server] -host = "127.0.0.1" -port = 8187 - -[paths] -build_info = "/etc/build-info.json" -endorsements = "/etc/endorsements.json" - -[report.evidence] -nitronsm = false -nitrotpm = false -sevsnp = false -sevsnp_vmpl = 0 -tdx = false - -[tpm] -enabled = false -algorithm = "sha384" - -[report.user_data] -env = [] - -[ratelimit] -enabled = false -requests_per_second = 1 -burst = 1 -stall_timeout = "10s" - -[revocation] -enabled = true -refresh_interval = "12h" - -[secure_boot] -enforce = false - -[endorsements] -dnssec = false -allowed_domains = [] - -[endorsements.client] -timeout = "10s" - -[endorsements.cosign] -verify = true -url_suffix = ".sig" -tuf_cache_path = "" - -[endorsements.cosign.build_signer] -uri = "" -uri_regex = "" - -[http] -allow_proxy = false - -[http.cache] -size = "100MiB" -default_ttl = "1h" - -[dependencies] -endpoints = [] - -[tls.public] -cert_path = "" -key_path = "" -skip_verify = false - -[tls.private] -cert_path = "" -key_path = "" -ca_path = "" # required when private cert is configured -``` +See `config/config.toml` for the full structure and defaults. ### CLI flags -Only logging and config file settings have CLI flag equivalents: - -| Flag | Default | Description | -|------|---------|-------------| -| `--config-file`, `-c` | _(see fallback above)_ | Path to TOML config file | -| `--log-format` | `json` | Log format: `json`/`text` | -| `--log-level` | `info` | Log level: `debug`/`info`/`warn`/`error` | +Only the logging and config file settings have CLI flag equivalents: `--config-file`/`-c`, `--log-format` (`json`/`text`), `--log-level` (`debug`/`info`/`warn`/`error`). 
### Environment variables -All settings can be configured via environment variables prefixed with `ATTESTATION_SERVER_`: - -| Env var | TOML key | Default | Description | -|---------|----------|---------|-------------| -| `ATTESTATION_SERVER_CONFIG_FILE` | — | — | Path to TOML config file | -| `ATTESTATION_SERVER_LOG_FORMAT` | `log.format` | `json` | Log format: `json`/`text` | -| `ATTESTATION_SERVER_LOG_LEVEL` | `log.level` | `info` | Log level: `debug`/`info`/`warn`/`error` | -| `ATTESTATION_SERVER_SERVER_HOST` | `server.host` | `127.0.0.1` | HTTP bind host | -| `ATTESTATION_SERVER_SERVER_PORT` | `server.port` | `8187` | HTTP bind port | -| `ATTESTATION_SERVER_PATHS_BUILD_INFO` | `paths.build_info` | `/etc/build-info.json` | Path to build information file | -| `ATTESTATION_SERVER_PATHS_ENDORSEMENTS` | `paths.endorsements` | `/etc/endorsements.json` | Path to endorsements URL list file | -| `ATTESTATION_SERVER_TLS_PUBLIC_CERT_PATH` | `tls.public.cert_path` | — | Path to public TLS certificate (PEM) | -| `ATTESTATION_SERVER_TLS_PUBLIC_KEY_PATH` | `tls.public.key_path` | — | Path to public TLS private key (PEM) | -| `ATTESTATION_SERVER_TLS_PUBLIC_SKIP_VERIFY` | `tls.public.skip_verify` | `false` | Skip system/Mozilla root CA chain verification for the public certificate | -| `ATTESTATION_SERVER_TLS_PRIVATE_CERT_PATH` | `tls.private.cert_path` | — | Path to private TLS certificate (PEM). Required when dependency endpoints are configured or no public certificate is set | -| `ATTESTATION_SERVER_TLS_PRIVATE_KEY_PATH` | `tls.private.key_path` | — | Path to private TLS private key (PEM). Required when dependency endpoints are configured or no public certificate is set | -| `ATTESTATION_SERVER_TLS_PRIVATE_CA_PATH` | `tls.private.ca_path` | — | PEM CA bundle — all private certs in the dependency chain must be issued by this CA. 
Required when private cert is configured | -| `ATTESTATION_SERVER_REPORT_EVIDENCE_NITRONSM` | `report.evidence.nitronsm` | `false` | Enable Nitro NSM evidence (exclusive: cannot combine with others) | -| `ATTESTATION_SERVER_REPORT_EVIDENCE_NITROTPM` | `report.evidence.nitrotpm` | `false` | Enable Nitro TPM evidence | -| `ATTESTATION_SERVER_REPORT_EVIDENCE_SEVSNP` | `report.evidence.sevsnp` | `false` | Enable SEV-SNP evidence | -| `ATTESTATION_SERVER_REPORT_EVIDENCE_SEVSNP_VMPL` | `report.evidence.sevsnp_vmpl` | `0` | VMPL level for SEV-SNP attestation (0–3) | -| `ATTESTATION_SERVER_REPORT_EVIDENCE_TDX` | `report.evidence.tdx` | `false` | Enable Intel TDX evidence (exclusive: cannot combine with others) | -| `ATTESTATION_SERVER_TPM_ENABLED` | `tpm.enabled` | `false` | Enable generic TPM PCR reading via /dev/tpmrm0; auto-disabled if NitroNSM or NitroTPM evidence is enabled. **Note:** generic TPM PCR values are unattested (`TPM2_PCR_Read`) — they lack a hardware-signed quote. Integrity relies on the TEE's memory encryption protecting the OS. NitroNSM and NitroTPM PCRs are hardware-attested (embedded in the signed attestation document). A future revision may use `TPM2_Quote` for hardware-attested PCR values | -| `ATTESTATION_SERVER_TPM_ALGORITHM` | `tpm.algorithm` | `sha384` | Hash algorithm for TPM PCR values: `sha1`/`sha256`/`sha384`/`sha512` (case-insensitive) | -| `ATTESTATION_SERVER_REVOCATION_ENABLED` | `revocation.enabled` | `true` | Check TEE endorsement key CRLs. 
SEV-SNP CRLs are fetched from AMD KDS in the background when local SEV-SNP evidence is enabled or dependencies are configured; TDX uses go-tdx-guest's built-in Intel PCS collateral fetching | -| `ATTESTATION_SERVER_REVOCATION_REFRESH_INTERVAL` | `revocation.refresh_interval` | `12h` | How often to re-fetch CRLs in the background (SEV-SNP only; TDX checks are per-request via the library) | -| `ATTESTATION_SERVER_RATELIMIT_ENABLED` | `ratelimit.enabled` | `false` | Rate-limit edge requests (those without client certificate / XFCC header) | -| `ATTESTATION_SERVER_RATELIMIT_REQUESTS_PER_SECOND` | `ratelimit.requests_per_second` | `1` | Per-IP request rate for edge traffic | -| `ATTESTATION_SERVER_RATELIMIT_BURST` | `ratelimit.burst` | `1` | Burst allowance per IP | -| `ATTESTATION_SERVER_RATELIMIT_STALL_TIMEOUT` | `ratelimit.stall_timeout` | `10s` | Max time an over-limit request is stalled before receiving 429; IP extracted from `X-Envoy-Original-IP` > `X-Forwarded-For` > connection IP | -| `ATTESTATION_SERVER_SECURE_BOOT_ENFORCE` | `secure_boot.enforce` | `false` | Enforce UEFI Secure Boot; exit on startup if not enabled. UEFI secure boot detection is skipped when NitroNSM evidence is enabled (enclaves have no EFI firmware; boot integrity is proven by NSM PCR measurements) | -| `ATTESTATION_SERVER_REPORT_USER_DATA_ENV` | `report.user_data.env` | `[]` | Comma-separated environment variable names to include in report (unique) | -| `ATTESTATION_SERVER_DEPENDENCIES_ENDPOINTS` | `dependencies.endpoints` | `[]` | Comma-separated URLs of dependency attestation servers. 
HTTPS endpoints are verified against the private CA bundle (mTLS) and the server's TLS certificate fingerprint is matched against `data.tls.private` in the attestation report; HTTP endpoints are a design decision for transparent proxy configurations where Envoy diverts traffic through mTLS on non-loopback interfaces — the e2e encryption proof (XFCC fingerprint check + server cert check for HTTPS) ensures the connection was mTLS-protected regardless of the URL scheme | -| `ATTESTATION_SERVER_ENDORSEMENTS_DNSSEC` | `endorsements.dnssec` | `false` | Require strict DNSSEC validation for endorsement URL hosts | -| `ATTESTATION_SERVER_ENDORSEMENTS_ALLOWED_DOMAINS` | `endorsements.allowed_domains` | `[]` | Comma-separated list of allowed endorsement hostnames (exact match). Empty = unrestricted. Applies to both own and dependency endorsement URLs | -| `ATTESTATION_SERVER_ENDORSEMENTS_CLIENT_TIMEOUT` | `endorsements.client.timeout` | `10s` | Overall timeout for fetching endorsement documents and cosign signatures (with retries) | -| `ATTESTATION_SERVER_ENDORSEMENTS_COSIGN_VERIFY` | `endorsements.cosign.verify` | `true` | Verify cosign signatures on endorsement documents using Sigstore public-good infrastructure | -| `ATTESTATION_SERVER_ENDORSEMENTS_COSIGN_URL_SUFFIX` | `endorsements.cosign.url_suffix` | `.sig` | Suffix appended to endorsement URL to fetch the cosign signature bundle | -| `ATTESTATION_SERVER_ENDORSEMENTS_COSIGN_TUF_CACHE_PATH` | `endorsements.cosign.tuf_cache_path` | — | Writable directory for Sigstore TUF metadata cache. Empty = in-memory only (no disk writes; background refresh every 24h). 
Set a path for disk-cached TUF root that survives restarts | -| `ATTESTATION_SERVER_ENDORSEMENTS_COSIGN_BUILD_SIGNER_URI` | `endorsements.cosign.build_signer.uri` | — | Exact match override for BuildSignerURI Fulcio OID (takes precedence over `uri_regex`) | -| `ATTESTATION_SERVER_ENDORSEMENTS_COSIGN_BUILD_SIGNER_URI_REGEX` | `endorsements.cosign.build_signer.uri_regex` | — | Regex match override for BuildSignerURI Fulcio OID (ignored if `uri` is set) | -| `ATTESTATION_SERVER_HTTP_ALLOW_PROXY` | `http.allow_proxy` | `false` | Honour `HTTP_PROXY`/`HTTPS_PROXY`/`NO_PROXY` env vars for the server's outbound HTTP clients (endorsement/cosign fetches, SEV-SNP CRL fetches, dependency requests). Off by default; required in environments like AWS Nitro Enclaves where a vsock-proxy is the only egress path. TDX collateral fetching (go-tdx-guest) always honours proxy env vars via `http.DefaultTransport` regardless of this setting | -| `ATTESTATION_SERVER_HTTP_CACHE_SIZE` | `http.cache.size` | `100MiB` | Maximum memory for the shared HTTP fetch cache (endorsements + cosign signatures, ristretto) | -| `ATTESTATION_SERVER_HTTP_CACHE_DEFAULT_TTL` | `http.cache.default_ttl` | `1h` | Default cache TTL when response has no Cache-Control header (capped at 24h) | - -List-typed environment variables (`ATTESTATION_SERVER_REPORT_USER_DATA_ENV`, `ATTESTATION_SERVER_DEPENDENCIES_ENDPOINTS`) support comma-separated values: `VAR=a,b,c`. Spaces around commas are trimmed. +All TOML settings map to env vars prefixed with `ATTESTATION_SERVER_`, with `.` replaced by `_` and uppercased (e.g., `tls.private.ca_path` -> `ATTESTATION_SERVER_TLS_PRIVATE_CA_PATH`). + +List-typed variables (`REPORT_USER_DATA_ENV`, `DEPENDENCIES_ENDPOINTS`, `ENDORSEMENTS_ALLOWED_DOMAINS`) support comma-separated values with trimmed spaces: `VAR=a,b,c`. ## Logging conventions -- Use `log/slog` throughout; never use `fmt.Print*` or `log.*` for application logs. 
-- Log messages are short single sentences, **no initial capital, no trailing punctuation**. -- All structured details (IDs, values, errors) go in slog fields, not in the message string. -- Access logs (via the fiber middleware in `server.go`) include: `method`, `path`, `status`, `duration_ms`, `request_id`. Log level is INFO for 2xx/3xx, WARN for 4xx, ERROR for 5xx. -- Errors in log fields use key `"error"`. +- Use `log/slog` throughout; never `fmt.Print*` or `log.*`. +- Messages: short single sentences, **no initial capital, no trailing punctuation**. +- Structured details (IDs, values, errors) go in slog fields, not in the message. +- Access logs: `method`, `path`, `status`, `duration_ms`, `request_id`. INFO for 2xx/3xx, WARN for 4xx, ERROR for 5xx. +- Error field key: `"error"`. ## Code style -- All Go code must be `go fmt`-conformant. Always run `go fmt ./...` before committing. -- Use `github.com/goccy/go-json` everywhere instead of `encoding/json`. The attestation handler marshals report data with `json.MarshalWithOption(..., json.DisableHTMLEscape())` for the nonce digest, then embeds those exact bytes (via `json.RawMessage`) in the response to guarantee byte-for-byte consistency. -- **Fiber `UnsafeString` hazard**: Fiber's `c.Get()`, `c.Query()`, `c.IP()`, `c.Method()`, `c.Path()`, and similar methods return strings backed by fasthttp's reusable `RequestCtx` buffer (`UnsafeString`). These strings are only valid within the handler. If stored in a long-lived data structure (map key, struct field on the server, channel, etc.), the backing bytes are silently corrupted when fasthttp recycles the `RequestCtx` via `sync.Pool`. Use `strings.Clone()` before storing any Fiber context string beyond the handler lifetime. Operations that implicitly copy (JSON marshaling, string concatenation, `net/http.Header.Set`) are safe without cloning. 
-- **Fiber `c.UserContext()` hazard**: Fiber's `c.UserContext()` returns `context.Background()` — it is never cancelled on graceful shutdown. Do not pass it to functions that perform blocking operations (HTTP fetches with retry, rate limiter waits, etc.) because those operations will not be interrupted when the server shuts down. Use `s.shutdownCtx()` instead, which returns the server's lifecycle context (set in `Run()`), falling back to `context.Background()` for pre-`Run` callers and unit tests. +- All Go code must be `go fmt`-conformant. Run `go fmt ./...` before committing. +- Use `github.com/goccy/go-json` everywhere instead of `encoding/json`. The attestation handler uses `json.MarshalWithOption(..., json.DisableHTMLEscape())` for the nonce digest, then embeds via `json.RawMessage` for byte-for-byte consistency. +- **Fiber `UnsafeString` hazard**: `c.Get()`, `c.Query()`, `c.IP()`, etc. return strings backed by fasthttp's reusable buffer. Use `strings.Clone()` before storing beyond handler lifetime. JSON marshaling and string concatenation are safe without cloning. +- **Fiber `c.UserContext()` hazard**: Returns `context.Background()` — never cancelled on shutdown. Use `s.shutdownCtx()` instead for blocking operations (HTTP fetches, rate limiter waits, etc.). ## TEE package public API -Each TEE package (`pkg/nitro`, `pkg/sevsnp`, `pkg/tdx`) exposes a consistent set of public functions: - -| Function | Purpose | -|----------|---------| -| `GetEvidence` | Retrieve raw evidence from the device without verification | -| `VerifyEvidence` | Verify a raw evidence blob (standalone, no device needed) | -| `Attest` | Combined retrieval + verification (calls `GetEvidence` then `VerifyEvidence`) | - -The `sevsnp` package additionally exports `SplitEvidence` (split a blob into raw report + certificate table), `ReportSize` (the raw report size constant), and the `RevocationChecker` function type. 
- -Both `sevsnp.VerifyEvidence` and `tdx.VerifyEvidence` accept optional variadic parameters for revocation checking. These are omitted by standalone callers (backward-compatible) and provided by the attestation server when revocation is enabled: - -- `sevsnp.VerifyEvidence(blob, reportData, now, checkers ...RevocationChecker)` — optional callback checking the endorsement key (VCEK/VLEK) certificate against a CRL -- `tdx.VerifyEvidence(rawQuote, reportData, now, opts ...VerifyOpt)` — `VerifyOpt` contains `CheckRevocations bool` (enables go-tdx-guest Intel PCS collateral fetching and CRL checking) and `Getter trust.HTTPSGetter` (overrides the HTTP client used for collateral fetching; the server provides a caching getter backed by the shared ristretto cache to avoid per-request Intel PCS round-trips) +Each TEE package (`pkg/nitro`, `pkg/sevsnp`, `pkg/tdx`) exposes: `GetEvidence`, `VerifyEvidence`, `Attest` (combined get+verify). The `sevsnp` package additionally exports `SplitEvidence`, `ReportSize`, and `RevocationChecker`. Both `sevsnp.VerifyEvidence` and `tdx.VerifyEvidence` accept optional variadic parameters for revocation checking. ## SEV-SNP workarounds (pkg/sevsnp) -`VerifyEvidence` implements its own verification flow instead of using `verify.SnpAttestation` from go-sev-guest (v0.14.1) to work around three library issues affecting cloud platforms like AWS Nitro. The workarounds are documented in the function's doc comment. Key constraints: +`VerifyEvidence` implements its own verification instead of `verify.SnpAttestation` from go-sev-guest to work around library issues on AWS Nitro: -- **Do not replace with `verify.SnpAttestation`** — it will fail on AWS due to unknown policy bits, malformed ASK/ARK certs in the certificate table, and proto round-trip breaking the signature. -- **Do not remove `reportToProto`** — it sanitises policy bits for parsing while preserving the original value for the API response. 
-- **Do not remove `trustedRoots`** — these pre-parsed AMD root certs bypass the malformed certificate table entries. -- These workarounds can be revisited when go-sev-guest ships a release including [PR #181](https://github.com/google/go-sev-guest/pull/181) and fixes certificate table handling. +- **Do not replace with `verify.SnpAttestation`** — fails on AWS due to unknown policy bits, malformed ASK/ARK certs, and proto round-trip breaking signatures. +- **Do not remove `reportToProto`** — sanitises policy bits for parsing while preserving original values. +- **Do not remove `trustedRoots`** — pre-parsed AMD root certs bypass malformed certificate table entries. +- Revisit when go-sev-guest ships [PR #181](https://github.com/google/go-sev-guest/pull/181). -### SEV-SNP performance: certificate buffer caching +`GetEvidence` caches the certificate table size after the first call to eliminate a ~2s throttle delay on subsequent calls. The startup self-attestation primes this cache. -`GetEvidence` (and by extension `Attest`) caches the certificate table size after the first call. The go-sev-guest library's `GetRawExtendedReportAtVmpl` performs two ioctls per call (probe for cert buffer size + actual attestation), and the library's self-throttle inserts a ~2 s sleep between ioctls. By caching the cert size, subsequent calls use a single ioctl via `getExtendedReportDirect`, eliminating one PSP firmware round-trip and one throttle delay. The startup self-attestation `Attest` call primes this cache. - -## Attestation handler - -The handler calls `Attest` on each configured TEE device. Each `Attest` method retrieves evidence and verifies it internally using the same `VerifyEvidence` function that external verifiers would use, catching corrupted device output or driver bugs before they reach callers. The handler receives the verified parsed result alongside the raw blob and does not perform any additional verification. 
- -The `request_id` (a `crypto/rand`-backed UUID) is included in the nonce-bound `AttestationReportData` for audit trail purposes. Since it is cryptographically random, an attacker cannot predict it to pre-compute attestation reports. Verifiers recompute the nonce from the response data (which includes the request_id), not from a pre-shared value. - -### Startup self-attestation +## Critical design constraints -During server initialization (`NewServer`), each opened TEE device is self-attested by calling `Attest` with random nonce/report data. The parsed results are captured in `parsedSelfAttestation` for endorsement validation. This catches environment issues early (tampered firmware, broken devices), primes the SEV-SNP certificate buffer cache, and provides the baseline measurements for endorsement checks. The server exits on any self-attestation failure. +### Error information leakage -## Transitive dependency attestation +Handler error messages must be opaque (`"attestation failed"`, `"dependency attestation failed"`). Never include device errors, file paths, or firmware codes. Real errors are logged at ERROR level with `request_id`. Unhandled errors (not wrapped in `fiber.NewError`) fall back to `"internal error"`. -When `dependencies.endpoints` is configured, the attestation handler fetches and verifies attestation reports from all dependency endpoints in parallel before collecting its own evidence. Each dependency receives the same nonce (`x-attestation-nonce` header) derived from the local `AttestationReportData` digest, and the same `X-Request-Id` for traceability. +### XFCC header -### Verification flow - -Each dependency response is parsed as an `AttestationReport`, verified (nonce binding + cryptographic evidence verification for all known TEE types including NitroTPM→SEV-SNP chaining), and embedded as `json.RawMessage` in the `dependencies` field. Raw bytes are stored instead of re-marshaled structs to avoid `goccy/go-json` zero-copy string issues. 
- -After cryptographic verification, two certificate fingerprint checks enforce end-to-end encryption: - -1. **Client cert check**: the dependency's `data.tls.client` must be present and match the SHA-256 fingerprint of our private certificate (which is used as the client cert for outgoing mTLS connections). This proves the dependency (via its Envoy) saw our client certificate. -2. **Server cert check** (HTTPS only): when the connection is over HTTPS, the server's leaf certificate fingerprint observed during the TLS handshake is compared against the dependency's `data.tls.private`. This binds the attestation report to the actual TLS connection, catching relay proxies that hold a valid CA-signed cert but are not the TEE. Skipped for plain HTTP endpoints where `resp.TLS` is nil (Envoy terminates TLS on the loopback interface). - -If either check fails, a descriptive error is logged and an opaque error is returned to the caller. - -The dependency HTTP client verifies server certificates against the private CA bundle (`tls.private.ca_path`) and presents the private certificate as the TLS client cert. All private certificates in the dependency chain must be issued by the same CA — Envoy only populates the XFCC header (which provides the client cert fingerprint) when the client cert passes CA verification. +Reject requests with multiple comma-separated XFCC entries (400). Single entry enforces direct e2e encryption without proxy intermediaries. ### End-to-end encryption proof -Every attestation response must prove end-to-end encryption via at least one of: -- `data.tls.client` — XFCC-forwarded client cert fingerprint (service-to-service mTLS within the dependency chain) -- `data.tls.public` — public certificate fingerprint (external Internet clients at the first ingress hop, without client certificates) - -If neither is present, the handler returns 400. This ensures the attestation evidence is always bound to a TLS channel that the verifier can reason about. 
- -### Cycle detection - -Dependency cycles are detected via the `X-Attestation-Path` header, which carries a comma-separated list of service identities visited along the dependency chain. Each server appends its own identity before forwarding to dependencies. If a server finds its identity already in the path, it returns 409 Conflict, which propagates up the chain. +Every attestation response must have `data.tls.client` (mTLS) or `data.tls.public` (public ingress). Missing both -> 400. -The service identity is deterministic: `SHA-256(json(build_info) || cert_subject || cert_SANs)`, using the private cert (or public cert as fallback). This ensures replicas of the same service share the same identity (cycles are between services, not processes), while different services produce distinct identities. SANs are included because SPIFFE SVIDs may have empty subjects. +### Dependency verification -### HTTP client hardening +After cryptographic verification of a dependency's attestation: +1. **Client cert check**: dependency's `data.tls.client` must match our private cert fingerprint +2. **Server cert check** (HTTPS only): observed server cert fingerprint must match dependency's `data.tls.private` -The dependency HTTP client is hardened against slowloris-like attacks with per-phase timeouts (dial: 5s, TLS handshake: 10s, response headers: 15s, overall: 30s), a 4 MiB response body limit, and disabled keep-alives. +Raw dependency bytes are stored as `json.RawMessage` (not re-marshaled) to avoid `goccy/go-json` zero-copy string issues. -### Certificate hot-reload - -Certificate files (public cert/key, private cert/key, and private CA bundle) are hot-reloaded via fsnotify directory watchers. Since `validateTLSConfig` requires the CA bundle to be in the same directory as the private cert/key, a single watcher covers all three files. 
On reload, the private cert, CA bundle, and computed fingerprints are swapped atomically under the same `sync.RWMutex` (`tlsCertificates.mu`) that protects concurrent reads from request handlers and the dependency HTTP client. - -The CA bundle loader (`loadCABundle`) cryptographically verifies self-signed certificates using `x509.CheckSignatureFrom`, rejecting certificates whose issuer matches subject but whose signature is invalid. SHA-1 CAs are hard-rejected (Go 1.18+ enforces this). - -### TLS version requirements - -The dependency mTLS HTTP client enforces TLS 1.3 minimum. The endorsement/cosign fetch client uses TLS 1.2 minimum since public CDNs may not yet support TLS 1.3. +### Cycle detection -## Rate limiting +`X-Attestation-Path` header carries visited service identities. Identity = `SHA-256(json(build_info) || cert_subject || cert_SANs)` using private cert (public cert fallback). SANs included because SPIFFE SVIDs may have empty subjects. -When `ratelimit.enabled` is true, a per-IP rate limiting handler is chained on the attestation endpoint (`/api/v1/attestation`) to protect the server from resource exhaustion by edge clients. It is scoped to this endpoint because attestation involves blocking TEE hardware operations; future lightweight endpoints should not inherit this restriction. The handler only applies to requests **without** an `x-forwarded-client-cert` (XFCC) header — service-to-service mTLS traffic is never rate-limited. +### Rate limiting -Client IP is extracted with priority: `X-Envoy-Original-IP` header > first entry in `X-Forwarded-For` > connection IP. Extracted values are validated as IP addresses to prevent header injection from creating unbounded map entries. +Only applies to requests without XFCC header (edge traffic), scoped to `/api/v1/attestation`. Stalls over-limit requests up to `stall_timeout` before 429. 
IP extraction: `X-Envoy-Original-IP` > first `X-Forwarded-For` > connection IP (validated as IP to prevent unbounded map entries). -Over-limit requests are **stalled** (blocked in a FIFO queue) up to `ratelimit.stall_timeout` before receiving HTTP 429. This avoids immediately rejecting burst traffic while still bounding resource consumption. Per-IP rate limiter entries are cleaned up in a background goroutine when idle for 2× the stall timeout. +### Endorsement domain allowlist -## Certificate revocation checking +Exact hostname match (case-insensitive), no subdomain matching. Applies to own and dependency endorsement URLs. Empty allowlist logs a startup warning. -When `revocation.enabled` is true (the default), the server checks TEE endorsement key certificates against Certificate Revocation Lists. CRL fetching is conditional on configuration: +### Cosign verification -- **SEV-SNP**: A background goroutine fetches AMD KDS CRLs for all supported product lines (Milan, Genoa, Turin) at `revocation.refresh_interval` (default 12h). Both VCEK and VLEK CRLs are fetched. CRLs are initialized when local SEV-SNP evidence is enabled **or** when dependency endpoints are configured (dependencies may include SEV-SNP evidence requiring revocation checks). The `crlCache` stores parsed `x509.RevocationList` entries and checks endorsement key serial numbers during verification. Design is **fail-open**: if no CRL data is available yet (first fetch still pending or failed), certificates are accepted. CRL fetches use the server's `fetchHTTPClient()` and honour `http.allow_proxy`. -- **TDX**: Revocation checking is delegated to go-tdx-guest's built-in Intel PCS collateral fetching (`CheckRevocations: true, GetCollateral: true`). The server provides a `cachedHTTPSGetter` (via `VerifyOpt.Getter`) that caches Intel PCS responses (TCB info, QE identity, PCK CRL, Root CA CRL) in the shared ristretto cache. On cache hit, no network calls are made. 
TTL is derived from response `Cache-Control` headers; Intel PCS currently returns no cache headers, so `http.cache.default_ttl` applies. The go-tdx-guest library still validates `NextUpdate` expiry on all collateral, so stale cached data is rejected. The cached getter uses the server's `fetchHTTPClient()` and honours `http.allow_proxy`. -- **Nitro**: No CRL mechanism exists (ephemeral certificate chains per attestation; revocation is handled by AWS at the hypervisor level). +Only Cosign v3 protobuf bundles supported. Fulcio OID fields validated against BuildInfo: +- `build_signer.uri` takes precedence over `uri_regex` (warning logged if both set) +- `BuildSignerDigest` skipped when any `build_signer` config is set (digest changes per-commit) +- `SourceRepositoryVisibilityAtSigning` maps to `SourceRepositoryVisibility` +- `DeploymentEnvironment` not checked (no standard Fulcio OID) -When disabled, a startup warning is logged: "certificate revocation checking is disabled, revoked TEE endorsement keys will be accepted". +When cosign is enabled, endorsement URLs are required (own and dependencies). -## Error information leakage +### TLS -The server preserves handler-controlled error messages for all `fiber.NewError` responses (both 4xx and 5xx). Handler code must never include internal details (device errors, file paths, firmware codes) in these messages — only opaque descriptions like `"attestation failed"` or `"dependency attestation failed"`. Unhandled errors (plain `error` values not wrapped in `fiber.NewError`) fall back to a generic `"internal error"` message. The real error is always logged at ERROR level with `request_id` for debugging. +- Dependency mTLS client: TLS 1.3 minimum. Endorsement/cosign client: TLS 1.2 minimum. +- Certificate hot-reload via fsnotify; private cert, CA bundle, and fingerprints swapped atomically under `tlsCertificates.mu`. 
+- CA bundle loader (`loadCABundle`) verifies self-signed certs via `x509.CheckSignatureFrom`; SHA-1 CAs hard-rejected. -## XFCC header validation +### Certificate revocation -The server rejects requests with multiple comma-separated XFCC entries (HTTP 400). The design assumes a single forwarded client certificate entry per request, enforcing direct end-to-end encryption without proxy intermediaries that might strip or replace the client cert. +- **SEV-SNP**: Background CRL fetch from AMD KDS (fail-open if no data yet). Initialized when local SEV-SNP evidence or dependency endpoints are configured. +- **TDX**: Delegated to go-tdx-guest with `cachedHTTPSGetter` backed by shared ristretto cache. +- **Nitro**: No CRL mechanism (ephemeral cert chains; AWS handles revocation). -## Endorsement domain allowlist +### Health checks -When `endorsements.allowed_domains` is configured (non-empty), endorsement document URLs are checked against the allowlist before fetching. Matching is **exact hostname** (case-insensitive) — subdomain matching is not supported, each host must be listed explicitly. The check applies to both own endorsement URLs and dependency endorsement URLs. An empty allowlist logs a startup warning since dependency reports can contain attacker-controlled URLs. +`/healthz/live` returns 200 once the HTTP listener is up. `/healthz/ready` returns 200 after `NewServer` (self-attestation, endorsement validation) and the initial CRL fetch (if configured) complete; 503 before that. Readiness is a one-way transition — no runtime condition (cert reload failure, CRL refresh failure) flips it back because all background processes use fail-safe/fail-open semantics. Health routes are not rate-limited. -## Endorsement validation +### Startup self-attestation -When `paths.endorsements` is configured with endorsement URLs, the server fetches and validates endorsement documents containing golden measurement values for each configured evidence type. 
+`NewServer` calls `Attest` with random nonce on each TEE device. Parsed results captured in `parsedSelfAttestation` for endorsement validation. Exits on failure. ### Endorsement document format -A JSON object with evidence-type keys (`nitronsm`, `nitrotpm`, `sevsnp`, `tdx`, `tpm`): -- **NitroNSM/NitroTPM/TPM**: `{"PCR0": "hex", ...}` or `{"0": "hex", ...}` — a flat map of PCR register indices to hex-encoded measurement values. Keys use either `"PCRN"` or `"N"` format (where N is 0–24). Values must be non-empty valid hex strings. These constraints are enforced at JSON parse time (`PCRGoldenValues.UnmarshalJSON`) so that downstream comparison functions (`comparePCRs`, `validateTPMMeasurements`) can trust the values are well-formed. `PCRGoldenValues` is `map[int]hexbytes.Bytes`, matching the type used for TPM PCR values throughout the codebase. -- **SEV-SNP**: a single hex string (96 chars = 384-bit launch measurement) +JSON with evidence-type keys (`nitronsm`, `nitrotpm`, `sevsnp`, `tdx`, `tpm`): +- **NitroNSM/NitroTPM/TPM**: `{"PCR0": "hex", ...}` or `{"0": "hex", ...}` — PCR indices to hex values. `PCRGoldenValues` is `map[int]hexbytes.Bytes`. +- **SEV-SNP**: single hex string (96 chars = 384-bit launch measurement) - **TDX**: `{"MRTD": "hex", "RTMR0": "hex", "RTMR1": "hex", "RTMR2": "hex"}` (all optional) -### Startup validation - -During `NewServer()`, after self-attestation (which now captures parsed results instead of discarding them), the server: -1. Fetches endorsement documents from all configured URLs in parallel with retry -2. Verifies all documents are byte-for-byte identical -3. Validates each configured evidence type against the golden measurements -4. Exits on any failure (missing measurements, mismatches, fetch errors) - -### Per-request revalidation - -Before collecting own evidence in `handleAttestation`, the handler calls `validateOwnEndorsements`. On cache hit this is fast (pointer lookup + comparison). 
On cache miss (TTL expired) it re-fetches and revalidates. If revalidation fails, the handler returns 500 but the server stays up and self-heals when endorsements become available. - -### Dependency endorsement validation - -After cryptographically verifying a dependency's attestation report, the server also validates the dependency's endorsement URLs (from `reportData.Endorsements`) against the evidence in the dependency report. The shared ristretto cache is used across own and dependency endorsements. - -### Endorsement HTTP client - -Uses system/Mozilla root CAs (via `golang.org/x/crypto/x509roots/fallback` blank import). Hardened with per-phase timeouts (dial 3s, TLS 5s, headers 5s), 1 MiB body limit, disabled keep-alives. When `endorsements.dnssec` is enabled, the `pkg/dnssec` resolver performs cryptographic DNSSEC chain-of-trust validation for endorsement URL hosts before fetching. The resolver reads upstream nameservers from `/etc/resolv.conf` (falling back to `127.0.0.53:53` then `127.0.0.1:53`), sets the CD bit to get raw RRSIG records from any resolver, and validates the full delegation chain from zone to root against embedded IANA root KSK trust anchors (KSK-2017 tag 20326, KSK-2024 tag 38696). It does not rely on the upstream resolver's AD flag. - -### Endorsement cache - -Uses `dgraph-io/ristretto/v2` with URL-string keys in a shared `fetcherCache` (stores both `*EndorsementDocument` and `*cosignResult` values — endorsement URLs and signature URLs don't collide). When multiple URLs resolve to the same document (verified byte-for-byte), the same pointer is stored under all URL keys (cost charged once). TTL is derived from Cache-Control `max-age` (capped at 24h, default `http.cache.default_ttl`). - -Endorsement URLs are tied to CI commit hashes with immutable content. Extended caching (up to 24h) is by design — measurement changes require new commits and new URLs. The TTL cap and per-request revalidation on cache miss provide eventual consistency. 
- -### Cosign endorsement verification - -When `endorsements.cosign.verify` is enabled (default: `true`), the server verifies cosign signatures on endorsement documents using the Sigstore public-good infrastructure (Fulcio + Rekor). Only Cosign v3 protobuf bundles (from `cosign sign-blob --bundle`) are supported. - -#### Verification flow - -After fetching an endorsement document, a corresponding signature bundle is fetched from the same URL with the configured suffix appended (default `.sig`). The single `endorsements.client.timeout` covers both fetches (not extended for the signature). Signature bundles undergo the same multi-URL byte-for-byte identity check as endorsement documents. - -Verification performs a full online Rekor inclusion proof check using an auto-updating Sigstore TUF client (`root.NewLiveTrustedRoot`) that refreshes roots in the background. Upon successful verification, Fulcio OID extensions are extracted from the signing certificate and validated against the server's `BuildInfo`: - -- **All OID fields** except BuildSignerURI and BuildSignerDigest: exact match against corresponding BuildInfo field -- **BuildSignerURI**: matched against `endorsements.cosign.build_signer.uri` (exact) or `.uri_regex` (regex) if configured; `.uri` takes precedence if both set (warning logged). When neither is configured, exact match against `BuildInfo.BuildSignerURI` -- **BuildSignerDigest**: exact match against `BuildInfo.BuildSignerDigest` when no `build_signer` config is set; **skipped** when any `build_signer` config is set (digest changes per-commit) -- **DeploymentEnvironment**: not checked (no standard Fulcio OID) -- Fulcio's `SourceRepositoryVisibilityAtSigning` maps to BuildInfo's `SourceRepositoryVisibility` - -When cosign verification is enabled, the server requires endorsement URLs to be configured (non-empty `paths.endorsements`); startup fails otherwise. 
Dependency attestation reports are also required to include non-empty endorsement URL lists — a dependency with no endorsement URLs is rejected. - -Cosign verification is applied to both own endorsements and dependency endorsements. For dependencies, OIDs are validated against the dependency's `BuildInfo` from its attestation report. - -Verified cosign results are cached alongside endorsement documents in the shared `fetcherCache`. On cache hit for both, zero network calls happen. On cache miss for either, both are re-fetched together. - ## Testing -Tests use the standard `testing` package (no testify), table-driven subtests with `t.Run`, and no mocking of hardware interfaces. +Standard `testing` package, table-driven subtests with `t.Run`, no testify, no hardware mocking. ### Fuzz tests -Security-sensitive parsers have `Fuzz*` tests (Go native fuzzing) that verify no panics on arbitrary input. Run seed corpus with `go test ./...`; run actual fuzzing with e.g. `go test ./internal/ -fuzz=FuzzExtractXFCCHash -fuzztime=30s`. Current fuzz targets: - -- `FuzzExtractXFCCHash`, `FuzzIsValidHexFingerprint` — untrusted XFCC header parsing (`internal/attestation_test.go`) -- `FuzzParseCacheTTL`, `FuzzParseByteSize`, `FuzzPCRGoldenValues_UnmarshalJSON` — untrusted HTTP headers, config input, endorsement JSON (`internal/endorsements_test.go`) -- `FuzzBytes_UnmarshalJSON`, `FuzzBytes_RoundTrip` — hex JSON deserialization and marshal↔unmarshal consistency (`pkg/hexbytes/hexbytes_test.go`) +Security-sensitive parsers have `Fuzz*` tests (Go native fuzzing). Run seed corpus with `go test ./...`; run fuzzing with e.g. `go test ./internal/ -fuzz=FuzzExtractXFCCHash -fuzztime=30s`. ### Live DNSSEC tests -`pkg/dnssec/dnssec_test.go` includes live tests that perform real DNSSEC chain-of-trust validation against public domains (ietf.org, internetsociety.org). 
Gated behind an environment variable: - ```sh DNSSEC_LIVE_TEST=1 go test ./pkg/dnssec/ -run TestLive -v ``` -### Attestation verification fixtures +### Attestation fixtures -Each TEE package has a `testdata/` directory with JSON fixtures captured from real hardware. All fixtures are the raw `AttestationReport` JSON as returned by the attestation handler (pretty-printed): - -```json -{ - "evidence": [{"kind": "...", "blob": "base64...", "data": {...}}], - "data": { ... AttestationReportData ... } -} -``` - -The clock value for certificate validation is extracted from `data.timestamp` (RFC 3339, truncated to seconds). The nonce/report_data is derived as `SHA-512(compact(data))`. Each verification test also cross-checks that `NewAttestationData` produces JSON matching the fixture's `evidence[0].data`. - -Chained (composite) attestation fixtures contain multiple evidence entries. For NitroTPM+SEV-SNP, the SEV-SNP report_data is `SHA-512(nitroTPMBlob)` instead of the raw digest, binding both proofs to the same request. The chained test in `internal/attestation_test.go` verifies both links and confirms the chain breaks if the unchained digest is used. - -Fixture files: -- `pkg/nitro/testdata/nitronsm_attestation.json` -- `pkg/nitro/testdata/nitrotpm_attestation.json` -- `pkg/sevsnp/testdata/sevsnp_attestation_aws.json` -- `pkg/sevsnp/testdata/sevsnp_attestation_gcp.json` -- `pkg/tdx/testdata/tdx_attestation.json` -- `internal/testdata/nitrotpm_sevsnp_attestation.json` (chained NitroTPM → SEV-SNP) -- `internal/testdata/dependencies_attestation.json` (diamond dependency graph: A → {B, C}, B → C with NitroTPM+SEV-SNP, TDX, and SEV-SNP evidence across services; each dependency has client cert matching caller's private cert; server cert validation is tested via unit tests with synthetic fingerprints since fixtures lack TLS connection state) +TEE packages have `testdata/` directories with JSON fixtures from real hardware. 
Clock from `data.timestamp` (RFC 3339, truncated to seconds), nonce from `SHA-512(compact(data))`. Tests cross-check `NewAttestationData` output. Chained fixtures (NitroTPM+SEV-SNP) bind via `SHA-512(nitroTPMBlob)`. Dependency fixture tests a diamond graph with cross-service evidence. ## Nix build -The project provides a Nix flake for reproducible, hermetic builds. Inputs are pinned to exact commit hashes in `flake.nix` and locked in `flake.lock`. The flake builds a statically linked binary (`CGO_ENABLED=0`, stripped with `-s -w`). Tests run during the build (live DNSSEC tests skip themselves in the sandbox since `DNSSEC_LIVE_TEST` is unset; all other tests use fixtures). The source filter includes `*.go`, `*.json` (test fixtures), `go.mod`, and `go.sum` — changes to docs or config do not trigger a rebuild. Runtime closure references to nixpkgs-patched `mailcap`, `iana-etc`, and `tzdata` store paths are stripped via `removeReferencesTo` (the server does not use `mime.TypeByExtension`, `net.LookupPort`, or `time.LoadLocation`), keeping the Docker image minimal. +Hermetic build via Nix flake (`CGO_ENABLED=0`, stripped). Tests run during build. Source filter: `*.go`, `*.json`, `go.mod`, `go.sum`. Two targets: `default` (static binary), `docker-image` (minimal OCI with compiled-in TLS roots via `x509roots/fallback`). -The flake exposes two package targets: -- `default` / `attestation-server` — the statically linked binary -- `docker-image` — a minimal OCI image (`streamLayeredImage`) containing the binary (TLS root CAs are compiled in via `x509roots/fallback`) and a `/usr/local/bin/attestation-server` symlink for use in multi-stage Docker builds, used by the release workflow and downstream Nitro TEE EIF builds +When `go.mod`/`go.sum` change, update `vendorHash` in `flake.nix`: set to `lib.fakeHash`, build, use hash from error. 
-The flake is designed to be referenced as a GitHub source input from downstream TEE image repositories: +Referenced as a GitHub source input from downstream TEE image repos: ```nix -# In the downstream flake: inputs.attestation-server.url = "github:eternisai/attestation-server"; -# Binary: attestation-server.packages.x86_64-linux.default -> $out/bin/attestation-server ``` -When `go.mod` or `go.sum` change, the `vendorHash` in `flake.nix` must be updated. Set it to `lib.fakeHash`, build, and use the hash from the error message. - ## CI/CD -- **CI** (`.github/workflows/ci.yml`) — runs on pushes to non-main branches: `go fmt` check, `go test` with `DNSSEC_LIVE_TEST=1`, `go vet`, `go build`. Skipped for doc-only changes (`*.md`, `LICENSE`, `NOTICE`). -- **Nix Build** (`.github/workflows/nix-build.yml`) — runs on PRs targeting main only when Nix or Go dependency files change (`flake.nix`, `flake.lock`, `go.mod`, `go.sum`). Pure Go source changes are already covered by CI; the Nix build catches flake recipe breakage and `vendorHash` mismatches. -- Branch protection on main should require both `Test` and `Build` status checks to pass. If using GitHub rulesets, mark them as "skippable" so PRs without Nix/dependency changes are not blocked when the Nix Build workflow is skipped by `paths`. -- **Release** (`.github/workflows/release.yml`) — runs on push to main, three sequential jobs: - 1. **Build** — `nix build .#docker-image` (runs tests via `doCheck`), uploads image tarball as artifact - 2. **Release** — Release Please creates/updates a release PR; on merge, creates a GitHub Release + tag - 3. **Docker** — loads the pre-built image, pushes to `ghcr.io/eternisai/attestation-server:`, cosigns with keyless Sigstore (Fulcio + Rekor via GitHub OIDC) +- **CI** (`ci.yml`) — non-main pushes: `go fmt`, `go test` (with `DNSSEC_LIVE_TEST=1`), `go vet`, `go build`. Skips doc-only changes. +- **Nix Build** (`nix-build.yml`) — PRs to main, only on flake/dep changes. 
Catches recipe breakage and `vendorHash` mismatches. +- **Release** (`release.yml`) — push to main: Nix build -> Release Please -> Docker push to `ghcr.io/eternisai/attestation-server:` + keyless cosign. +- Branch protection: require both `Test` and `Build` checks; mark as "skippable" for GitHub rulesets so PRs without Nix changes aren't blocked. -Release Please is configured via `release-please-config.json` and `.release-please-manifest.json`. It parses Conventional Commit messages to determine version bumps and generate changelogs. +Release Please configured via `release-please-config.json` and `.release-please-manifest.json`. ## Development ```sh -# build -go build ./... - -# run locally (uses config/config.toml by default) -go run . - -# run locally (with env vars) -ATTESTATION_SERVER_SERVER_PORT=8187 go run . - -# run tests -go test ./... - -# format -go fmt ./... +go build ./... # build +go run . # run locally (config/config.toml) +go test ./... # run tests +go fmt ./... # format ``` ## Commits -Use [Conventional Commits](https://www.conventionalcommits.org/) for commit messages: -`feat:`, `fix:`, `chore:`, `refactor:`, `docs:`, `test:`, etc. +Use [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `chore:`, `refactor:`, `docs:`, `test:`, etc. diff --git a/internal/crl.go b/internal/crl.go index b292279..b4ee3c6 100644 --- a/internal/crl.go +++ b/internal/crl.go @@ -154,13 +154,16 @@ func (c *crlCache) refreshAll(ctx context.Context, client *http.Client, urls []s func (s *Server) runCRLRefresh(ctx context.Context) { urls := crlURLsForEvidence(s.cfg) if len(urls) == 0 { + s.ready.Store(true) return } client := s.fetchHTTPClient() - // Initial fetch. + // Initial fetch — once complete the server is ready to serve traffic. 
s.crlCache.refreshAll(ctx, client, urls) + s.ready.Store(true) + s.logger.Info("initial crl fetch complete, server ready") ticker := time.NewTicker(s.cfg.RevocationRefreshInterval) defer ticker.Stop() diff --git a/internal/health.go b/internal/health.go new file mode 100644 index 0000000..b9432ed --- /dev/null +++ b/internal/health.go @@ -0,0 +1,27 @@ +package app + +import ( + "github.com/gofiber/fiber/v2" +) + +// handleLive returns 200 if the HTTP server is accepting requests. +// If the handler runs, the process is alive. +func (s *Server) handleLive(c *fiber.Ctx) error { + return c.JSON(fiber.Map{"status": "ok"}) +} + +// handleReady returns 200 once startup initialization is complete: +// self-attestation, endorsement validation, and initial CRL fetch +// (when certificate revocation is configured). Returns 503 while the +// server is still initializing. +// +// Readiness is a one-way transition. No runtime condition (certificate +// hot-reload failure, CRL refresh failure) flips it back to not-ready +// because all background processes use fail-safe semantics: stale +// certificates remain in use and CRL checking is fail-open. 
+func (s *Server) handleReady(c *fiber.Ctx) error { + if !s.ready.Load() { + return c.Status(fiber.StatusServiceUnavailable).JSON(fiber.Map{"status": "not ready"}) + } + return c.JSON(fiber.Map{"status": "ok"}) +} diff --git a/internal/health_test.go b/internal/health_test.go new file mode 100644 index 0000000..6be78c4 --- /dev/null +++ b/internal/health_test.go @@ -0,0 +1,78 @@ +package app + +import ( + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/gofiber/fiber/v2" +) + +func TestHandleLive_Returns200(t *testing.T) { + s := newTestServer() + app := fiber.New() + app.Get("/healthz/live", s.handleLive) + + req := httptest.NewRequest(http.MethodGet, "/healthz/live", nil) + resp, err := app.Test(req, -1) + if err != nil { + t.Fatalf("app.Test error: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + + body, _ := io.ReadAll(resp.Body) + if !contains(string(body), `"ok"`) { + t.Errorf("body = %s, want ok status", body) + } +} + +func TestHandleReady_NotReady(t *testing.T) { + s := newTestServer() + // ready is false by default (zero value of atomic.Bool) + app := fiber.New() + app.Get("/healthz/ready", s.handleReady) + + req := httptest.NewRequest(http.MethodGet, "/healthz/ready", nil) + resp, err := app.Test(req, -1) + if err != nil { + t.Fatalf("app.Test error: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusServiceUnavailable { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusServiceUnavailable) + } + + body, _ := io.ReadAll(resp.Body) + if !contains(string(body), `"not ready"`) { + t.Errorf("body = %s, want not ready status", body) + } +} + +func TestHandleReady_Ready(t *testing.T) { + s := newTestServer() + s.ready.Store(true) + app := fiber.New() + app.Get("/healthz/ready", s.handleReady) + + req := httptest.NewRequest(http.MethodGet, "/healthz/ready", nil) + resp, err := app.Test(req, -1) + if 
err != nil { + t.Fatalf("app.Test error: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + t.Errorf("status = %d, want %d", resp.StatusCode, http.StatusOK) + } + + body, _ := io.ReadAll(resp.Body) + if !contains(string(body), `"ok"`) { + t.Errorf("body = %s, want ok status", body) + } +} diff --git a/internal/server.go b/internal/server.go index 470e76f..31cc8b4 100644 --- a/internal/server.go +++ b/internal/server.go @@ -13,6 +13,7 @@ import ( "os" "path/filepath" "strings" + "sync/atomic" "time" "github.com/goccy/go-json" @@ -51,6 +52,7 @@ type Server struct { crlCache *crlCache tdxGetter *cachedHTTPSGetter dnssecResolver *dnssec.Resolver + ready atomic.Bool } // NewServer constructs a Server with middleware and routes configured. @@ -411,6 +413,8 @@ func (s *Server) Run(ctx context.Context) error { } if s.crlCache != nil { go s.runCRLRefresh(ctx) + } else { + s.ready.Store(true) } addr := fmt.Sprintf("%s:%d", s.cfg.BindHost, s.cfg.BindPort) @@ -516,6 +520,9 @@ func (s *Server) accessLog() fiber.Handler { // to /api/v1/attestation because attestation involves blocking TEE // hardware operations. Future lightweight endpoints should not inherit it. func (s *Server) setupRoutes() { + s.app.Get("/healthz/live", s.handleLive) + s.app.Get("/healthz/ready", s.handleReady) + handlers := []fiber.Handler{} if s.rateLimitHandler != nil { handlers = append(handlers, s.rateLimitHandler)