diff --git a/CLAUDE.md b/CLAUDE.md index c460e47..0902228 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -66,6 +66,8 @@ All TOML settings map to env vars prefixed with `ATTESTATION_SERVER_`, with `.` List-typed variables (`REPORT_USER_DATA_ENV`, `DEPENDENCIES_ENDPOINTS`, `ENDORSEMENTS_ALLOWED_DOMAINS`) support comma-separated values with trimmed spaces: `VAR=a,b,c`. +**When adding a new config setting**, three places must be updated in addition to `internal/config.go`: add a `viper.SetDefault` and a `viper.BindEnv` call in `cmd/root.go`, and add the default value in `config/config.toml`. Without the explicit `BindEnv` call the environment variable will be silently ignored. + ## Logging conventions - Use `log/slog` throughout; never `fmt.Print*` or `log.*`. @@ -156,9 +158,13 @@ When cosign is enabled, endorsement URLs are required (own and dependencies). `/healthz/live` returns 200 once the HTTP listener is up. `/healthz/ready` returns 200 after `NewServer` (self-attestation, endorsement validation) and the initial CRL fetch (if configured) complete; 503 before that. Readiness is a one-way transition — no runtime condition (cert reload failure, CRL refresh failure) flips it back because all background processes use fail-safe/fail-open semantics. Health routes are not rate-limited. +### Endorsement skip validation + +`endorsements.skip_validation` (default `false`) makes endorsement *retrieval* failures non-fatal — if endorsement documents cannot be fetched, errors are logged as warnings and attestation proceeds without endorsement verification. If endorsements are successfully retrieved, measurement comparison is always performed and mismatches always fail regardless of this flag. Intended for disaster recovery when endorsement-serving infrastructure is unavailable. Logs a startup warning that security is weakened. 
Network fetch errors in `fetchEndorsementDocumentsWithClient` and `fetchCosignSignatures` are wrapped as `*errEndorsementRetrieval`; verification/parsing errors (byte-for-byte mismatch, JSON parse, cosign bundle verification) are not. `validateOwnEndorsements` and `validateDependencyEndorsements` use `errors.As` to skip only retrieval errors. + ### Startup self-attestation -`NewServer` calls `Attest` with random nonce on each TEE device. Parsed results captured in `parsedSelfAttestation` for endorsement validation. Exits on failure. +`NewServer` calls `Attest` with random nonce on each TEE device. Parsed results captured in `parsedSelfAttestation` for endorsement validation. Device open and `Attest` failures always exit the server. Only the subsequent endorsement retrieval/validation step can be bypassed when `endorsements.skip_validation` is enabled. ### Endorsement document format diff --git a/cmd/root.go b/cmd/root.go index 257fc7b..dc55141 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -71,6 +71,7 @@ func initConfig() { viper.SetDefault("ratelimit.burst", 1) viper.SetDefault("ratelimit.stall_timeout", "10s") viper.SetDefault("dependencies.endpoints", []string{}) + viper.SetDefault("endorsements.skip_validation", false) viper.SetDefault("endorsements.dnssec", false) viper.SetDefault("endorsements.allowed_domains", []string{}) viper.SetDefault("endorsements.client.timeout", "10s") @@ -113,6 +114,7 @@ func initConfig() { _ = viper.BindEnv("ratelimit.burst", "ATTESTATION_SERVER_RATELIMIT_BURST") _ = viper.BindEnv("ratelimit.stall_timeout", "ATTESTATION_SERVER_RATELIMIT_STALL_TIMEOUT") _ = viper.BindEnv("dependencies.endpoints", "ATTESTATION_SERVER_DEPENDENCIES_ENDPOINTS") + _ = viper.BindEnv("endorsements.skip_validation", "ATTESTATION_SERVER_ENDORSEMENTS_SKIP_VALIDATION") _ = viper.BindEnv("endorsements.dnssec", "ATTESTATION_SERVER_ENDORSEMENTS_DNSSEC") _ = viper.BindEnv("endorsements.allowed_domains", "ATTESTATION_SERVER_ENDORSEMENTS_ALLOWED_DOMAINS") _ = 
viper.BindEnv("endorsements.client.timeout", "ATTESTATION_SERVER_ENDORSEMENTS_CLIENT_TIMEOUT") diff --git a/config/config.toml b/config/config.toml index f567d45..8074203 100644 --- a/config/config.toml +++ b/config/config.toml @@ -40,6 +40,7 @@ stall_timeout = "10s" # max wait time before 429 enforce = false [endorsements] +skip_validation = false # tolerate endorsement retrieval failures (retrieved endorsements are still fully validated); for debugging or disaster recovery when endorsement infrastructure is unavailable dnssec = false # require DNSSEC validation for endorsement URL hosts allowed_domains = [] # empty = unrestricted; non-empty = exact hostname match only diff --git a/docs/architecture.md b/docs/architecture.md index 2d22a0c..20f8c60 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -181,6 +181,8 @@ At startup, the server fetches endorsement documents from all configured URLs, v Per-request, endorsements are re-validated from cache (ristretto, TTL from Cache-Control headers, capped at 24h). On cache miss, documents are re-fetched and revalidated. If revalidation fails, the handler returns 500 but the server stays up and self-heals when endorsements become available. +When `endorsements.skip_validation` is enabled (default `false`), endorsement *retrieval* failures are demoted to warnings — the server starts and serves attestation responses without endorsed measurement verification. If endorsements are successfully retrieved, measurement comparison is always enforced regardless of this flag. This is a disaster recovery mechanism for when the endorsement infrastructure is completely unavailable; it weakens security guarantees and should be disabled as soon as the endorsement service is restored. 
+ ### Endorsement document format ```jsonc diff --git a/docs/security.md b/docs/security.md index 3681682..ec92672 100644 --- a/docs/security.md +++ b/docs/security.md @@ -108,6 +108,14 @@ Before collecting evidence for each request, the handler calls `validateOwnEndor - **Cache miss** (TTL expired) — documents are re-fetched and revalidated - **Failure** — handler returns 500, but the server stays up and self-heals when endorsements become available +### Skip validation mode + +When `endorsements.skip_validation` is enabled (default `false`), endorsement *retrieval* failures are logged as warnings instead of causing errors. This is intended for disaster recovery when the endorsement-serving infrastructure is completely unavailable but service operations must be restored. The server logs a startup warning that security is weakened. + +**Only retrieval failures are skipped.** If endorsement documents are successfully fetched, measurement comparison is always performed — a mismatch between the endorsed golden values and the actual TEE evidence is a hard error regardless of this flag. This ensures that a TEE running modified code cannot pass attestation when endorsements are available. + +Network fetch errors from `fetchEndorsementDocumentsWithClient` and `fetchCosignSignatures` are wrapped as `*errEndorsementRetrieval`. Verification and parsing errors — byte-for-byte mismatch across providers, endorsement JSON parse failure, cosign bundle verification failure — are not wrapped. `validateOwnEndorsements` and `validateDependencyEndorsements` use `errors.As` to detect `*errEndorsementRetrieval` and only skip those when the flag is enabled. Cosign OID validation and measurement comparison errors are never skipped (they occur after `resolveEndorsements` returns successfully). + ### Endorsement domain allowlist When `endorsements.allowed_domains` is configured (non-empty), endorsement document URLs are checked against the allowlist before fetching. 
Matching is exact hostname (case-insensitive) — subdomain matching is not supported; each host must be listed explicitly. The check applies to both own endorsement URLs and dependency endorsement URLs. diff --git a/internal/config.go b/internal/config.go index 1be672e..991471e 100644 --- a/internal/config.go +++ b/internal/config.go @@ -46,6 +46,7 @@ type Config struct { EndorsementDNSSEC bool EndorsementAllowedDomains []string EndorsementClientTimeout time.Duration + EndorsementSkipValidation bool HTTPAllowProxy bool HTTPCacheSize int64 HTTPCacheDefaultTTL time.Duration @@ -183,6 +184,7 @@ func LoadConfig() (*Config, error) { EndorsementDNSSEC: viper.GetBool("endorsements.dnssec"), EndorsementAllowedDomains: endorsementDomains, EndorsementClientTimeout: endorsementTimeout, + EndorsementSkipValidation: viper.GetBool("endorsements.skip_validation"), HTTPAllowProxy: viper.GetBool("http.allow_proxy"), HTTPCacheSize: httpCacheSize, HTTPCacheDefaultTTL: httpCacheDefaultTTL, diff --git a/internal/endorsements.go b/internal/endorsements.go index acac702..9d56be8 100644 --- a/internal/endorsements.go +++ b/internal/endorsements.go @@ -5,6 +5,7 @@ import ( "context" "crypto/sha256" "encoding/hex" + "errors" "fmt" "net/http" "net/url" @@ -19,6 +20,16 @@ import ( "github.com/eternisai/attestation-server/pkg/nitro" ) +// errEndorsementRetrieval wraps errors from fetching endorsement documents +// or cosign signatures over the network. Verification/parsing errors on +// successfully retrieved content are NOT wrapped with this type, so callers +// can use errors.As to distinguish infrastructure outages (skippable under +// endorsements.skip_validation) from content integrity failures (never skipped). 
+type errEndorsementRetrieval struct{ err error } + +func (e *errEndorsementRetrieval) Error() string { return e.err.Error() } +func (e *errEndorsementRetrieval) Unwrap() error { return e.err } + // fetchEndorsementDocumentsWithClient fetches endorsement documents from all // URLs in parallel with retry, verifies byte-for-byte identity, parses the // document, and returns it alongside the raw bytes (needed for cosign @@ -74,7 +85,7 @@ func (s *Server) fetchEndorsementDocumentsWithClient(ctx context.Context, urls [ } if err := g.Wait(); err != nil { - return nil, nil, 0, 0, err + return nil, nil, 0, 0, &errEndorsementRetrieval{err: err} } // Verify byte-for-byte identity across all responses @@ -163,7 +174,7 @@ func (s *Server) resolveEndorsementsWithClient(ctx context.Context, urls []*url. if s.cfg.CosignVerify && s.sigstoreVerifier != nil { bundleBytes, sigRawSize, sigTTL, fetchErr := s.fetchCosignSignatures(ctx, urls, client) if fetchErr != nil { - return nil, nil, fmt.Errorf("cosign signature fetch: %w", fetchErr) + return nil, nil, &errEndorsementRetrieval{err: fmt.Errorf("cosign signature fetch: %w", fetchErr)} } cr, err = s.verifyCosignBundle(bundleBytes, rawBody) @@ -197,9 +208,16 @@ func (s *Server) validateOwnEndorsements(ctx context.Context) error { doc, cr, err := s.resolveEndorsements(ctx, s.endorsements) if err != nil { + var retrieval *errEndorsementRetrieval + if s.cfg.EndorsementSkipValidation && errors.As(err, &retrieval) { + s.logger.Warn("endorsement retrieval failed, skipping validation because skip_validation is enabled", "error", err) + return nil + } return err } + // Endorsements were successfully retrieved — measurement comparison + // errors are never skipped, even with skip_validation enabled. 
if cr != nil { if err := s.validateCosignOIDs(cr, s.buildInfo); err != nil { return fmt.Errorf("cosign: %w", err) @@ -290,6 +308,11 @@ func (s *Server) validateDependencyEndorsements(ctx context.Context, report *Att edp, cr, err := s.resolveEndorsements(ctx, urls) if err != nil { + var retrieval *errEndorsementRetrieval + if s.cfg.EndorsementSkipValidation && errors.As(err, &retrieval) { + s.logger.Warn("dependency endorsement retrieval failed, skipping validation because skip_validation is enabled", "error", err) + return nil + } return err } doc := *edp diff --git a/internal/endorsements_test.go b/internal/endorsements_test.go index a9dbd31..e68cb91 100644 --- a/internal/endorsements_test.go +++ b/internal/endorsements_test.go @@ -3,6 +3,7 @@ package app import ( "bytes" "context" + "errors" "io" "log/slog" "net/http" @@ -814,6 +815,109 @@ func TestValidateOwnEndorsements_CacheHitMismatch(t *testing.T) { } } +func TestValidateOwnEndorsements_SkipValidation_RetrievalFailure(t *testing.T) { + cache, err := newFetcherCache(100 << 20) + if err != nil { + t.Fatal(err) + } + + u, _ := url.Parse("https://unreachable.example.com/e.json") + s := &Server{ + cfg: &Config{ + ReportEvidence: EvidenceConfig{SEVSNP: true}, + EndorsementClientTimeout: time.Second, + EndorsementSkipValidation: true, + }, + logger: slog.New(slog.NewJSONHandler(io.Discard, nil)), + endorsements: []*url.URL{u}, + httpCache: cache, + selfAttestation: &parsedSelfAttestation{ + sevSNPReport: &spb.Report{Measurement: bytes.Repeat([]byte{0xdd}, 48)}, + }, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + if err := s.validateOwnEndorsements(ctx); err != nil { + t.Fatalf("expected nil error with skip_validation on retrieval failure, got: %v", err) + } +} + +func TestValidateOwnEndorsements_SkipValidation_MismatchStillFails(t *testing.T) { + cache, err := newFetcherCache(100 << 20) + if err != nil { + t.Fatal(err) + } + + hex := strings.Repeat("00", 48) // wrong measurement + 
doc := &EndorsementDocument{SEVSNP: &hex} + cache.setGroup([]string{"https://example.com/e.json"}, doc, 100, time.Minute) + + u, _ := url.Parse("https://example.com/e.json") + s := &Server{ + cfg: &Config{ + ReportEvidence: EvidenceConfig{SEVSNP: true}, + EndorsementClientTimeout: 5 * time.Second, + EndorsementSkipValidation: true, + }, + logger: slog.New(slog.NewJSONHandler(io.Discard, nil)), + endorsements: []*url.URL{u}, + httpCache: cache, + selfAttestation: &parsedSelfAttestation{ + sevSNPReport: &spb.Report{Measurement: bytes.Repeat([]byte{0xdd}, 48)}, + }, + } + + err = s.validateOwnEndorsements(context.Background()) + if err == nil { + t.Fatal("expected error: measurement mismatch must not be skipped even with skip_validation") + } + if !contains(err.Error(), "mismatch") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestValidateOwnEndorsements_SkipValidation_PostFetchVerificationFailureNotSkipped(t *testing.T) { + // Serve invalid JSON — the HTTP fetch succeeds but parsing fails. + // This is a verification/parsing error, not a retrieval error, so + // skip_validation must NOT suppress it. + srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`not valid json`)) + })) + defer srv.Close() + + cache, err := newFetcherCache(100 << 20) + if err != nil { + t.Fatal(err) + } + + u, _ := url.Parse(srv.URL + "/e.json") + s := &Server{ + cfg: &Config{ + EndorsementClientTimeout: 5 * time.Second, + EndorsementSkipValidation: true, + }, + logger: slog.New(slog.NewJSONHandler(io.Discard, nil)), + httpCache: cache, + } + + // Use resolveEndorsementsWithClient directly with the test server's TLS + // client (validateOwnEndorsements creates its own client that won't trust + // the httptest certificate). The error classification is the same either way. 
+ _, _, resolveErr := s.resolveEndorsementsWithClient(context.Background(), []*url.URL{u}, srv.Client()) + if resolveErr == nil { + t.Fatal("expected error for invalid JSON") + } + var retrieval *errEndorsementRetrieval + if errors.As(resolveErr, &retrieval) { + t.Fatalf("post-fetch parse error must not be classified as retrieval: %v", resolveErr) + } + if !contains(resolveErr.Error(), "parsing") { + t.Errorf("expected parsing error, got: %v", resolveErr) + } +} + // --- resolveEndorsements --- func TestResolveEndorsements_CacheHit(t *testing.T) { @@ -1494,6 +1598,83 @@ func TestValidateDependencyEndorsements_TPMNoEndorsement(t *testing.T) { } } +// --- skip_validation: dependency endorsement tests --- + +func TestValidateDependencyEndorsements_SkipValidation_RetrievalFailure(t *testing.T) { + cache, err := newFetcherCache(100 << 20) + if err != nil { + t.Fatal(err) + } + + reportData := &AttestationReportData{ + Endorsements: []string{"https://unreachable.example.com/e.json"}, + } + dataJSON, _ := json.Marshal(reportData) + report := &AttestationReport{ + Evidence: []*AttestationEvidence{{Kind: "sevsnp", Blob: []byte("fake")}}, + Data: json.RawMessage(dataJSON), + } + + s := &Server{ + cfg: &Config{ + EndorsementClientTimeout: time.Second, + EndorsementSkipValidation: true, + }, + logger: slog.New(slog.NewJSONHandler(io.Discard, nil)), + httpCache: cache, + } + parsed := &parsedDependencyEvidence{ + sevSNPReport: &spb.Report{Measurement: bytes.Repeat([]byte{0xdd}, 48)}, + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + if err := s.validateDependencyEndorsements(ctx, report, parsed); err != nil { + t.Fatalf("expected nil error with skip_validation on retrieval failure, got: %v", err) + } +} + +func TestValidateDependencyEndorsements_SkipValidation_MismatchStillFails(t *testing.T) { + cache, err := newFetcherCache(100 << 20) + if err != nil { + t.Fatal(err) + } + + hex := strings.Repeat("00", 48) + doc := &EndorsementDocument{SEVSNP: 
&hex} + cache.setGroup([]string{"https://dep.example.com/e.json"}, doc, 100, time.Minute) + + reportData := &AttestationReportData{ + Endorsements: []string{"https://dep.example.com/e.json"}, + } + dataJSON, _ := json.Marshal(reportData) + report := &AttestationReport{ + Evidence: []*AttestationEvidence{{Kind: "sevsnp", Blob: []byte("fake")}}, + Data: json.RawMessage(dataJSON), + } + + s := &Server{ + cfg: &Config{ + EndorsementClientTimeout: 5 * time.Second, + EndorsementSkipValidation: true, + }, + logger: slog.New(slog.NewJSONHandler(io.Discard, nil)), + httpCache: cache, + } + parsed := &parsedDependencyEvidence{ + sevSNPReport: &spb.Report{Measurement: bytes.Repeat([]byte{0xdd}, 48)}, + } + + err = s.validateDependencyEndorsements(context.Background(), report, parsed) + if err == nil { + t.Fatal("expected error: measurement mismatch must not be skipped even with skip_validation") + } + if !contains(err.Error(), "mismatch") { + t.Errorf("unexpected error: %v", err) + } +} + // --- Edge case tests for measurement comparison --- func TestValidateSEVSNPMeasurement_Empty(t *testing.T) { diff --git a/internal/server.go b/internal/server.go index 31cc8b4..22d5702 100644 --- a/internal/server.go +++ b/internal/server.go @@ -106,6 +106,10 @@ func NewServer(cfg *Config, logger *slog.Logger) (*Server, error) { } logger.Debug("loaded endorsements", "count", len(endorsements), "urls", strings.Join(endorsementStrs, ",")) + if cfg.EndorsementSkipValidation { + logger.Warn("endorsement validation is disabled, attestation will proceed without endorsement verification — security is weakened") + } + if len(endorsements) > 0 { if !cfg.CosignVerify { logger.Warn("cosign verification is disabled, endorsement documents are not cryptographically authenticated")