From 07c755800794cd89cc3b1907ccaf2729e6c08d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enzo=20NAUT=C3=89?= Date: Sat, 4 Apr 2026 03:13:01 +0200 Subject: [PATCH] fix: deduplicate message hashes across files, not just within each file parseClaudeFile() resets its seenKeys set per file. Claude Code logs subagent messages to both the parent session JSONL and the subagent's own JSONL under /subagents/. Since both files share the same messageId:requestId pairs, per-file dedup misses the cross-file duplicates and inflates the totals. On a real dataset (30 days, 538 files): - per-file dedup: 7.98B tokens (current behavior) - global dedup: 2.59B tokens (correct, matches ccusage) - ratio: 3.08x overcounting Fix: pass the seenKeys set through ClaudeScanState so it accumulates across all files in a single scan pass. The new existingSeenKeys parameter defaults to [] so existing call sites and tests are unaffected. --- .../CostUsage/CostUsageScanner+Claude.swift | 22 ++++++++++++++----- .../Vendored/CostUsage/CostUsageScanner.swift | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner+Claude.swift b/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner+Claude.swift index 3c747bb1d..4c8256521 100644 --- a/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner+Claude.swift +++ b/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner+Claude.swift @@ -35,12 +35,15 @@ extension CostUsageScanner { fileURL: URL, range: CostUsageDayRange, providerFilter: ClaudeLogProviderFilter, - startOffset: Int64 = 0) -> ClaudeParseResult + startOffset: Int64 = 0, + existingSeenKeys: Set<String> = []) -> ClaudeParseResult { var days: [String: [String: [Int]]] = [:] - // Track seen message+request IDs to deduplicate streaming chunks within a JSONL file. + // Track seen message+request IDs to deduplicate streaming chunks. // Claude emits multiple lines per message with cumulative usage, so we only count once. 
- var seenKeys: Set<String> = [] + // Uses existingSeenKeys from prior files to also deduplicate across files + // (e.g. subagent logs that duplicate parent session entries). + var seenKeys = existingSeenKeys struct ClaudeTokens: Sendable { let input: Int @@ -133,7 +136,7 @@ extension CostUsageScanner { add(dayKey: dayKey, model: model, tokens: tokens) })) ?? startOffset - return ClaudeParseResult(days: days, parsedBytes: parsedBytes) + return ClaudeParseResult(days: days, parsedBytes: parsedBytes, seenKeys: seenKeys) } private static let vertexProviderKeys: Set<String> = [ @@ -263,6 +266,9 @@ extension CostUsageScanner { var touched: Set<String> let range: CostUsageDayRange let providerFilter: ClaudeLogProviderFilter + /// Shared across all files in a scan pass to deduplicate messages + /// that appear in both a parent session JSONL and its subagent files. + var globalSeenKeys: Set<String> = [] init(cache: CostUsageCache, range: CostUsageDayRange, providerFilter: ClaudeLogProviderFilter) { self.cache = cache @@ -296,7 +302,9 @@ extension CostUsageScanner { fileURL: url, range: state.range, providerFilter: state.providerFilter, - startOffset: startOffset) + startOffset: startOffset, + existingSeenKeys: state.globalSeenKeys) + state.globalSeenKeys = delta.seenKeys if !delta.days.isEmpty { Self.applyFileDays(cache: &state.cache, fileDays: delta.days, sign: 1) } @@ -317,7 +325,9 @@ extension CostUsageScanner { let parsed = Self.parseClaudeFile( fileURL: url, range: state.range, - providerFilter: state.providerFilter) + providerFilter: state.providerFilter, + existingSeenKeys: state.globalSeenKeys) + state.globalSeenKeys = parsed.seenKeys let usage = Self.makeFileUsage( mtimeUnixMs: mtimeMs, size: size, diff --git a/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner.swift b/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner.swift index f4a3ba8bb..027eba96a 100644 --- a/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner.swift +++ 
b/Sources/CodexBarCore/Vendored/CostUsage/CostUsageScanner.swift @@ -47,6 +47,7 @@ enum CostUsageScanner { struct ClaudeParseResult { let days: [String: [String: [Int]]] let parsedBytes: Int64 + let seenKeys: Set<String> } static func loadDailyReport(