Skip to content

Commit eeebd1f

Browse files
committed
In-memory rate limits for free mode
1 parent c120535 commit eeebd1f

File tree

3 files changed

+459
-7
lines changed

3 files changed

+459
-7
lines changed
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
import { afterEach, beforeEach, describe, expect, it, spyOn } from 'bun:test'
2+
3+
import {
4+
checkFreeModeRateLimit,
5+
FREE_MODE_RATE_LIMITS,
6+
resetFreeModeRateLimits,
7+
} from '../free-mode-rate-limiter'
8+
9+
const MINUTE_MS = 60 * 1000
10+
const HOUR_MS = 60 * MINUTE_MS
11+
12+
describe('free-mode-rate-limiter', () => {
13+
let nowSpy: ReturnType<typeof spyOn>
14+
let fakeNow: number
15+
16+
beforeEach(() => {
17+
resetFreeModeRateLimits()
18+
fakeNow = 1_000_000_000_000
19+
nowSpy = spyOn(Date, 'now').mockImplementation(() => fakeNow)
20+
})
21+
22+
afterEach(() => {
23+
nowSpy.mockRestore()
24+
})
25+
26+
function advanceTime(ms: number) {
27+
fakeNow += ms
28+
}
29+
30+
function makeRequests(userId: string, count: number) {
31+
for (let i = 0; i < count; i++) {
32+
const result = checkFreeModeRateLimit(userId)
33+
if (result.limited) {
34+
throw new Error(`Unexpectedly rate limited on request ${i + 1}`)
35+
}
36+
}
37+
}
38+
39+
describe('checkFreeModeRateLimit', () => {
40+
it('allows the first request', () => {
41+
const result = checkFreeModeRateLimit('user-1')
42+
expect(result.limited).toBe(false)
43+
})
44+
45+
it('allows requests up to the per-minute limit', () => {
46+
for (let i = 0; i < FREE_MODE_RATE_LIMITS.PER_MINUTE; i++) {
47+
const result = checkFreeModeRateLimit('user-1')
48+
expect(result.limited).toBe(false)
49+
}
50+
})
51+
52+
it('limits when per-minute limit is exceeded', () => {
53+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
54+
55+
const result = checkFreeModeRateLimit('user-1')
56+
expect(result.limited).toBe(true)
57+
if (result.limited) {
58+
expect(result.windowName).toBe('1 minute')
59+
}
60+
})
61+
62+
it('limits when per-30-minute limit is exceeded', () => {
63+
const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
64+
const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
65+
66+
// Spread requests across multiple 1-minute windows to avoid hitting the per-minute limit
67+
let sent = 0
68+
while (sent < per30Min) {
69+
const batch = Math.min(perMinute, per30Min - sent)
70+
makeRequests('user-1', batch)
71+
sent += batch
72+
if (sent < per30Min) {
73+
// Advance past the 1-minute window so it resets
74+
advanceTime(1 * MINUTE_MS + 1)
75+
}
76+
}
77+
78+
const result = checkFreeModeRateLimit('user-1')
79+
expect(result.limited).toBe(true)
80+
if (result.limited) {
81+
expect(result.windowName).toBe('30 minutes')
82+
}
83+
})
84+
85+
it('limits when per-5-hour limit is exceeded', () => {
86+
const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
87+
const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
88+
const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS
89+
90+
// Spread requests across multiple 30-minute windows
91+
let sent = 0
92+
while (sent < per5Hours) {
93+
const batchFor30Min = Math.min(per30Min, per5Hours - sent)
94+
// Within each 30-min window, spread across 1-min windows
95+
let sentInWindow = 0
96+
while (sentInWindow < batchFor30Min) {
97+
const batch = Math.min(perMinute, batchFor30Min - sentInWindow)
98+
makeRequests('user-1', batch)
99+
sentInWindow += batch
100+
if (sentInWindow < batchFor30Min) {
101+
advanceTime(1 * MINUTE_MS + 1)
102+
}
103+
}
104+
sent += sentInWindow
105+
// Always advance past 30-min window to reset it for the next batch
106+
// (stays well within the 5-hour window)
107+
advanceTime(30 * MINUTE_MS + 1)
108+
}
109+
110+
const result = checkFreeModeRateLimit('user-1')
111+
expect(result.limited).toBe(true)
112+
if (result.limited) {
113+
expect(result.windowName).toBe('5 hours')
114+
}
115+
})
116+
117+
it('limits when per-7-day limit is exceeded', () => {
118+
const perMinute = FREE_MODE_RATE_LIMITS.PER_MINUTE
119+
const per30Min = FREE_MODE_RATE_LIMITS.PER_30_MINUTES
120+
const per5Hours = FREE_MODE_RATE_LIMITS.PER_5_HOURS
121+
const per7Days = FREE_MODE_RATE_LIMITS.PER_7_DAYS
122+
123+
// Spread requests across multiple 5-hour windows
124+
let sent = 0
125+
while (sent < per7Days) {
126+
const batchFor5Hours = Math.min(per5Hours, per7Days - sent)
127+
let sentIn5Hr = 0
128+
while (sentIn5Hr < batchFor5Hours) {
129+
const batchFor30Min = Math.min(per30Min, batchFor5Hours - sentIn5Hr)
130+
let sentIn30Min = 0
131+
while (sentIn30Min < batchFor30Min) {
132+
const batch = Math.min(perMinute, batchFor30Min - sentIn30Min)
133+
makeRequests('user-1', batch)
134+
sentIn30Min += batch
135+
if (sentIn30Min < batchFor30Min) {
136+
advanceTime(1 * MINUTE_MS + 1)
137+
}
138+
}
139+
sentIn5Hr += sentIn30Min
140+
advanceTime(30 * MINUTE_MS + 1)
141+
}
142+
sent += sentIn5Hr
143+
// Advance past the 5-hour window (stays within 7-day window)
144+
advanceTime(5 * HOUR_MS + 1)
145+
}
146+
147+
const result = checkFreeModeRateLimit('user-1')
148+
expect(result.limited).toBe(true)
149+
if (result.limited) {
150+
expect(result.windowName).toBe('7 days')
151+
}
152+
})
153+
154+
it('does not increment counters when rate limited', () => {
155+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
156+
157+
// These should all be rejected without changing state
158+
for (let i = 0; i < 5; i++) {
159+
const result = checkFreeModeRateLimit('user-1')
160+
expect(result.limited).toBe(true)
161+
}
162+
163+
// After the 1-minute window expires, the user should only have used PER_MINUTE requests
164+
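// against the 30-minute window, not PER_MINUTE + 5 (NOTE(review): the assertion below only observes this if PER_30_MINUTES <= PER_MINUTE + 5; otherwise it just checks the 1-minute reset — confirm)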
165+
advanceTime(1 * MINUTE_MS + 1)
166+
167+
// Should be allowed again (1-min window reset)
168+
const result = checkFreeModeRateLimit('user-1')
169+
expect(result.limited).toBe(false)
170+
})
171+
172+
it('returns correct retryAfterMs for the violated window', () => {
173+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
174+
175+
// Advance 30 seconds into the 1-minute window
176+
advanceTime(30_000)
177+
178+
const result = checkFreeModeRateLimit('user-1')
179+
expect(result.limited).toBe(true)
180+
if (result.limited) {
181+
// Exactly 30 seconds should remain in the 1-minute window (time is mocked, so the value is deterministic)
182+
expect(result.retryAfterMs).toBe(1 * MINUTE_MS - 30_000)
183+
}
184+
})
185+
186+
it('resets per-minute window after expiry', () => {
187+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
188+
189+
const limited = checkFreeModeRateLimit('user-1')
190+
expect(limited.limited).toBe(true)
191+
192+
// Advance past the 1-minute window
193+
advanceTime(1 * MINUTE_MS + 1)
194+
195+
const result = checkFreeModeRateLimit('user-1')
196+
expect(result.limited).toBe(false)
197+
})
198+
199+
it('isolates different users', () => {
200+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
201+
202+
// user-1 is rate limited
203+
expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
204+
205+
// user-2 should not be affected
206+
const result = checkFreeModeRateLimit('user-2')
207+
expect(result.limited).toBe(false)
208+
})
209+
210+
it('retryAfterMs is never negative', () => {
211+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
212+
213+
// Advance to just before expiry
214+
advanceTime(1 * MINUTE_MS - 1)
215+
216+
const result = checkFreeModeRateLimit('user-1')
217+
expect(result.limited).toBe(true)
218+
if (result.limited) {
219+
expect(result.retryAfterMs).toBeGreaterThanOrEqual(0)
220+
}
221+
})
222+
223+
it('tracks counts across all windows simultaneously', () => {
224+
// Make some requests
225+
makeRequests('user-1', 5)
226+
227+
// Advance past 1-minute window but within 30-minute window
228+
advanceTime(1 * MINUTE_MS + 1)
229+
230+
// Make more requests — 1-min counter resets, but 30-min counter keeps accumulating
231+
makeRequests('user-1', 5)
232+
233+
// Advance past 1-minute again
234+
advanceTime(1 * MINUTE_MS + 1)
235+
236+
// The 30-min window should now have 10 requests counted
237+
// and the 1-min window should be fresh
238+
const result = checkFreeModeRateLimit('user-1')
239+
expect(result.limited).toBe(false)
240+
})
241+
})
242+
243+
describe('resetFreeModeRateLimits', () => {
244+
it('clears all rate limit state', () => {
245+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
246+
expect(checkFreeModeRateLimit('user-1').limited).toBe(true)
247+
248+
resetFreeModeRateLimits()
249+
250+
const result = checkFreeModeRateLimit('user-1')
251+
expect(result.limited).toBe(false)
252+
})
253+
254+
it('clears state for all users', () => {
255+
makeRequests('user-1', FREE_MODE_RATE_LIMITS.PER_MINUTE)
256+
makeRequests('user-2', FREE_MODE_RATE_LIMITS.PER_MINUTE)
257+
258+
resetFreeModeRateLimits()
259+
260+
expect(checkFreeModeRateLimit('user-1').limited).toBe(false)
261+
expect(checkFreeModeRateLimit('user-2').limited).toBe(false)
262+
})
263+
})
264+
})

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ import {
6565
OpenRouterError,
6666
} from '@/llm-api/openrouter'
6767
import { extractApiKeyFromHeader } from '@/util/auth'
68+
import { checkFreeModeRateLimit } from './free-mode-rate-limiter'
6869

6970
const FREE_MODE_ALLOWED_COUNTRIES = new Set([
7071
'US', 'CA',
@@ -86,11 +87,6 @@ function getCountryCode(req: NextRequest): string | null {
8687
return cfCountry.toUpperCase()
8788
}
8889

89-
const vercelCountry = req.headers.get('x-vercel-ip-country')
90-
if (vercelCountry && vercelCountry !== 'XX') {
91-
return vercelCountry.toUpperCase()
92-
}
93-
9490
const clientIp = extractClientIp(req)
9591
if (!clientIp) {
9692
return null
@@ -263,10 +259,9 @@ export async function postChatCompletions(params: {
263259
const clientIp = extractClientIp(req)
264260

265261
const cfHeader = req.headers.get('cf-ipcountry')
266-
const vercelHeader = req.headers.get('x-vercel-ip-country')
267262
const geoipResult = clientIp ? geoip.lookup(clientIp)?.country ?? null : null
268263
logger.info(
269-
{ cfHeader, vercelHeader, geoipResult, resolvedCountry: countryCode, clientIp: clientIp ? '[redacted]' : undefined },
264+
{ cfHeader, geoipResult, resolvedCountry: countryCode, clientIp: clientIp ? '[redacted]' : undefined },
270265
'Free mode country detection',
271266
)
272267

@@ -292,6 +287,36 @@ export async function postChatCompletions(params: {
292287
{ status: 403 },
293288
)
294289
}
290+
291+
// Rate limit free mode requests
292+
const rateLimitResult = checkFreeModeRateLimit(userId)
293+
if (rateLimitResult.limited) {
294+
const retryAfterSeconds = Math.ceil(rateLimitResult.retryAfterMs / 1000)
295+
const resetTime = new Date(Date.now() + rateLimitResult.retryAfterMs).toISOString()
296+
const resetCountdown = formatQuotaResetCountdown(resetTime)
297+
298+
trackEvent({
299+
event: AnalyticsEvent.CHAT_COMPLETIONS_VALIDATION_ERROR,
300+
userId,
301+
properties: {
302+
error: 'free_mode_rate_limited',
303+
windowName: rateLimitResult.windowName,
304+
retryAfterSeconds,
305+
},
306+
logger,
307+
})
308+
309+
return NextResponse.json(
310+
{
311+
error: 'free_mode_rate_limited',
312+
message: `Free mode rate limit exceeded (${rateLimitResult.windowName} limit). Try again ${resetCountdown}.`,
313+
},
314+
{
315+
status: 429,
316+
headers: { 'Retry-After': String(retryAfterSeconds) },
317+
},
318+
)
319+
}
295320
}
296321

297322
// Extract and validate agent run ID

0 commit comments

Comments
 (0)