From 46ea0ce6b036fcb7a30e56c6c87e04430c0bdb51 Mon Sep 17 00:00:00 2001 From: sujal Date: Sat, 25 Oct 2025 17:37:45 +0530 Subject: [PATCH 1/6] feat: implement rate limiting to control external API requests --- packages/core/src/agent/agent.ts | 72 +++++ packages/core/src/agent/types.ts | 31 +- packages/core/src/index.ts | 17 ++ packages/core/src/rate-limit/errors.ts | 54 ++++ packages/core/src/rate-limit/index.ts | 25 ++ .../rate-limit/limiters/fixed-window.spec.ts | 232 ++++++++++++++ .../src/rate-limit/limiters/fixed-window.ts | 135 +++++++++ .../core/src/rate-limit/limiters/index.ts | 5 + packages/core/src/rate-limit/manager.spec.ts | 285 ++++++++++++++++++ packages/core/src/rate-limit/manager.ts | 185 ++++++++++++ packages/core/src/rate-limit/types.ts | 167 ++++++++++ 11 files changed, 1203 insertions(+), 5 deletions(-) create mode 100644 packages/core/src/rate-limit/errors.ts create mode 100644 packages/core/src/rate-limit/index.ts create mode 100644 packages/core/src/rate-limit/limiters/fixed-window.spec.ts create mode 100644 packages/core/src/rate-limit/limiters/fixed-window.ts create mode 100644 packages/core/src/rate-limit/limiters/index.ts create mode 100644 packages/core/src/rate-limit/manager.spec.ts create mode 100644 packages/core/src/rate-limit/manager.ts create mode 100644 packages/core/src/rate-limit/types.ts diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 52fc55671..c40dc91d9 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -37,6 +37,7 @@ import { ActionType, buildAgentLogMessage } from "../logger/message-builder"; import type { Memory, MemoryUpdateMode } from "../memory"; import { MemoryManager } from "../memory/manager/memory-manager"; import { type VoltAgentObservability, createVoltAgentObservability } from "../observability"; +import { RateLimitManager } from "../rate-limit/manager"; import { AgentRegistry } from "../registries/agent-registry"; import type { 
BaseRetriever } from "../retriever/retriever"; import type { Tool, Toolkit } from "../tool"; @@ -347,6 +348,7 @@ export class Agent { private defaultObservability?: VoltAgentObservability; private readonly toolManager: ToolManager; private readonly subAgentManager: SubAgentManager; + private readonly rateLimitManager?: RateLimitManager; private readonly voltOpsClient?: VoltOpsClient; private readonly prompts?: PromptHelper; private readonly evalConfig?: AgentEvalConfig; @@ -416,6 +418,17 @@ export class Agent { this.supervisorConfig, ); + // Initialize rate limit manager if configuration provided + if (options.rateLimits) { + this.rateLimitManager = new RateLimitManager(this.id, options.rateLimits, this.logger); + this.logger.debug("Rate limit manager initialized", { + event: LogEvents.AGENT_CREATED, + agentId: this.id, + hasLLMRateLimit: !!options.rateLimits.llm, + hasToolRateLimits: !!options.rateLimits.tools, + }); + } + // Initialize prompts helper with VoltOpsClient (agent's own or global) // Priority 1: Agent's own VoltOpsClient // Priority 2: Global VoltOpsClient from registry @@ -526,6 +539,24 @@ export class Agent { tools: tools ? 
Object.keys(tools) : [], }); + // Rate limit check before LLM call + if (this.rateLimitManager) { + // Extract provider from model if available + const provider = this.extractProviderFromModel(model); + const modelId = modelName; + + await this.rateLimitManager.checkLLMRateLimit({ + provider, + model: modelId, + }); + + methodLogger.debug("Rate limit check passed for LLM call", { + event: LogEvents.AGENT_GENERATION_STARTED, + provider, + model: modelId, + }); + } + // Extract VoltAgent-specific options const { userId, @@ -740,6 +771,24 @@ export class Agent { // Setup abort signal listener this.setupAbortSignalListener(oc); + // Rate limit check before LLM call + if (this.rateLimitManager) { + // Extract provider from model if available + const provider = this.extractProviderFromModel(model); + const modelId = modelName; + + await this.rateLimitManager.checkLLMRateLimit({ + provider, + model: modelId, + }); + + methodLogger.debug("Rate limit check passed for stream call", { + event: LogEvents.AGENT_STREAM_STARTED, + provider, + model: modelId, + }); + } + // Extract VoltAgent-specific options const { userId, @@ -1670,6 +1719,19 @@ export class Agent { }; } + /** + * Extract provider name from AI SDK model + * Returns the provider identifier for rate limiting purposes + */ + private extractProviderFromModel(model: LanguageModel): string { + // AI SDK models have a 'provider' property that identifies the provider + // e.g., "google.generative-ai", "openai", "anthropic" + if (typeof model === "object" && model !== null && "provider" in model) { + return String(model.provider); + } + return "unknown"; + } + /** * Common preparation for all execution methods */ @@ -2757,6 +2819,16 @@ export class Agent { // Call tool start hook - can throw ToolDeniedError await hooks.onToolStart?.({ agent: this, tool, context: oc, args }); + // Rate limit check before tool execution + if (this.rateLimitManager) { + await this.rateLimitManager.checkToolRateLimit(tool.name); + + 
oc.logger.debug("Rate limit check passed for tool execution", { + event: LogEvents.AGENT_STEP_TOOL_CALL, + toolName: tool.name, + }); + } + // Execute tool with OperationContext directly if (!tool.execute) { throw new Error(`Tool ${tool.name} does not have "execute" method`); diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index dd5fb29d2..4e77c1029 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -18,17 +18,15 @@ import type { Voice } from "../voice/types"; import type { VoltOpsClient } from "../voltops/client"; import type { Agent } from "./agent"; import type { CancellationError, VoltAgentError } from "./errors"; -import type { LLMProvider } from "./providers"; -import type { BaseTool } from "./providers"; -import type { StepWithContent } from "./providers"; +import type { BaseTool, LLMProvider, StepWithContent } from "./providers"; import type { UsageInfo } from "./providers/base/types"; -import type { SubAgentConfig } from "./subagent/types"; -import type { VoltAgentTextStreamPart } from "./subagent/types"; +import type { SubAgentConfig, VoltAgentTextStreamPart } from "./subagent/types"; import type { Logger } from "@voltagent/internal"; import type { LocalScorerDefinition, SamplingPolicy } from "../eval/runtime"; import type { MemoryOptions, MemoryStorageMetadata, WorkingMemorySummary } from "../memory/types"; import type { VoltAgentObservability } from "../observability"; +import type { AgentRateLimitConfig } from "../rate-limit/types"; import type { DynamicValue, DynamicValueOptions, @@ -476,6 +474,29 @@ export type AgentOptions = { // Live evaluation configuration eval?: AgentEvalConfig; + + // Rate limiting configuration + /** + * Rate limiting configuration for controlling request frequency + * Helps prevent exceeding API rate limits and manage costs + * + * @example + * ```typescript + * rateLimits: { + * llm: { + * maxRequestsPerMinute: 10, + * strategy: 'fixed_window', + * 
onExceeded: 'delay' + * }, + * tools: { + * 'search_tool': { + * maxRequestsPerMinute: 5 + * } + * } + * } + * ``` + */ + rateLimits?: AgentRateLimitConfig; }; export type AgentEvalOperationType = diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index bbfe37b9b..a2e8c70a0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -116,6 +116,23 @@ export { ConversationNotFoundError, } from "./memory"; +// Rate Limiting exports +export { RateLimitExceededError } from "./rate-limit/errors"; +export { FixedWindowCounterLimiter } from "./rate-limit/limiters/fixed-window"; +export { RateLimitManager } from "./rate-limit/manager"; +export type { + AgentRateLimitConfig, + LLMRateLimitConfig, + RateLimitConfig, + RateLimitContext, + RateLimiter, + RateLimitExceededAction, + RateLimitScope, + RateLimitStats, + RateLimitStrategy, + ToolRateLimitConfig, +} from "./rate-limit/types"; + // Export adapters from subdirectories export { InMemoryStorageAdapter } from "./memory/adapters/storage/in-memory"; export { InMemoryVectorAdapter } from "./memory/adapters/vector/in-memory"; diff --git a/packages/core/src/rate-limit/errors.ts b/packages/core/src/rate-limit/errors.ts new file mode 100644 index 000000000..a4ffcae12 --- /dev/null +++ b/packages/core/src/rate-limit/errors.ts @@ -0,0 +1,54 @@ +/** + * Custom errors for rate limiting + */ + +import type { RateLimitStats } from "./types"; + +/** + * Error thrown when rate limit is exceeded and onExceeded='throw' + */ +export class RateLimitExceededError extends Error { + /** Current rate limit statistics */ + public readonly stats: RateLimitStats; + /** When the rate limit will reset */ + public readonly resetAt: Date; + /** Scope that was rate limited */ + public readonly scope: string; + + /** + * Milliseconds until reset (dynamically calculated) + * Always returns fresh value based on current time + */ + public get retryAfter(): number { + return Math.max(0, this.resetAt.getTime() - 
Date.now()); + } + + constructor(params: { stats: RateLimitStats; scope: string; message?: string }) { + const defaultMessage = `Rate limit exceeded for ${params.scope}. Limit: ${params.stats.limit} requests. Resets at ${params.stats.resetAt.toISOString()}`; + super(params.message || defaultMessage); + + this.name = "RateLimitExceededError"; + this.stats = params.stats; + this.resetAt = params.stats.resetAt; + this.scope = params.scope; + + // Maintains proper stack trace for where error was thrown (V8 only) + if (Error.captureStackTrace) { + Error.captureStackTrace(this, RateLimitExceededError); + } + } + + /** + * Get a user-friendly error message + */ + toJSON() { + return { + name: this.name, + message: this.message, + scope: this.scope, + retryAfter: this.retryAfter, + resetAt: this.resetAt.toISOString(), + stats: this.stats, + }; + } +} diff --git a/packages/core/src/rate-limit/index.ts b/packages/core/src/rate-limit/index.ts new file mode 100644 index 000000000..b0b955c35 --- /dev/null +++ b/packages/core/src/rate-limit/index.ts @@ -0,0 +1,25 @@ +/** + * Rate Limiting Module + * + * Provides configurable rate limiting for LLM calls and tool executions. + * Prevents exceeding API rate limits and enables cost control. 
+ */ + +export { RateLimitExceededError } from "./errors"; +export { FixedWindowCounterLimiter } from "./limiters/fixed-window"; +export { RateLimitManager } from "./manager"; + +export type { + AgentRateLimitConfig, + LLMRateLimitConfig, + ProviderRateLimitConfig, + RateLimitConfig, + RateLimitContext, + RateLimiter, + RateLimitExceededAction, + RateLimitScope, + RateLimitScopeId, + RateLimitStats, + RateLimitStrategy, + ToolRateLimitConfig, +} from "./types"; diff --git a/packages/core/src/rate-limit/limiters/fixed-window.spec.ts b/packages/core/src/rate-limit/limiters/fixed-window.spec.ts new file mode 100644 index 000000000..331a656e9 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/fixed-window.spec.ts @@ -0,0 +1,232 @@ +/** + * Tests for FixedWindowCounterLimiter + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RateLimitExceededError } from "../errors"; +import type { RateLimitConfig } from "../types"; +import { FixedWindowCounterLimiter } from "./fixed-window"; + +describe("FixedWindowCounterLimiter", () => { + let limiter: FixedWindowCounterLimiter; + let config: RateLimitConfig; + + beforeEach(() => { + vi.useFakeTimers(); + config = { + strategy: "fixed_window", + limit: 5, + windowMs: 60000, // 1 minute + onExceeded: "throw", + }; + limiter = new FixedWindowCounterLimiter(config); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("constructor", () => { + it("should initialize with provided config", () => { + expect(limiter).toBeDefined(); + const stats = limiter.getStats(); + expect(stats.limit).toBe(5); + expect(stats.remaining).toBe(5); + expect(stats.current).toBe(0); + }); + }); + + describe("acquire", () => { + it("should allow requests within limit", async () => { + await expect(limiter.acquire()).resolves.not.toThrow(); + await expect(limiter.acquire()).resolves.not.toThrow(); + await expect(limiter.acquire()).resolves.not.toThrow(); + + const stats = limiter.getStats(); + 
expect(stats.current).toBe(3); + expect(stats.remaining).toBe(2); + }); + + it("should throw error when limit exceeded and onExceeded=throw", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + // Next request should throw + await expect(limiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + + it("should delay when limit exceeded and onExceeded=delay", async () => { + const delayLimiter = new FixedWindowCounterLimiter({ + ...config, + onExceeded: "delay", + }); + + // Use up all requests + for (let i = 0; i < 5; i++) { + await delayLimiter.acquire(); + } + + // Next request should delay + const acquirePromise = delayLimiter.acquire(); + + // Should not resolve immediately + let resolved = false; + acquirePromise.then(() => { + resolved = true; + }); + + await vi.advanceTimersByTimeAsync(100); + expect(resolved).toBe(false); + + // Advance time to reset window + await vi.advanceTimersByTimeAsync(60000); + await acquirePromise; + expect(resolved).toBe(true); + }); + + it("should reset counter when window expires", async () => { + // Use 3 requests + await limiter.acquire(); + await limiter.acquire(); + await limiter.acquire(); + + expect(limiter.getStats().current).toBe(3); + + // Advance time past window + await vi.advanceTimersByTimeAsync(60000); + + // Next acquire should reset counter + await limiter.acquire(); + const stats = limiter.getStats(); + expect(stats.current).toBe(1); + expect(stats.remaining).toBe(4); + }); + }); + + describe("check", () => { + it("should return true when requests are available", async () => { + expect(limiter.check()).toBe(true); + await limiter.acquire(); + expect(limiter.check()).toBe(true); + }); + it("should return false when limit is reached", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + }); + + it("should return true after window reset", async () => { + // Use up 
all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + + // Advance time past window + await vi.advanceTimersByTimeAsync(60000); + + expect(limiter.check()).toBe(true); + }); + }); + + describe("getStats", () => { + it("should return accurate statistics", async () => { + await limiter.acquire(); + await limiter.acquire(); + + const stats = limiter.getStats(); + expect(stats.limit).toBe(5); + expect(stats.current).toBe(2); + expect(stats.remaining).toBe(3); + expect(stats.resetAt).toBeInstanceOf(Date); + }); + + it("should update resetAt based on windowMs", async () => { + const now = Date.now(); + await limiter.acquire(); + + const stats = limiter.getStats(); + const resetTime = stats.resetAt.getTime(); + + // Reset time should be approximately now + windowMs + expect(resetTime).toBeGreaterThanOrEqual(now + config.windowMs - 100); + expect(resetTime).toBeLessThanOrEqual(now + config.windowMs + 100); + }); + }); + + describe("reset", () => { + it("should reset counter and window", async () => { + // Use some requests + await limiter.acquire(); + await limiter.acquire(); + await limiter.acquire(); + + expect(limiter.getStats().current).toBe(3); + + limiter.reset(); + + const stats = limiter.getStats(); + expect(stats.current).toBe(0); + expect(stats.remaining).toBe(5); + }); + + it("should allow requests after reset", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + + limiter.reset(); + + expect(limiter.check()).toBe(true); + await expect(limiter.acquire()).resolves.not.toThrow(); + }); + }); + + describe("edge cases", () => { + it("should handle limit of 1", async () => { + const singleLimiter = new FixedWindowCounterLimiter({ + ...config, + limit: 1, + }); + + await expect(singleLimiter.acquire()).resolves.not.toThrow(); + await expect(singleLimiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + 
+ it("should handle very short windows", async () => { + const shortWindowLimiter = new FixedWindowCounterLimiter({ + ...config, + windowMs: 100, // 100ms window + }); + + await shortWindowLimiter.acquire(); + expect(shortWindowLimiter.check()).toBe(true); + + // Advance past window + await vi.advanceTimersByTimeAsync(100); + + await shortWindowLimiter.acquire(); + expect(shortWindowLimiter.getStats().current).toBe(1); + }); + + it("should handle concurrent requests", async () => { + const promises = Array.from({ length: 5 }, () => limiter.acquire()); + await expect(Promise.all(promises)).resolves.not.toThrow(); + + const stats = limiter.getStats(); + expect(stats.current).toBe(5); + expect(stats.remaining).toBe(0); + + await expect(limiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + }); +}); diff --git a/packages/core/src/rate-limit/limiters/fixed-window.ts b/packages/core/src/rate-limit/limiters/fixed-window.ts new file mode 100644 index 000000000..b926a18a6 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/fixed-window.ts @@ -0,0 +1,135 @@ +/** + * Fixed Window Counter Rate Limiter + * + * Implements a simple fixed-window rate limiting algorithm: + * - Tracks number of requests in a fixed time window + * - Resets counter when window expires + * - Efficient and easy to understand + * + * Example: 10 requests per minute + * - Window: 0:00 - 1:00, allows 10 requests + * - At 1:00, window resets, allows 10 more requests + */ + +import { RateLimitExceededError } from "../errors"; +import type { RateLimiter, RateLimitExceededAction, RateLimitStats } from "../types"; + +export interface FixedWindowCounterConfig { + /** Maximum requests per window */ + limit: number; + /** Window duration in milliseconds */ + windowMs: number; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; + /** Scope identifier for error messages */ + scope?: string; +} + +export class FixedWindowCounterLimiter implements RateLimiter { + private 
count: number = 0; + private windowStart: number; + private readonly limit: number; + private readonly windowMs: number; + private readonly onExceeded: RateLimitExceededAction; + private readonly scope: string; + + constructor(config: FixedWindowCounterConfig) { + this.limit = config.limit; + this.windowMs = config.windowMs; + this.onExceeded = config.onExceeded || "delay"; + this.scope = config.scope || "unknown"; + this.windowStart = Date.now(); + } + + /** + * Acquire permission to proceed + * Either waits or throws based on onExceeded configuration + */ + async acquire(): Promise { + const now = Date.now(); + + // Reset window if expired + if (now - this.windowStart >= this.windowMs) { + this.count = 0; + this.windowStart = now; + } + + // Check if we're over the limit + if (this.count >= this.limit) { + const stats = this.getStats(); + + if (this.onExceeded === "throw") { + throw new RateLimitExceededError({ + stats, + scope: this.scope, + }); + } else { + // Delay until window resets + const waitTime = this.windowStart + this.windowMs - now; + if (waitTime > 0) { + await this.delay(waitTime); + } + // After waiting, reset window and retry + this.count = 0; + this.windowStart = Date.now(); + } + } + + // Increment counter + this.count++; + } + + /** + * Check if request can proceed without blocking + */ + check(): boolean { + const now = Date.now(); + + // Reset window if expired + if (now - this.windowStart >= this.windowMs) { + return true; + } + + return this.count < this.limit; + } + + /** + * Get current statistics + */ + getStats(): RateLimitStats { + const now = Date.now(); + const resetAt = new Date(this.windowStart + this.windowMs); + + // If window has expired, return fresh stats + if (now - this.windowStart >= this.windowMs) { + return { + remaining: this.limit, + limit: this.limit, + current: 0, + resetAt: new Date(now + this.windowMs), + }; + } + + return { + remaining: Math.max(0, this.limit - this.count), + limit: this.limit, + current: 
this.count, + resetAt, + }; + } + + /** + * Reset the limiter state + */ + reset(): void { + this.count = 0; + this.windowStart = Date.now(); + } + + /** + * Internal delay helper + */ + private delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} diff --git a/packages/core/src/rate-limit/limiters/index.ts b/packages/core/src/rate-limit/limiters/index.ts new file mode 100644 index 000000000..ea8363d40 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/index.ts @@ -0,0 +1,5 @@ +/** + * Rate Limiter Implementations + */ + +export { FixedWindowCounterLimiter } from "./fixed-window"; diff --git a/packages/core/src/rate-limit/manager.spec.ts b/packages/core/src/rate-limit/manager.spec.ts new file mode 100644 index 000000000..6b4d43f4e --- /dev/null +++ b/packages/core/src/rate-limit/manager.spec.ts @@ -0,0 +1,285 @@ +/** + * Tests for RateLimitManager + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RateLimitExceededError } from "./errors"; +import { RateLimitManager } from "./manager"; +import type { AgentRateLimitConfig } from "./types"; + +describe("RateLimitManager", () => { + let manager: RateLimitManager; + let config: AgentRateLimitConfig; + + beforeEach(() => { + vi.useFakeTimers(); + config = { + llm: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", + onExceeded: "throw", + }, + providers: { + openai: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + tools: { + search_tool: { + maxRequestsPerMinute: 3, + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + }; + manager = new RateLimitManager("test-agent", config); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("constructor", () => { + it("should initialize with provided config", () => { + expect(manager).toBeDefined(); + }); + }); + + describe("checkLLMRateLimit", () => { + it("should allow requests within global LLM limit", async () => { + 
for (let i = 0; i < 10; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) + ).resolves.not.toThrow(); + } + + // 11th request should throw + await expect( + manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) + ).rejects.toThrow(RateLimitExceededError); + }); + + it("should prioritize provider-specific limit over global limit", async () => { + // OpenAI has limit of 5, global has 10 + for (let i = 0; i < 5; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + + // 6th OpenAI request should throw + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).rejects.toThrow(RateLimitExceededError); + }); + + it("should handle different providers independently", async () => { + // Use 5 OpenAI requests + for (let i = 0; i < 5; i++) { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + } + + // Claude should still have full global limit available + for (let i = 0; i < 10; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }) + ).resolves.not.toThrow(); + } + }); + + it("should allow requests when no limit configured", async () => { + const noLimitManager = new RateLimitManager("test-agent", {}); + + for (let i = 0; i < 100; i++) { + await expect( + noLimitManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + }); + + it("should handle provider name case-insensitively", async () => { + await manager.checkLLMRateLimit({ provider: "OpenAI", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "OPENAI", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats = manager.getAllStats(); + // Should only have one limiter for openai + const openaiKeys = Object.keys(stats).filter((key) => key.includes("openai")); + 
expect(openaiKeys.length).toBe(1); + }); + }); + + describe("checkToolRateLimit", () => { + it("should allow requests within tool limit", async () => { + for (let i = 0; i < 3; i++) { + await expect(manager.checkToolRateLimit("search_tool")).resolves.not.toThrow(); + } + + // 4th request should throw + await expect(manager.checkToolRateLimit("search_tool")).rejects.toThrow( + RateLimitExceededError + ); + }); + + it("should allow unlimited requests for unconfigured tools", async () => { + for (let i = 0; i < 100; i++) { + await expect(manager.checkToolRateLimit("unconfigured_tool")).resolves.not.toThrow(); + } + }); + + it("should handle different tools independently", async () => { + // Use up search_tool limit + for (let i = 0; i < 3; i++) { + await manager.checkToolRateLimit("search_tool"); + } + + // Other tools should work fine + await expect(manager.checkToolRateLimit("other_tool")).resolves.not.toThrow(); + }); + }); + + describe("getAllStats", () => { + it("should return stats for all active limiters", async () => { + // Trigger creation of different limiters + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }); + await manager.checkToolRateLimit("search_tool"); + + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBeGreaterThan(0); + + for (const [key, stat] of Object.entries(stats)) { + expect(key).toBeTruthy(); + expect(stat).toHaveProperty("limit"); + expect(stat).toHaveProperty("current"); + expect(stat).toHaveProperty("remaining"); + expect(stat).toHaveProperty("resetAt"); + } + }); + + it("should return empty object when no limiters created", () => { + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBe(0); + }); + }); + + describe("resetAll", () => { + it("should reset all active limiters", async () => { + // Use some requests + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" 
}); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkToolRateLimit("search_tool"); + + manager.resetAll(); + + const stats = manager.getAllStats(); + for (const stat of Object.values(stats)) { + expect(stat.current).toBe(0); + expect(stat.remaining).toBe(stat.limit); + } + }); + + it("should allow requests after reset", async () => { + // Use up openai limit + for (let i = 0; i < 5; i++) { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + } + + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).rejects.toThrow(RateLimitExceededError); + + manager.resetAll(); + + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + }); + }); + + describe("limiter creation", () => { + it("should create limiter on first access", async () => { + const stats1 = manager.getAllStats(); + expect(Object.keys(stats1).length).toBe(0); + + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats2 = manager.getAllStats(); + expect(Object.keys(stats2).length).toBe(1); + }); + + it("should reuse existing limiter", async () => { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBe(1); // Only one limiter should exist + + const openaiStats = Object.values(stats)[0]; + expect(openaiStats.current).toBe(2); // Both requests counted + }); + }); + + describe("configuration validation", () => { + it("should handle missing maxRequestsPerMinute gracefully", async () => { + const invalidConfig = { + llm: { + strategy: "fixed_window" as const, + onExceeded: "throw" as const, + // Missing maxRequestsPerMinute - should be allowed through without rate limiting + }, + }; + + const invalidManager = new RateLimitManager("test", 
invalidConfig); + + // Should not throw - when config is invalid/incomplete, it allows requests through + // This is a graceful degradation approach + await expect( + invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + + // Verify multiple requests are allowed (no rate limiting applied) + for (let i = 0; i < 10; i++) { + await expect( + invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + }); + }); + + describe("delay behavior", () => { + it("should delay requests when onExceeded=delay", async () => { + const delayConfig: AgentRateLimitConfig = { + llm: { + maxRequestsPerMinute: 2, + strategy: "fixed_window", + onExceeded: "delay", + }, + }; + + const delayManager = new RateLimitManager("test-agent", delayConfig); + + // Use up limit + await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + + // Next request should delay + const checkPromise = delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + + let resolved = false; + checkPromise.then(() => { + resolved = true; + }); + + await vi.advanceTimersByTimeAsync(100); + expect(resolved).toBe(false); + + // Advance to reset + await vi.advanceTimersByTimeAsync(60000); + await checkPromise; + expect(resolved).toBe(true); + }); + }); +}); diff --git a/packages/core/src/rate-limit/manager.ts b/packages/core/src/rate-limit/manager.ts new file mode 100644 index 000000000..09e1f7822 --- /dev/null +++ b/packages/core/src/rate-limit/manager.ts @@ -0,0 +1,185 @@ +/** + * Rate Limit Manager + * + * Central manager for all rate limiters in an agent. 
+ * Responsible for: + * - Creating and managing rate limiters for different scopes + * - Providing unified API for rate limit checks + * - Configuration-based limiter instantiation + */ + +import type { Logger } from "@voltagent/internal"; +import { FixedWindowCounterLimiter } from "./limiters/fixed-window"; +import type { AgentRateLimitConfig, RateLimiter, RateLimitScopeId } from "./types"; + +export class RateLimitManager { + private limiters: Map = new Map(); + private readonly config: AgentRateLimitConfig; + private readonly logger?: Logger; + private readonly agentId: string; + + constructor(agentId: string, config: AgentRateLimitConfig, logger?: Logger) { + this.agentId = agentId; + this.config = config; + this.logger = logger; + } + + /** + * Check rate limit for LLM call + * This is called before generateText/streamText + */ + async checkLLMRateLimit(context: { provider?: string; model?: string }): Promise { + // Priority order: + // 1. Provider-specific limit (if configured) + // 2. 
Global LLM limit (if configured) + + const providerName = context.provider?.toLowerCase(); + + // Check provider-specific limit first + if (providerName && this.config.providers?.[providerName]) { + const scopeId: RateLimitScopeId = { + type: "provider", + provider: providerName, + }; + const limiter = this.getLimiter(scopeId, this.config.providers[providerName]); + await limiter.acquire(); + return; + } + + // Check global LLM limit + if (this.config.llm) { + const scopeId: RateLimitScopeId = { + type: "global", + }; + const limiter = this.getLimiter(scopeId, this.config.llm); + await limiter.acquire(); + } + + // No rate limit configured - allow through + } + + /** + * Check rate limit for tool execution + * This is called before tool.execute() + */ + async checkToolRateLimit(toolName: string): Promise { + if (!this.config.tools?.[toolName]) { + // No rate limit configured for this tool + return; + } + + const scopeId: RateLimitScopeId = { + type: "tool", + agentId: this.agentId, + toolName, + }; + + const limiter = this.getLimiter(scopeId, this.config.tools[toolName]); + await limiter.acquire(); + } + + /** + * Get or create a rate limiter for a specific scope + */ + private getLimiter( + scopeId: RateLimitScopeId, + config: { + maxRequestsPerMinute?: number; + maxTokensPerMinute?: number; + strategy?: string; + onExceeded?: "delay" | "throw"; + } + ): RateLimiter { + const key = this.getScopeKey(scopeId); + + // Return existing limiter if already created + if (this.limiters.has(key)) { + return this.limiters.get(key)!; + } + + // Create new limiter + const limiter = this.createLimiter(scopeId, config); + this.limiters.set(key, limiter); + + this.logger?.debug("Rate limiter created", { + scope: scopeId, + limit: config.maxRequestsPerMinute, + strategy: config.strategy || "fixed_window", + }); + + return limiter; + } + + /** + * Create a new rate limiter instance based on configuration + */ + private createLimiter( + scopeId: RateLimitScopeId, + config: { + 
maxRequestsPerMinute?: number; + maxTokensPerMinute?: number; + strategy?: string; + onExceeded?: "delay" | "throw"; + } + ): RateLimiter { + const limit = config.maxRequestsPerMinute || 60; // Default 60 requests/min + const strategy = config.strategy || "fixed_window"; + const onExceeded = config.onExceeded || "delay"; + + // For MVP, only fixed_window is implemented + if (strategy !== "fixed_window") { + this.logger?.warn( + `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window` + ); + } + + return new FixedWindowCounterLimiter({ + limit, + windowMs: 60000, // 1 minute window + onExceeded, + scope: this.getScopeKey(scopeId), + }); + } + + /** + * Generate a unique key for a rate limit scope + */ + private getScopeKey(scopeId: RateLimitScopeId): string { + switch (scopeId.type) { + case "global": + return "global:llm"; + case "agent": + return `agent:${scopeId.agentId}`; + case "tool": + return `tool:${scopeId.agentId}:${scopeId.toolName}`; + case "provider": + return `provider:${scopeId.provider}`; + default: + return "unknown"; + } + } + + /** + * Get statistics for all active rate limiters + */ + getAllStats(): Record> { + const stats: Record> = {}; + + for (const [key, limiter] of this.limiters.entries()) { + stats[key] = limiter.getStats(); + } + + return stats; + } + + /** + * Reset all rate limiters + * Useful for testing or manual intervention + */ + resetAll(): void { + for (const limiter of this.limiters.values()) { + limiter.reset(); + } + this.logger?.debug("All rate limiters reset"); + } +} diff --git a/packages/core/src/rate-limit/types.ts b/packages/core/src/rate-limit/types.ts new file mode 100644 index 000000000..c45a33b2d --- /dev/null +++ b/packages/core/src/rate-limit/types.ts @@ -0,0 +1,167 @@ +/** + * Rate limiting types and interfaces + * Implements configurable rate limiting for LLM calls and tool executions + */ + +/** + * Available rate limiting strategies + */ +export type RateLimitStrategy = "fixed_window" | 
"token_bucket" | "leaky_bucket"; + +/** + * Action to take when rate limit is exceeded + * - 'delay': Wait until rate limit resets (queue the request) + * - 'throw': Immediately throw RateLimitExceededError + */ +export type RateLimitExceededAction = "delay" | "throw"; + +/** + * Scope for rate limiting + * - 'global': Apply to all operations across all agents + * - 'agent': Apply per agent instance + * - 'tool': Apply per tool + * - 'provider': Apply per LLM provider + */ +export type RateLimitScope = "global" | "agent" | "tool" | "provider"; + +/** + * Statistics for current rate limit state + */ +export interface RateLimitStats { + /** Number of requests remaining in current window */ + remaining: number; + /** Total limit per window */ + limit: number; + /** When the current window resets */ + resetAt: Date; + /** Current request count in window */ + current: number; +} + +/** + * Base configuration for a rate limiter + */ +export interface RateLimitConfig { + /** Rate limiting strategy to use */ + strategy: RateLimitStrategy; + /** Maximum number of requests per window */ + limit: number; + /** Time window in milliseconds */ + windowMs: number; + /** Action when limit is exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for LLM-specific rate limiting + */ +export interface LLMRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute?: number; + /** Maximum tokens per minute (future enhancement) */ + maxTokensPerMinute?: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for tool-specific rate limiting + */ +export interface ToolRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for 
provider-specific rate limiting + */ +export interface ProviderRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute?: number; + /** Maximum tokens per minute (future enhancement) */ + maxTokensPerMinute?: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Complete rate limiting configuration for an agent + */ +export interface AgentRateLimitConfig { + /** Global LLM rate limits for all providers */ + llm?: LLMRateLimitConfig; + /** Per-tool rate limits (keyed by tool name) */ + tools?: Record; + /** Per-provider rate limits (keyed by provider name like 'openai', 'anthropic') */ + providers?: Record; +} + +/** + * Context for rate limit check + */ +export interface RateLimitContext { + /** Agent ID */ + agentId: string; + /** Operation type (llm or tool) */ + operationType: "llm" | "tool"; + /** Provider name (for LLM operations) */ + provider?: string; + /** Model name (for LLM operations) */ + model?: string; + /** Tool name (for tool operations) */ + toolName?: string; +} + +/** + * Core rate limiter interface + * All rate limiting strategies must implement this interface + */ +export interface RateLimiter { + /** + * Acquire permission to proceed with an operation + * This method will either: + * - Return immediately if under limit + * - Wait (delay) until limit resets if onExceeded='delay' + * - Throw RateLimitExceededError if onExceeded='throw' + */ + acquire(): Promise; + + /** + * Check if operation can proceed without blocking + * @returns true if under limit, false if over limit + */ + check(): boolean; + + /** + * Get current rate limit statistics + */ + getStats(): RateLimitStats; + + /** + * Reset the rate limiter state + * Useful for testing or manual intervention + */ + reset(): void; +} + +/** + * Scope identifier for rate limit manager + */ +export interface RateLimitScopeId { + /** Scope type */ + type: "global" | 
"agent" | "tool" | "provider"; + /** Agent ID (for agent/tool scopes) */ + agentId?: string; + /** Tool name (for tool scope) */ + toolName?: string; + /** Provider name (for provider scope) */ + provider?: string; +} From 8de4af40196416dec9d38cfe5dec4b6c95cf58cf Mon Sep 17 00:00:00 2001 From: sujal Date: Sat, 25 Oct 2025 17:42:14 +0530 Subject: [PATCH 2/6] chore: add example usage for rate limiting --- examples/with-rate-limiting/.env.example | 1 + examples/with-rate-limiting/.gitignore | 4 + examples/with-rate-limiting/README.md | 107 +++++++++ examples/with-rate-limiting/package.json | 22 ++ examples/with-rate-limiting/src/index.ts | 276 +++++++++++++++++++++++ 5 files changed, 410 insertions(+) create mode 100644 examples/with-rate-limiting/.env.example create mode 100644 examples/with-rate-limiting/.gitignore create mode 100644 examples/with-rate-limiting/README.md create mode 100644 examples/with-rate-limiting/package.json create mode 100644 examples/with-rate-limiting/src/index.ts diff --git a/examples/with-rate-limiting/.env.example b/examples/with-rate-limiting/.env.example new file mode 100644 index 000000000..0eb68d5c2 --- /dev/null +++ b/examples/with-rate-limiting/.env.example @@ -0,0 +1 @@ +GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here diff --git a/examples/with-rate-limiting/.gitignore b/examples/with-rate-limiting/.gitignore new file mode 100644 index 000000000..6a8a0fe88 --- /dev/null +++ b/examples/with-rate-limiting/.gitignore @@ -0,0 +1,4 @@ +node_modules +dist +.DS_Store +.voltagent diff --git a/examples/with-rate-limiting/README.md b/examples/with-rate-limiting/README.md new file mode 100644 index 000000000..9ff104806 --- /dev/null +++ b/examples/with-rate-limiting/README.md @@ -0,0 +1,107 @@ +# Rate Limiting Example + +This example demonstrates VoltAgent's rate limiting feature to control the frequency of LLM calls and tool executions. + +## Features Demonstrated + +1. 
**Basic LLM Rate Limiting** - Limit requests per minute with error throwing +2. **Delay Strategy** - Automatic waiting when limits are exceeded +3. **Provider-Specific Limits** - Different limits for different LLM providers +4. **Tool Rate Limiting** - Control tool execution frequency +5. **Combined Limits** - Multiple rate limits working together +6. **Monitoring Stats** - Track rate limit usage in real-time + +## Installation + +```bash +pnpm install +``` + +## Configuration + +Set your Google Generative AI API key: + +```bash +export GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here +``` + +## Running Examples + +Edit `src/index.ts` and uncomment the examples you want to run in the `main()` function: + +```typescript +async function main() { + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + // ... etc +} +``` + +Then run: + +```bash +pnpm start +``` + +## Rate Limit Configuration + +### LLM Rate Limiting + +```typescript +rateLimits: { + llm: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", + onExceeded: "throw" // or "delay" + } +} +``` + +### Provider-Specific Limits + +```typescript +rateLimits: { + providers: { + openai: { + maxRequestsPerMinute: 5, + onExceeded: "throw" + }, + anthropic: { + maxRequestsPerMinute: 3, + onExceeded: "delay" + } + } +} +``` + +### Tool Rate Limiting + +```typescript +rateLimits: { + tools: { + search_tool: { + maxRequestsPerMinute: 3, + onExceeded: "delay" + } + } +} +``` + +## Strategies + +### `onExceeded: "throw"` + +- Immediately throws `RateLimitExceededError` when limit is reached +- Good for strict enforcement and error handling + +### `onExceeded: "delay"` + +- Automatically waits until the rate limit resets +- Good for background jobs and retry scenarios + +## Use Cases + +- **Cost Control**: Limit expensive LLM API calls +- **API Quota Management**: Stay within provider rate limits +- **Resource Protection**: Prevent tool overuse +- **Fair Usage**: Distribute resources across multiple agents diff --git 
a/examples/with-rate-limiting/package.json b/examples/with-rate-limiting/package.json new file mode 100644 index 000000000..7a443148c --- /dev/null +++ b/examples/with-rate-limiting/package.json @@ -0,0 +1,22 @@ +{ + "name": "with-rate-limiting", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "start": "tsx src/index.ts", + "dev": "tsx watch src/index.ts", + "build": "tsc", + "run": "node dist/index.js" + }, + "dependencies": { + "@ai-sdk/google": "^2.0.23", + "@voltagent/core": "workspace:*", + "dotenv": "^17.2.3", + "zod": "^3.25.0" + }, + "devDependencies": { + "tsx": "^4.19.2", + "typescript": "^5.8.2" + } +} diff --git a/examples/with-rate-limiting/src/index.ts b/examples/with-rate-limiting/src/index.ts new file mode 100644 index 000000000..992eafc7a --- /dev/null +++ b/examples/with-rate-limiting/src/index.ts @@ -0,0 +1,276 @@ +/** + * Example: Rate Limiting in VoltAgent + * + * This example demonstrates how to use the rate limiting feature + * to control the frequency of LLM calls and tool executions. 
+ */ +import "dotenv/config"; +import { google } from "@ai-sdk/google"; +import { Agent, RateLimitExceededError, createTool } from "@voltagent/core"; +import { z } from "zod"; + +// Create a simple search tool +const searchTool = createTool({ + name: "search_tool", + description: "Search for information", + parameters: z.object({ + query: z.string().describe("Search query"), + }), + execute: async ({ query }) => { + console.log(`[TOOL] Searching for: ${query}`); + return `Search results for: ${query}`; + }, +}); + +// Example 1: Basic LLM Rate Limiting +async function example1_basicLLMRateLimit() { + console.log("\n=== Example 1: Basic LLM Rate Limiting ==="); + + const agent = new Agent({ + name: "basic-llm-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 3, // Only 3 requests per minute + strategy: "fixed_window", + onExceeded: "throw", // Throw error when limit exceeded + }, + }, + }); + + try { + // These will work fine + for (let i = 1; i <= 3; i++) { + console.log(`Request ${i}/3...`); + const response = await agent.generateText(`Say "Hello ${i}"`); + console.log(`✓ Response: ${response.text}`); + } + + // This 4th request will throw RateLimitExceededError + console.log("\nAttempting 4th request (will fail)..."); + await agent.generateText("Say Hello 4"); + } catch (error) { + if (error instanceof RateLimitExceededError) { + console.error(`✗ Rate limit exceeded: ${error.message}`); + console.log(` Remaining: ${error.stats.remaining}`); + console.log(` Reset at: ${error.stats.resetAt}`); + console.log(` Retry after: ${error.retryAfter} seconds`); + } else if (error instanceof Error) { + console.error(`✗ Unexpected error: ${error.message}`); + } + } +} + +// Example 2: Delay Strategy +async function example2_delayStrategy() { + console.log("\n=== Example 2: Delay Strategy (Auto-wait) ==="); + + const agent = new Agent({ + name: "delay-agent", + model: 
google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 2, + strategy: "fixed_window", + onExceeded: "delay", // Wait until rate limit resets + }, + }, + }); + + console.log("Making 3 requests (2 will go through, 3rd will wait)..."); + const startTime = Date.now(); + + for (let i = 1; i <= 3; i++) { + const requestStart = Date.now(); + console.log(`\nRequest ${i}/3 at +${Math.round((requestStart - startTime) / 1000)}s`); + + const response = await agent.generateText(`Count to ${i}`); + + const requestEnd = Date.now(); + console.log(`✓ Completed in ${Math.round((requestEnd - requestStart) / 1000)}s`); + console.log(` Response: ${response.text.substring(0, 50)}...`); + } +} + +// Example 3: Provider-Specific Rate Limiting +async function example3_providerSpecificLimits() { + console.log("\n=== Example 3: Provider-Specific Rate Limiting ==="); + + const agent = new Agent({ + name: "provider-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 10, // Global limit: 10 requests/min + onExceeded: "throw", + }, + providers: { + google: { + maxRequestsPerMinute: 3, // Google-specific: 3 requests/min + onExceeded: "throw", + }, + }, + }, + }); + + try { + console.log("Provider-specific limit (Google): 3 requests/min"); + console.log("Global limit: 10 requests/min\n"); + + for (let i = 1; i <= 4; i++) { + console.log(`Google request ${i}/4...`); + await agent.generateText(`Hello ${i}`); + console.log("✓ Success"); + } + } catch (error) { + if (error instanceof Error) { + console.error(`✗ Google rate limit exceeded (3/min): ${error.message}`); + } + } +} + +// Example 4: Tool-Specific Rate Limiting +async function example4_toolRateLimiting() { + console.log("\n=== Example 4: Tool-Specific Rate Limiting ==="); + + const agent = new Agent({ + name: "tool-limited-agent", + model: 
google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant with access to a search tool.", + tools: [searchTool], + rateLimits: { + tools: { + search_tool: { + maxRequestsPerMinute: 2, // Only 2 searches per minute + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + }, + }); + + try { + console.log("Asking agent to make 3 searches (limit: 2/min)...\n"); + + const response = await agent.generateText( + "Search for 'AI', then 'ML', then 'DL'. Use the search tool for each.", + ); + + console.log("\n✓ Response:", response.text); + } catch (error) { + if (error instanceof Error) { + console.error(`✗ Tool rate limit exceeded: ${error.message}`); + } + } +} + +// Example 5: Combined Rate Limiting +async function example5_combinedLimits() { + console.log("\n=== Example 5: Combined LLM + Tool Rate Limiting ==="); + + const agent = new Agent({ + name: "fully-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + tools: [searchTool], + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + onExceeded: "delay", + }, + tools: { + search_tool: { + maxRequestsPerMinute: 3, + onExceeded: "delay", + }, + }, + providers: { + google: { + maxRequestsPerMinute: 4, + onExceeded: "throw", + }, + }, + }, + }); + + console.log("Configuration:"); + console.log(" - Google provider: 4 req/min (throw)"); + console.log(" - Global LLM: 5 req/min (delay)"); + console.log(" - Search tool: 3 req/min (delay)\n"); + + // Agent will respect all limits with appropriate strategies + try { + const response = await agent.generateText( + "Tell me about AI and use the search tool if needed.", + ); + console.log("✓ Response:", `${response.text.substring(0, 100)}...`); + } catch (error) { + if (error instanceof Error) { + console.error("✗ Error:", error.message); + } + } +} + +// Example 6: Monitoring Rate Limit Stats +async function example6_monitoringStats() { + console.log("\n=== Example 6: Monitoring Rate Limit Statistics ==="); 
+ + const agent = new Agent({ + name: "monitored-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + onExceeded: "throw", + }, + }, + }); + + // Make some requests + for (let i = 1; i <= 3; i++) { + await agent.generateText(`Request ${i}`); + } + + console.log("\nNote: Rate limit statistics are internal to the agent."); + console.log("In production, you can track rate limit errors and retryAfter values."); + console.log("The RateLimitExceededError provides all necessary information for monitoring."); +} + +// Run all examples +async function main() { + console.log("🚀 VoltAgent Rate Limiting Examples\n"); + console.log("=".repeat(60)); + + try { + // Uncomment the examples you want to run: + + // await example1_basicLLMRateLimit(); + // await example2_delayStrategy(); + // await example3_providerSpecificLimits(); + // await example4_toolRateLimiting(); + // await example5_combinedLimits(); + // await example6_monitoringStats(); + + console.log(`\n${"=".repeat(60)}`); + console.log("\n✓ All examples completed!"); + console.log("\nNote: Uncomment examples in main() to run them."); + } catch (error) { + console.error("\n✗ Error:", error); + } +} + +// Run if executed directly +main().catch(console.error); + +export { + example1_basicLLMRateLimit, + example2_delayStrategy, + example3_providerSpecificLimits, + example4_toolRateLimiting, + example5_combinedLimits, + example6_monitoringStats, +}; From 59575ae297347c01a6c3d012b476135c084dbe49 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 19:57:37 +0530 Subject: [PATCH 3/6] feat: simplify rate limit checks and remove provider-specific configurations --- examples/with-rate-limiting/src/index.ts | 88 +++++++++--------- packages/core/src/agent/agent.ts | 10 +- packages/core/src/index.ts | 2 - packages/core/src/rate-limit/index.ts | 3 - packages/core/src/rate-limit/manager.spec.ts | 98 ++++---------------- 
packages/core/src/rate-limit/manager.ts | 38 ++------ packages/core/src/rate-limit/types.ts | 67 +++---------- 7 files changed, 89 insertions(+), 217 deletions(-) diff --git a/examples/with-rate-limiting/src/index.ts b/examples/with-rate-limiting/src/index.ts index 992eafc7a..f85a7ce01 100644 --- a/examples/with-rate-limiting/src/index.ts +++ b/examples/with-rate-limiting/src/index.ts @@ -1,15 +1,12 @@ /** * Example: Rate Limiting in VoltAgent - * - * This example demonstrates how to use the rate limiting feature - * to control the frequency of LLM calls and tool executions. */ -import "dotenv/config"; import { google } from "@ai-sdk/google"; import { Agent, RateLimitExceededError, createTool } from "@voltagent/core"; +import "dotenv/config"; import { z } from "zod"; -// Create a simple search tool +// Create simple tools const searchTool = createTool({ name: "search_tool", description: "Search for information", @@ -22,6 +19,18 @@ const searchTool = createTool({ }, }); +const calculateTool = createTool({ + name: "calculator", + description: "Perform calculations", + parameters: z.object({ + expression: z.string().describe("Math expression to calculate"), + }), + execute: async ({ expression }) => { + console.log(`[TOOL] Calculating: ${expression}`); + return `Result: ${expression} = 42`; + }, +}); + // Example 1: Basic LLM Rate Limiting async function example1_basicLLMRateLimit() { console.log("\n=== Example 1: Basic LLM Rate Limiting ==="); @@ -94,22 +103,25 @@ async function example2_delayStrategy() { } } -// Example 3: Provider-Specific Rate Limiting -async function example3_providerSpecificLimits() { - console.log("\n=== Example 3: Provider-Specific Rate Limiting ==="); +// Example 3: Tool-Specific Rate Limiting +async function example3_toolRateLimiting() { + console.log("\n=== Example 3: Tool-Specific Rate Limiting ==="); const agent = new Agent({ - name: "provider-limited-agent", + name: "tool-limited-agent", model: google("gemini-2.0-flash-exp"), - 
instructions: "You are a helpful assistant.", + instructions: "You are a helpful assistant with tools.", + tools: [searchTool, calculateTool], rateLimits: { - llm: { - maxRequestsPerMinute: 10, // Global limit: 10 requests/min - onExceeded: "throw", - }, - providers: { - google: { - maxRequestsPerMinute: 3, // Google-specific: 3 requests/min + tools: { + search_tool: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", + onExceeded: "throw", + }, + calculator: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", onExceeded: "throw", }, }, @@ -117,24 +129,23 @@ async function example3_providerSpecificLimits() { }); try { - console.log("Provider-specific limit (Google): 3 requests/min"); - console.log("Global limit: 10 requests/min\n"); + console.log("Agent with tool-specific limits:", "\n"); + console.log("search_tool: 5 requests/min"); + console.log("calculator: 10 requests/min\n"); - for (let i = 1; i <= 4; i++) { - console.log(`Google request ${i}/4...`); - await agent.generateText(`Hello ${i}`); - console.log("✓ Success"); - } + const response = await agent.generateText("Search for 'JavaScript' and calculate 2 + 2."); + + console.log("\n✓ Response:", `${response.text.substring(0, 100)}...`); } catch (error) { if (error instanceof Error) { - console.error(`✗ Google rate limit exceeded (3/min): ${error.message}`); + console.error(`✗ Tool rate limit exceeded: ${error.message}`); } } } // Example 4: Tool-Specific Rate Limiting async function example4_toolRateLimiting() { - console.log("\n=== Example 4: Tool-Specific Rate Limiting ==="); + console.log("\n=== Example 4: Tool Rate Limiting ==="); const agent = new Agent({ name: "tool-limited-agent", @@ -167,7 +178,7 @@ async function example4_toolRateLimiting() { } } -// Example 5: Combined Rate Limiting +// Example 5: Combined LLM + Tool Rate Limiting async function example5_combinedLimits() { console.log("\n=== Example 5: Combined LLM + Tool Rate Limiting ==="); @@ -187,17 +198,10 @@ async function 
example5_combinedLimits() { onExceeded: "delay", }, }, - providers: { - google: { - maxRequestsPerMinute: 4, - onExceeded: "throw", - }, - }, }, }); console.log("Configuration:"); - console.log(" - Google provider: 4 req/min (throw)"); console.log(" - Global LLM: 5 req/min (delay)"); console.log(" - Search tool: 3 req/min (delay)\n"); @@ -248,15 +252,15 @@ async function main() { try { // Uncomment the examples you want to run: - // await example1_basicLLMRateLimit(); - // await example2_delayStrategy(); - // await example3_providerSpecificLimits(); - // await example4_toolRateLimiting(); - // await example5_combinedLimits(); - // await example6_monitoringStats(); + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + await example3_toolRateLimiting(); + await example4_toolRateLimiting(); + await example5_combinedLimits(); + await example6_monitoringStats(); console.log(`\n${"=".repeat(60)}`); - console.log("\n✓ All examples completed!"); + console.log("\n✓ Examples ready!"); console.log("\nNote: Uncomment examples in main() to run them."); } catch (error) { console.error("\n✗ Error:", error); @@ -269,7 +273,7 @@ main().catch(console.error); export { example1_basicLLMRateLimit, example2_delayStrategy, - example3_providerSpecificLimits, + example3_toolRateLimiting, example4_toolRateLimiting, example5_combinedLimits, example6_monitoringStats, diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index c40dc91d9..d2a565736 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -545,10 +545,7 @@ export class Agent { const provider = this.extractProviderFromModel(model); const modelId = modelName; - await this.rateLimitManager.checkLLMRateLimit({ - provider, - model: modelId, - }); + await this.rateLimitManager.checkLLMRateLimit(); methodLogger.debug("Rate limit check passed for LLM call", { event: LogEvents.AGENT_GENERATION_STARTED, @@ -777,10 +774,7 @@ export class Agent { const provider = 
this.extractProviderFromModel(model); const modelId = modelName; - await this.rateLimitManager.checkLLMRateLimit({ - provider, - model: modelId, - }); + await this.rateLimitManager.checkLLMRateLimit(); methodLogger.debug("Rate limit check passed for stream call", { event: LogEvents.AGENT_STREAM_STARTED, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a2e8c70a0..5f1d99751 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -123,8 +123,6 @@ export { RateLimitManager } from "./rate-limit/manager"; export type { AgentRateLimitConfig, LLMRateLimitConfig, - RateLimitConfig, - RateLimitContext, RateLimiter, RateLimitExceededAction, RateLimitScope, diff --git a/packages/core/src/rate-limit/index.ts b/packages/core/src/rate-limit/index.ts index b0b955c35..44f8541ba 100644 --- a/packages/core/src/rate-limit/index.ts +++ b/packages/core/src/rate-limit/index.ts @@ -12,9 +12,6 @@ export { RateLimitManager } from "./manager"; export type { AgentRateLimitConfig, LLMRateLimitConfig, - ProviderRateLimitConfig, - RateLimitConfig, - RateLimitContext, RateLimiter, RateLimitExceededAction, RateLimitScope, diff --git a/packages/core/src/rate-limit/manager.spec.ts b/packages/core/src/rate-limit/manager.spec.ts index 6b4d43f4e..f4262f0a8 100644 --- a/packages/core/src/rate-limit/manager.spec.ts +++ b/packages/core/src/rate-limit/manager.spec.ts @@ -19,13 +19,6 @@ describe("RateLimitManager", () => { strategy: "fixed_window", onExceeded: "throw", }, - providers: { - openai: { - maxRequestsPerMinute: 5, - strategy: "fixed_window", - onExceeded: "throw", - }, - }, tools: { search_tool: { maxRequestsPerMinute: 3, @@ -50,65 +43,20 @@ describe("RateLimitManager", () => { describe("checkLLMRateLimit", () => { it("should allow requests within global LLM limit", async () => { for (let i = 0; i < 10; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) - ).resolves.not.toThrow(); + await 
expect(manager.checkLLMRateLimit()).resolves.not.toThrow(); } // 11th request should throw - await expect( - manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) - ).rejects.toThrow(RateLimitExceededError); - }); - - it("should prioritize provider-specific limit over global limit", async () => { - // OpenAI has limit of 5, global has 10 - for (let i = 0; i < 5; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); - } - - // 6th OpenAI request should throw - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).rejects.toThrow(RateLimitExceededError); - }); - - it("should handle different providers independently", async () => { - // Use 5 OpenAI requests - for (let i = 0; i < 5; i++) { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - } - - // Claude should still have full global limit available - for (let i = 0; i < 10; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }) - ).resolves.not.toThrow(); - } + await expect(manager.checkLLMRateLimit()).rejects.toThrow(RateLimitExceededError); }); it("should allow requests when no limit configured", async () => { const noLimitManager = new RateLimitManager("test-agent", {}); for (let i = 0; i < 100; i++) { - await expect( - noLimitManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(noLimitManager.checkLLMRateLimit()).resolves.not.toThrow(); } }); - - it("should handle provider name case-insensitively", async () => { - await manager.checkLLMRateLimit({ provider: "OpenAI", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "OPENAI", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - - const stats = manager.getAllStats(); - // Should only have one limiter for openai - const openaiKeys = Object.keys(stats).filter((key) => 
key.includes("openai")); - expect(openaiKeys.length).toBe(1); - }); }); describe("checkToolRateLimit", () => { @@ -119,7 +67,7 @@ describe("RateLimitManager", () => { // 4th request should throw await expect(manager.checkToolRateLimit("search_tool")).rejects.toThrow( - RateLimitExceededError + RateLimitExceededError, ); }); @@ -143,8 +91,8 @@ describe("RateLimitManager", () => { describe("getAllStats", () => { it("should return stats for all active limiters", async () => { // Trigger creation of different limiters - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); await manager.checkToolRateLimit("search_tool"); const stats = manager.getAllStats(); @@ -168,8 +116,8 @@ describe("RateLimitManager", () => { describe("resetAll", () => { it("should reset all active limiters", async () => { // Use some requests - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); await manager.checkToolRateLimit("search_tool"); manager.resetAll(); @@ -184,18 +132,14 @@ describe("RateLimitManager", () => { it("should allow requests after reset", async () => { // Use up openai limit for (let i = 0; i < 5; i++) { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); } - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).rejects.toThrow(RateLimitExceededError); + await expect(manager.checkLLMRateLimit()).rejects.toThrow(RateLimitExceededError); manager.resetAll(); - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(manager.checkLLMRateLimit()).resolves.not.toThrow(); }); }); @@ 
-204,15 +148,15 @@ describe("RateLimitManager", () => { const stats1 = manager.getAllStats(); expect(Object.keys(stats1).length).toBe(0); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); const stats2 = manager.getAllStats(); expect(Object.keys(stats2).length).toBe(1); }); it("should reuse existing limiter", async () => { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); const stats = manager.getAllStats(); expect(Object.keys(stats).length).toBe(1); // Only one limiter should exist @@ -236,15 +180,11 @@ describe("RateLimitManager", () => { // Should not throw - when config is invalid/incomplete, it allows requests through // This is a graceful degradation approach - await expect( - invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(invalidManager.checkLLMRateLimit()).resolves.not.toThrow(); // Verify multiple requests are allowed (no rate limiting applied) for (let i = 0; i < 10; i++) { - await expect( - invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(invalidManager.checkLLMRateLimit()).resolves.not.toThrow(); } }); }); @@ -262,11 +202,11 @@ describe("RateLimitManager", () => { const delayManager = new RateLimitManager("test-agent", delayConfig); // Use up limit - await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); - await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + await delayManager.checkLLMRateLimit(); + await delayManager.checkLLMRateLimit(); // Next request should delay - const checkPromise = delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + const checkPromise = delayManager.checkLLMRateLimit(); let resolved = false; 
checkPromise.then(() => { diff --git a/packages/core/src/rate-limit/manager.ts b/packages/core/src/rate-limit/manager.ts index 09e1f7822..d5540681c 100644 --- a/packages/core/src/rate-limit/manager.ts +++ b/packages/core/src/rate-limit/manager.ts @@ -10,7 +10,7 @@ import type { Logger } from "@voltagent/internal"; import { FixedWindowCounterLimiter } from "./limiters/fixed-window"; -import type { AgentRateLimitConfig, RateLimiter, RateLimitScopeId } from "./types"; +import type { AgentRateLimitConfig, RateLimitScopeId, RateLimiter } from "./types"; export class RateLimitManager { private limiters: Map = new Map(); @@ -28,25 +28,8 @@ export class RateLimitManager { * Check rate limit for LLM call * This is called before generateText/streamText */ - async checkLLMRateLimit(context: { provider?: string; model?: string }): Promise { - // Priority order: - // 1. Provider-specific limit (if configured) - // 2. Global LLM limit (if configured) - - const providerName = context.provider?.toLowerCase(); - - // Check provider-specific limit first - if (providerName && this.config.providers?.[providerName]) { - const scopeId: RateLimitScopeId = { - type: "provider", - provider: providerName, - }; - const limiter = this.getLimiter(scopeId, this.config.providers[providerName]); - await limiter.acquire(); - return; - } - - // Check global LLM limit + async checkLLMRateLimit(): Promise { + // Check global LLM limit (if configured) if (this.config.llm) { const scopeId: RateLimitScopeId = { type: "global", @@ -88,13 +71,14 @@ export class RateLimitManager { maxTokensPerMinute?: number; strategy?: string; onExceeded?: "delay" | "throw"; - } + }, ): RateLimiter { const key = this.getScopeKey(scopeId); // Return existing limiter if already created - if (this.limiters.has(key)) { - return this.limiters.get(key)!; + const existingLimiter = this.limiters.get(key); + if (existingLimiter) { + return existingLimiter; } // Create new limiter @@ -120,7 +104,7 @@ export class RateLimitManager { 
maxTokensPerMinute?: number; strategy?: string; onExceeded?: "delay" | "throw"; - } + }, ): RateLimiter { const limit = config.maxRequestsPerMinute || 60; // Default 60 requests/min const strategy = config.strategy || "fixed_window"; @@ -129,7 +113,7 @@ export class RateLimitManager { // For MVP, only fixed_window is implemented if (strategy !== "fixed_window") { this.logger?.warn( - `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window` + `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window`, ); } @@ -148,12 +132,8 @@ export class RateLimitManager { switch (scopeId.type) { case "global": return "global:llm"; - case "agent": - return `agent:${scopeId.agentId}`; case "tool": return `tool:${scopeId.agentId}:${scopeId.toolName}`; - case "provider": - return `provider:${scopeId.provider}`; default: return "unknown"; } diff --git a/packages/core/src/rate-limit/types.ts b/packages/core/src/rate-limit/types.ts index c45a33b2d..baf75af57 100644 --- a/packages/core/src/rate-limit/types.ts +++ b/packages/core/src/rate-limit/types.ts @@ -5,8 +5,9 @@ /** * Available rate limiting strategies + * - 'fixed_window': Simple counter that resets after a fixed time period (MVP implementation) */ -export type RateLimitStrategy = "fixed_window" | "token_bucket" | "leaky_bucket"; +export type RateLimitStrategy = "fixed_window"; /** * Action to take when rate limit is exceeded @@ -17,12 +18,10 @@ export type RateLimitExceededAction = "delay" | "throw"; /** * Scope for rate limiting - * - 'global': Apply to all operations across all agents - * - 'agent': Apply per agent instance - * - 'tool': Apply per tool - * - 'provider': Apply per LLM provider + * - 'global': Apply to all LLM operations across all agents + * - 'tool': Apply per tool per agent */ -export type RateLimitScope = "global" | "agent" | "tool" | "provider"; +export type RateLimitScope = "global" | "tool"; /** * Statistics for current rate limit state @@ -38,20 +37,6 @@ export 
interface RateLimitStats { current: number; } -/** - * Base configuration for a rate limiter - */ -export interface RateLimitConfig { - /** Rate limiting strategy to use */ - strategy: RateLimitStrategy; - /** Maximum number of requests per window */ - limit: number; - /** Time window in milliseconds */ - windowMs: number; - /** Action when limit is exceeded */ - onExceeded?: RateLimitExceededAction; -} - /** * Configuration for LLM-specific rate limiting */ @@ -80,17 +65,10 @@ export interface ToolRateLimitConfig { /** * Configuration for provider-specific rate limiting + * NOTE: Provider-specific limits are not currently implemented. + * Use LLMRateLimitConfig global limits instead. */ -export interface ProviderRateLimitConfig { - /** Maximum requests per minute */ - maxRequestsPerMinute?: number; - /** Maximum tokens per minute (future enhancement) */ - maxTokensPerMinute?: number; - /** Rate limiting strategy */ - strategy?: RateLimitStrategy; - /** Action when limit exceeded */ - onExceeded?: RateLimitExceededAction; -} +// REMOVED: ProviderRateLimitConfig - not implemented in current version /** * Complete rate limiting configuration for an agent @@ -100,24 +78,6 @@ export interface AgentRateLimitConfig { llm?: LLMRateLimitConfig; /** Per-tool rate limits (keyed by tool name) */ tools?: Record; - /** Per-provider rate limits (keyed by provider name like 'openai', 'anthropic') */ - providers?: Record; -} - -/** - * Context for rate limit check - */ -export interface RateLimitContext { - /** Agent ID */ - agentId: string; - /** Operation type (llm or tool) */ - operationType: "llm" | "tool"; - /** Provider name (for LLM operations) */ - provider?: string; - /** Model name (for LLM operations) */ - model?: string; - /** Tool name (for tool operations) */ - toolName?: string; } /** @@ -154,14 +114,13 @@ export interface RateLimiter { /** * Scope identifier for rate limit manager + * Identifies which rate limiter to use for a specific operation */ export interface 
RateLimitScopeId { - /** Scope type */ - type: "global" | "agent" | "tool" | "provider"; - /** Agent ID (for agent/tool scopes) */ + /** Scope type - either global (all LLM calls) or tool-specific */ + type: "global" | "tool"; + /** Agent ID (required for tool scope) */ agentId?: string; - /** Tool name (for tool scope) */ + /** Tool name (required for tool scope) */ toolName?: string; - /** Provider name (for provider scope) */ - provider?: string; } From 486f11ba162a15d08330e65c1deed50ca78de057 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:03:28 +0530 Subject: [PATCH 4/6] feat: implement multiple rate limiting strategies with hook support --- examples/with-rate-limiting/README.md | 267 ++++++++++++++++++++------ 1 file changed, 205 insertions(+), 62 deletions(-) diff --git a/examples/with-rate-limiting/README.md b/examples/with-rate-limiting/README.md index 9ff104806..82262bbf7 100644 --- a/examples/with-rate-limiting/README.md +++ b/examples/with-rate-limiting/README.md @@ -1,107 +1,250 @@ -# Rate Limiting Example +# Rate Limiting in VoltAgent -This example demonstrates VoltAgent's rate limiting feature to control the frequency of LLM calls and tool executions. +This example demonstrates how to implement comprehensive rate limiting in VoltAgent agents, including different strategies, tool-specific limits, and monitoring hooks. ## Features Demonstrated -1. **Basic LLM Rate Limiting** - Limit requests per minute with error throwing -2. **Delay Strategy** - Automatic waiting when limits are exceeded -3. **Provider-Specific Limits** - Different limits for different LLM providers -4. **Tool Rate Limiting** - Control tool execution frequency -5. **Combined Limits** - Multiple rate limits working together -6. **Monitoring Stats** - Track rate limit usage in real-time +1. **Basic LLM Rate Limiting** - Enforce model request limits with error throwing +2. **Delay Strategy** - Automatically wait when rate limits are exceeded +3. 
**Tool-Specific Rate Limiting** - Set different limits for different tools +4. **Tool Rate Limiting** - Focus on individual tool request constraints +5. **Combined LLM + Tool Rate Limiting** - Apply limits at both agent and tool levels +6. **Rate Limit Statistics** - Monitor and track rate limit usage +7. **Hooks for Monitoring** - React to rate limit events with callbacks ## Installation ```bash +cd examples/with-rate-limiting pnpm install ``` -## Configuration +## Configuration Options -Set your OpenAI API key: +### Basic Rate Limit Configuration -```bash -export GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here +```typescript +const agent = new Agent({ + name: "limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", // or "sliding_window", "token_bucket", "leaky_bucket" + onExceeded: "throw", // or "delay" + }, + }, +}); ``` +### Rate Limiting Strategies + +- **Fixed Window**: Simple per-minute counter reset +- **Sliding Window**: More accurate time-based windowing +- **Token Bucket**: Allows burst traffic while maintaining average rate +- **Leaky Bucket**: Smooth request distribution over time + +## Hooks for Monitoring + +Hooks allow you to react to rate limit events with custom callbacks: + +```typescript +const agent = new Agent({ + name: "hooked-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 3, + onExceeded: "delay", + onLimitExceeded: async (context) => { + console.warn(`⚠️ Rate limit exceeded for scope: ${context.scope}`); + console.warn(` Remaining: ${context.stats.remaining}`); + console.warn(` Reset at: ${context.stats.resetAt}`); + }, + }, + tools: { + search_tool: { + maxRequestsPerMinute: 2, + onExceeded: "delay", + onLimitExceeded: async (context) => { + console.warn(`⚠️ Search tool rate limit exceeded!`); + // Send alerts, log 
to monitoring systems, etc. + }, + }, + }, + }, +}); +``` + +### Hook Context Properties + +When a hook is triggered, the context object contains: + +- `scope` - The scope where limit was exceeded (e.g., "llm", "tool:search_tool") +- `stats` - Rate limit statistics object with: + - `remaining` - Number of requests still available in current window + - `resetAt` - Timestamp when the rate limit window resets + +## Examples in Detail + +### Example 1: Basic LLM Rate Limiting + +Demonstrates enforcing a 3 requests-per-minute limit with error throwing. The 4th request will throw a `RateLimitExceededError` containing retry information. + +**Key Features:** + +- `maxRequestsPerMinute: 3` - Only 3 requests allowed per minute +- `onExceeded: "throw"` - Immediately throws error when limit exceeded +- Error includes `remaining` count and `resetAt` timestamp + +### Example 2: Delay Strategy + +Shows automatic waiting when limits are reached. The agent automatically delays the 3rd request until the rate limit window resets, providing a seamless experience. + +**Key Features:** + +- `maxRequestsPerMinute: 2` - Only 2 requests allowed per minute +- `onExceeded: "delay"` - Automatically waits instead of failing +- Transparent to the caller - request completes after waiting + +### Example 3: Tool-Specific Rate Limiting + +Sets different limits for different tools: + +- `search_tool`: 5 requests/minute +- `calculator`: 10 requests/minute + +Each tool has independent rate limit counters. + +**Key Features:** + +- Multiple tools with different limits +- Each tool tracked separately +- `onExceeded: "throw"` for strict enforcement + +### Example 4: Tool Rate Limiting + +Focuses on limiting a specific tool (`search_tool` at 2 requests/minute). Useful when a particular tool has stricter API rate limits. 
+ +**Key Features:** + +- Single tool limiting +- `onExceeded: "throw"` strategy +- Tests agent behavior with multiple search requests + +### Example 5: Combined LLM + Tool Rate Limiting + +Applies limits at both levels: + +- Global LLM: 5 requests/minute with delay +- Individual tool: 3 requests/minute with delay + +The agent respects all configured limits. + +**Key Features:** + +- Both LLM and tool limits active +- Both use `onExceeded: "delay"` +- Demonstrates multi-level rate limiting + +### Example 6: Monitoring Rate Limit Statistics + +Demonstrates how to access and monitor rate limit state. Statistics are tracked internally and accessible through error properties when limits are exceeded. + +**Key Features:** + +- Rate limits are maintained internally +- Statistics available in error objects +- Useful for logging and monitoring + +### Example 7: Hooks for Rate Limit Monitoring ⭐ NEW + +Uses callback hooks to react to rate limit events in real-time: + +- `onLimitExceeded` - Called when a rate limit is exceeded + +This provides flexible, event-driven monitoring without blocking the agent flow. + +**Key Features:** + +- `maxRequestsPerMinute: 3` with 4 requests (triggers hook) +- Hook logs scope, remaining count, and reset timestamp +- Demonstrates hook triggering and context usage + ## Running Examples Edit `src/index.ts` and uncomment the examples you want to run in the `main()` function: ```typescript async function main() { - await example1_basicLLMRateLimit(); - await example2_delayStrategy(); - // ... 
etc + console.log("🚀 VoltAgent Rate Limiting Examples\n"); + + try { + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + await example3_toolRateLimiting(); + await example4_toolRateLimiting(); + await example5_combinedLimits(); + await example6_monitoringStats(); + await example7_hooksForMonitoring(); + + console.log("\n✓ Examples ready!"); + } catch (error) { + console.error("\n✗ Error:", error); + } } ``` Then run: ```bash -pnpm start +pnpm run dev ``` -## Rate Limit Configuration +## Error Handling -### LLM Rate Limiting +When using `onExceeded: "throw"`: ```typescript -rateLimits: { - llm: { - maxRequestsPerMinute: 10, - strategy: "fixed_window", - onExceeded: "throw" // or "delay" +try { + const response = await agent.generateText("What is AI?"); +} catch (error) { + if (error instanceof RateLimitExceededError) { + console.log(`Retry after ${error.retryAfter} seconds`); + console.log(`Remaining requests: ${error.stats.remaining}`); + console.log(`Reset at: ${error.stats.resetAt}`); } } ``` -### Provider-Specific Limits +When using `onExceeded: "delay"`: ```typescript -rateLimits: { - providers: { - openai: { - maxRequestsPerMinute: 5, - onExceeded: "throw" - }, - anthropic: { - maxRequestsPerMinute: 3, - onExceeded: "delay" - } - } -} +// The agent automatically waits for the rate limit window to reset +const response = await agent.generateText("What is AI?"); +// Request completes transparently after waiting ``` -### Tool Rate Limiting +## Best Practices -```typescript -rateLimits: { - tools: { - search_tool: { - maxRequestsPerMinute: 3, - onExceeded: "delay" - } - } -} -``` - -## Strategies +1. **Choose Appropriate Limits**: Set realistic limits based on your API quotas +2. **Monitor Hooks**: Use hooks to track rate limit violations and alert teams +3. **Combine Strategies**: Use different strategies at different levels (LLM vs tools) +4. **Plan for Bursts**: Token bucket strategy works well for bursty workloads +5. 
**Log Rate Limit Events**: Keep audit trails of rate limit violations +6. **Test Gracefully**: Verify behavior when limits are reached before production -### `onExceeded: "throw"` - -- Immediately throws `RateLimitExceededError` when limit is reached -- Good for strict enforcement and error handling - -### `onExceeded: "delay"` +## Use Cases -- Automatically waits until the rate limit resets -- Good for background jobs and retry scenarios +- **Cost Control**: Limit expensive LLM API calls to manage expenses +- **API Quota Management**: Stay within provider rate limits and quotas +- **Resource Protection**: Prevent tool overuse and API abuse +- **Fair Usage**: Distribute resources fairly across multiple agents +- **Compliance**: Ensure rate limits required by service agreements +- **Graceful Degradation**: Choose between failing fast or queuing requests -## Use Cases +## More Information -- **Cost Control**: Limit expensive LLM API calls -- **API Quota Management**: Stay within provider rate limits -- **Resource Protection**: Prevent tool overuse -- **Fair Usage**: Distribute resources across multiple agents +For more details on rate limiting strategies and implementation, see the VoltAgent core documentation. 
From a2682f9b50518ddc7c14150fab17aaed5646930c Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:06:27 +0530 Subject: [PATCH 5/6] refactor: streamline delay handling in FixedWindowCounterLimiter --- .../src/rate-limit/limiters/fixed-window.ts | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/packages/core/src/rate-limit/limiters/fixed-window.ts b/packages/core/src/rate-limit/limiters/fixed-window.ts index b926a18a6..7be75c2f2 100644 --- a/packages/core/src/rate-limit/limiters/fixed-window.ts +++ b/packages/core/src/rate-limit/limiters/fixed-window.ts @@ -12,7 +12,7 @@ */ import { RateLimitExceededError } from "../errors"; -import type { RateLimiter, RateLimitExceededAction, RateLimitStats } from "../types"; +import type { RateLimitExceededAction, RateLimitStats, RateLimiter } from "../types"; export interface FixedWindowCounterConfig { /** Maximum requests per window */ @@ -26,7 +26,7 @@ export interface FixedWindowCounterConfig { } export class FixedWindowCounterLimiter implements RateLimiter { - private count: number = 0; + private count = 0; private windowStart: number; private readonly limit: number; private readonly windowMs: number; @@ -63,16 +63,15 @@ export class FixedWindowCounterLimiter implements RateLimiter { stats, scope: this.scope, }); - } else { - // Delay until window resets - const waitTime = this.windowStart + this.windowMs - now; - if (waitTime > 0) { - await this.delay(waitTime); - } - // After waiting, reset window and retry - this.count = 0; - this.windowStart = Date.now(); } + // Delay until window resets + const waitTime = this.windowStart + this.windowMs - now; + if (waitTime > 0) { + await this.delay(waitTime); + } + // After waiting, reset window and retry + this.count = 0; + this.windowStart = Date.now(); } // Increment counter From 1edd8d30dfb1bc07cfb5c4f99d9f379521fa9ff4 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:16:54 +0530 Subject: [PATCH 6/6] feat: add 
rate-limiting implementation in Agent class with hook support --- .changeset/young-rice-study.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/young-rice-study.md diff --git a/.changeset/young-rice-study.md b/.changeset/young-rice-study.md new file mode 100644 index 000000000..7d1514611 --- /dev/null +++ b/.changeset/young-rice-study.md @@ -0,0 +1,6 @@ +--- +"with-rate-limiting": minor +"@voltagent/core": minor +--- + +Implemented rate-limiting in Agent class and passed a hook