From 46ea0ce6b036fcb7a30e56c6c87e04430c0bdb51 Mon Sep 17 00:00:00 2001 From: sujal Date: Sat, 25 Oct 2025 17:37:45 +0530 Subject: [PATCH 1/6] feat: implement rate limiting to control external API requests --- packages/core/src/agent/agent.ts | 72 +++++ packages/core/src/agent/types.ts | 31 +- packages/core/src/index.ts | 17 ++ packages/core/src/rate-limit/errors.ts | 54 ++++ packages/core/src/rate-limit/index.ts | 25 ++ .../rate-limit/limiters/fixed-window.spec.ts | 232 ++++++++++++++ .../src/rate-limit/limiters/fixed-window.ts | 135 +++++++++ .../core/src/rate-limit/limiters/index.ts | 5 + packages/core/src/rate-limit/manager.spec.ts | 285 ++++++++++++++++++ packages/core/src/rate-limit/manager.ts | 185 ++++++++++++ packages/core/src/rate-limit/types.ts | 167 ++++++++++ 11 files changed, 1203 insertions(+), 5 deletions(-) create mode 100644 packages/core/src/rate-limit/errors.ts create mode 100644 packages/core/src/rate-limit/index.ts create mode 100644 packages/core/src/rate-limit/limiters/fixed-window.spec.ts create mode 100644 packages/core/src/rate-limit/limiters/fixed-window.ts create mode 100644 packages/core/src/rate-limit/limiters/index.ts create mode 100644 packages/core/src/rate-limit/manager.spec.ts create mode 100644 packages/core/src/rate-limit/manager.ts create mode 100644 packages/core/src/rate-limit/types.ts diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 52fc55671..c40dc91d9 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -37,6 +37,7 @@ import { ActionType, buildAgentLogMessage } from "../logger/message-builder"; import type { Memory, MemoryUpdateMode } from "../memory"; import { MemoryManager } from "../memory/manager/memory-manager"; import { type VoltAgentObservability, createVoltAgentObservability } from "../observability"; +import { RateLimitManager } from "../rate-limit/manager"; import { AgentRegistry } from "../registries/agent-registry"; import type { 
BaseRetriever } from "../retriever/retriever"; import type { Tool, Toolkit } from "../tool"; @@ -347,6 +348,7 @@ export class Agent { private defaultObservability?: VoltAgentObservability; private readonly toolManager: ToolManager; private readonly subAgentManager: SubAgentManager; + private readonly rateLimitManager?: RateLimitManager; private readonly voltOpsClient?: VoltOpsClient; private readonly prompts?: PromptHelper; private readonly evalConfig?: AgentEvalConfig; @@ -416,6 +418,17 @@ export class Agent { this.supervisorConfig, ); + // Initialize rate limit manager if configuration provided + if (options.rateLimits) { + this.rateLimitManager = new RateLimitManager(this.id, options.rateLimits, this.logger); + this.logger.debug("Rate limit manager initialized", { + event: LogEvents.AGENT_CREATED, + agentId: this.id, + hasLLMRateLimit: !!options.rateLimits.llm, + hasToolRateLimits: !!options.rateLimits.tools, + }); + } + // Initialize prompts helper with VoltOpsClient (agent's own or global) // Priority 1: Agent's own VoltOpsClient // Priority 2: Global VoltOpsClient from registry @@ -526,6 +539,24 @@ export class Agent { tools: tools ? 
Object.keys(tools) : [], }); + // Rate limit check before LLM call + if (this.rateLimitManager) { + // Extract provider from model if available + const provider = this.extractProviderFromModel(model); + const modelId = modelName; + + await this.rateLimitManager.checkLLMRateLimit({ + provider, + model: modelId, + }); + + methodLogger.debug("Rate limit check passed for LLM call", { + event: LogEvents.AGENT_GENERATION_STARTED, + provider, + model: modelId, + }); + } + // Extract VoltAgent-specific options const { userId, @@ -740,6 +771,24 @@ export class Agent { // Setup abort signal listener this.setupAbortSignalListener(oc); + // Rate limit check before LLM call + if (this.rateLimitManager) { + // Extract provider from model if available + const provider = this.extractProviderFromModel(model); + const modelId = modelName; + + await this.rateLimitManager.checkLLMRateLimit({ + provider, + model: modelId, + }); + + methodLogger.debug("Rate limit check passed for stream call", { + event: LogEvents.AGENT_STREAM_STARTED, + provider, + model: modelId, + }); + } + // Extract VoltAgent-specific options const { userId, @@ -1670,6 +1719,19 @@ export class Agent { }; } + /** + * Extract provider name from AI SDK model + * Returns the provider identifier for rate limiting purposes + */ + private extractProviderFromModel(model: LanguageModel): string { + // AI SDK models have a 'provider' property that identifies the provider + // e.g., "google.generative-ai", "openai", "anthropic" + if (typeof model === "object" && model !== null && "provider" in model) { + return String(model.provider); + } + return "unknown"; + } + /** * Common preparation for all execution methods */ @@ -2757,6 +2819,16 @@ export class Agent { // Call tool start hook - can throw ToolDeniedError await hooks.onToolStart?.({ agent: this, tool, context: oc, args }); + // Rate limit check before tool execution + if (this.rateLimitManager) { + await this.rateLimitManager.checkToolRateLimit(tool.name); + + 
oc.logger.debug("Rate limit check passed for tool execution", { + event: LogEvents.AGENT_STEP_TOOL_CALL, + toolName: tool.name, + }); + } + // Execute tool with OperationContext directly if (!tool.execute) { throw new Error(`Tool ${tool.name} does not have "execute" method`); diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index dd5fb29d2..4e77c1029 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -18,17 +18,15 @@ import type { Voice } from "../voice/types"; import type { VoltOpsClient } from "../voltops/client"; import type { Agent } from "./agent"; import type { CancellationError, VoltAgentError } from "./errors"; -import type { LLMProvider } from "./providers"; -import type { BaseTool } from "./providers"; -import type { StepWithContent } from "./providers"; +import type { BaseTool, LLMProvider, StepWithContent } from "./providers"; import type { UsageInfo } from "./providers/base/types"; -import type { SubAgentConfig } from "./subagent/types"; -import type { VoltAgentTextStreamPart } from "./subagent/types"; +import type { SubAgentConfig, VoltAgentTextStreamPart } from "./subagent/types"; import type { Logger } from "@voltagent/internal"; import type { LocalScorerDefinition, SamplingPolicy } from "../eval/runtime"; import type { MemoryOptions, MemoryStorageMetadata, WorkingMemorySummary } from "../memory/types"; import type { VoltAgentObservability } from "../observability"; +import type { AgentRateLimitConfig } from "../rate-limit/types"; import type { DynamicValue, DynamicValueOptions, @@ -476,6 +474,29 @@ export type AgentOptions = { // Live evaluation configuration eval?: AgentEvalConfig; + + // Rate limiting configuration + /** + * Rate limiting configuration for controlling request frequency + * Helps prevent exceeding API rate limits and manage costs + * + * @example + * ```typescript + * rateLimits: { + * llm: { + * maxRequestsPerMinute: 10, + * strategy: 'fixed_window', + * 
onExceeded: 'delay' + * }, + * tools: { + * 'search_tool': { + * maxRequestsPerMinute: 5 + * } + * } + * } + * ``` + */ + rateLimits?: AgentRateLimitConfig; }; export type AgentEvalOperationType = diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index bbfe37b9b..a2e8c70a0 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -116,6 +116,23 @@ export { ConversationNotFoundError, } from "./memory"; +// Rate Limiting exports +export { RateLimitExceededError } from "./rate-limit/errors"; +export { FixedWindowCounterLimiter } from "./rate-limit/limiters/fixed-window"; +export { RateLimitManager } from "./rate-limit/manager"; +export type { + AgentRateLimitConfig, + LLMRateLimitConfig, + RateLimitConfig, + RateLimitContext, + RateLimiter, + RateLimitExceededAction, + RateLimitScope, + RateLimitStats, + RateLimitStrategy, + ToolRateLimitConfig, +} from "./rate-limit/types"; + // Export adapters from subdirectories export { InMemoryStorageAdapter } from "./memory/adapters/storage/in-memory"; export { InMemoryVectorAdapter } from "./memory/adapters/vector/in-memory"; diff --git a/packages/core/src/rate-limit/errors.ts b/packages/core/src/rate-limit/errors.ts new file mode 100644 index 000000000..a4ffcae12 --- /dev/null +++ b/packages/core/src/rate-limit/errors.ts @@ -0,0 +1,54 @@ +/** + * Custom errors for rate limiting + */ + +import type { RateLimitStats } from "./types"; + +/** + * Error thrown when rate limit is exceeded and onExceeded='throw' + */ +export class RateLimitExceededError extends Error { + /** Current rate limit statistics */ + public readonly stats: RateLimitStats; + /** When the rate limit will reset */ + public readonly resetAt: Date; + /** Scope that was rate limited */ + public readonly scope: string; + + /** + * Milliseconds until reset (dynamically calculated) + * Always returns fresh value based on current time + */ + public get retryAfter(): number { + return Math.max(0, this.resetAt.getTime() - 
Date.now()); + } + + constructor(params: { stats: RateLimitStats; scope: string; message?: string }) { + const defaultMessage = `Rate limit exceeded for ${params.scope}. Limit: ${params.stats.limit} requests. Resets at ${params.stats.resetAt.toISOString()}`; + super(params.message || defaultMessage); + + this.name = "RateLimitExceededError"; + this.stats = params.stats; + this.resetAt = params.stats.resetAt; + this.scope = params.scope; + + // Maintains proper stack trace for where error was thrown (V8 only) + if (Error.captureStackTrace) { + Error.captureStackTrace(this, RateLimitExceededError); + } + } + + /** + * Get a user-friendly error message + */ + toJSON() { + return { + name: this.name, + message: this.message, + scope: this.scope, + retryAfter: this.retryAfter, + resetAt: this.resetAt.toISOString(), + stats: this.stats, + }; + } +} diff --git a/packages/core/src/rate-limit/index.ts b/packages/core/src/rate-limit/index.ts new file mode 100644 index 000000000..b0b955c35 --- /dev/null +++ b/packages/core/src/rate-limit/index.ts @@ -0,0 +1,25 @@ +/** + * Rate Limiting Module + * + * Provides configurable rate limiting for LLM calls and tool executions. + * Prevents exceeding API rate limits and enables cost control. 
+ */ + +export { RateLimitExceededError } from "./errors"; +export { FixedWindowCounterLimiter } from "./limiters/fixed-window"; +export { RateLimitManager } from "./manager"; + +export type { + AgentRateLimitConfig, + LLMRateLimitConfig, + ProviderRateLimitConfig, + RateLimitConfig, + RateLimitContext, + RateLimiter, + RateLimitExceededAction, + RateLimitScope, + RateLimitScopeId, + RateLimitStats, + RateLimitStrategy, + ToolRateLimitConfig, +} from "./types"; diff --git a/packages/core/src/rate-limit/limiters/fixed-window.spec.ts b/packages/core/src/rate-limit/limiters/fixed-window.spec.ts new file mode 100644 index 000000000..331a656e9 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/fixed-window.spec.ts @@ -0,0 +1,232 @@ +/** + * Tests for FixedWindowCounterLimiter + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RateLimitExceededError } from "../errors"; +import type { RateLimitConfig } from "../types"; +import { FixedWindowCounterLimiter } from "./fixed-window"; + +describe("FixedWindowCounterLimiter", () => { + let limiter: FixedWindowCounterLimiter; + let config: RateLimitConfig; + + beforeEach(() => { + vi.useFakeTimers(); + config = { + strategy: "fixed_window", + limit: 5, + windowMs: 60000, // 1 minute + onExceeded: "throw", + }; + limiter = new FixedWindowCounterLimiter(config); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("constructor", () => { + it("should initialize with provided config", () => { + expect(limiter).toBeDefined(); + const stats = limiter.getStats(); + expect(stats.limit).toBe(5); + expect(stats.remaining).toBe(5); + expect(stats.current).toBe(0); + }); + }); + + describe("acquire", () => { + it("should allow requests within limit", async () => { + await expect(limiter.acquire()).resolves.not.toThrow(); + await expect(limiter.acquire()).resolves.not.toThrow(); + await expect(limiter.acquire()).resolves.not.toThrow(); + + const stats = limiter.getStats(); + 
expect(stats.current).toBe(3); + expect(stats.remaining).toBe(2); + }); + + it("should throw error when limit exceeded and onExceeded=throw", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + // Next request should throw + await expect(limiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + + it("should delay when limit exceeded and onExceeded=delay", async () => { + const delayLimiter = new FixedWindowCounterLimiter({ + ...config, + onExceeded: "delay", + }); + + // Use up all requests + for (let i = 0; i < 5; i++) { + await delayLimiter.acquire(); + } + + // Next request should delay + const acquirePromise = delayLimiter.acquire(); + + // Should not resolve immediately + let resolved = false; + acquirePromise.then(() => { + resolved = true; + }); + + await vi.advanceTimersByTimeAsync(100); + expect(resolved).toBe(false); + + // Advance time to reset window + await vi.advanceTimersByTimeAsync(60000); + await acquirePromise; + expect(resolved).toBe(true); + }); + + it("should reset counter when window expires", async () => { + // Use 3 requests + await limiter.acquire(); + await limiter.acquire(); + await limiter.acquire(); + + expect(limiter.getStats().current).toBe(3); + + // Advance time past window + await vi.advanceTimersByTimeAsync(60000); + + // Next acquire should reset counter + await limiter.acquire(); + const stats = limiter.getStats(); + expect(stats.current).toBe(1); + expect(stats.remaining).toBe(4); + }); + }); + + describe("check", () => { + it("should return true when requests are available", async () => { + expect(limiter.check()).toBe(true); + await limiter.acquire(); + expect(limiter.check()).toBe(true); + }); + it("should return false when limit is reached", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + }); + + it("should return true after window reset", async () => { + // Use up 
all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + + // Advance time past window + await vi.advanceTimersByTimeAsync(60000); + + expect(limiter.check()).toBe(true); + }); + }); + + describe("getStats", () => { + it("should return accurate statistics", async () => { + await limiter.acquire(); + await limiter.acquire(); + + const stats = limiter.getStats(); + expect(stats.limit).toBe(5); + expect(stats.current).toBe(2); + expect(stats.remaining).toBe(3); + expect(stats.resetAt).toBeInstanceOf(Date); + }); + + it("should update resetAt based on windowMs", async () => { + const now = Date.now(); + await limiter.acquire(); + + const stats = limiter.getStats(); + const resetTime = stats.resetAt.getTime(); + + // Reset time should be approximately now + windowMs + expect(resetTime).toBeGreaterThanOrEqual(now + config.windowMs - 100); + expect(resetTime).toBeLessThanOrEqual(now + config.windowMs + 100); + }); + }); + + describe("reset", () => { + it("should reset counter and window", async () => { + // Use some requests + await limiter.acquire(); + await limiter.acquire(); + await limiter.acquire(); + + expect(limiter.getStats().current).toBe(3); + + limiter.reset(); + + const stats = limiter.getStats(); + expect(stats.current).toBe(0); + expect(stats.remaining).toBe(5); + }); + + it("should allow requests after reset", async () => { + // Use up all requests + for (let i = 0; i < 5; i++) { + await limiter.acquire(); + } + + expect(limiter.check()).toBe(false); + + limiter.reset(); + + expect(limiter.check()).toBe(true); + await expect(limiter.acquire()).resolves.not.toThrow(); + }); + }); + + describe("edge cases", () => { + it("should handle limit of 1", async () => { + const singleLimiter = new FixedWindowCounterLimiter({ + ...config, + limit: 1, + }); + + await expect(singleLimiter.acquire()).resolves.not.toThrow(); + await expect(singleLimiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + 
+ it("should handle very short windows", async () => { + const shortWindowLimiter = new FixedWindowCounterLimiter({ + ...config, + windowMs: 100, // 100ms window + }); + + await shortWindowLimiter.acquire(); + expect(shortWindowLimiter.check()).toBe(true); + + // Advance past window + await vi.advanceTimersByTimeAsync(100); + + await shortWindowLimiter.acquire(); + expect(shortWindowLimiter.getStats().current).toBe(1); + }); + + it("should handle concurrent requests", async () => { + const promises = Array.from({ length: 5 }, () => limiter.acquire()); + await expect(Promise.all(promises)).resolves.not.toThrow(); + + const stats = limiter.getStats(); + expect(stats.current).toBe(5); + expect(stats.remaining).toBe(0); + + await expect(limiter.acquire()).rejects.toThrow(RateLimitExceededError); + }); + }); +}); diff --git a/packages/core/src/rate-limit/limiters/fixed-window.ts b/packages/core/src/rate-limit/limiters/fixed-window.ts new file mode 100644 index 000000000..b926a18a6 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/fixed-window.ts @@ -0,0 +1,135 @@ +/** + * Fixed Window Counter Rate Limiter + * + * Implements a simple fixed-window rate limiting algorithm: + * - Tracks number of requests in a fixed time window + * - Resets counter when window expires + * - Efficient and easy to understand + * + * Example: 10 requests per minute + * - Window: 0:00 - 1:00, allows 10 requests + * - At 1:00, window resets, allows 10 more requests + */ + +import { RateLimitExceededError } from "../errors"; +import type { RateLimiter, RateLimitExceededAction, RateLimitStats } from "../types"; + +export interface FixedWindowCounterConfig { + /** Maximum requests per window */ + limit: number; + /** Window duration in milliseconds */ + windowMs: number; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; + /** Scope identifier for error messages */ + scope?: string; +} + +export class FixedWindowCounterLimiter implements RateLimiter { + private 
count: number = 0; + private windowStart: number; + private readonly limit: number; + private readonly windowMs: number; + private readonly onExceeded: RateLimitExceededAction; + private readonly scope: string; + + constructor(config: FixedWindowCounterConfig) { + this.limit = config.limit; + this.windowMs = config.windowMs; + this.onExceeded = config.onExceeded || "delay"; + this.scope = config.scope || "unknown"; + this.windowStart = Date.now(); + } + + /** + * Acquire permission to proceed + * Either waits or throws based on onExceeded configuration + */ + async acquire(): Promise { + const now = Date.now(); + + // Reset window if expired + if (now - this.windowStart >= this.windowMs) { + this.count = 0; + this.windowStart = now; + } + + // Check if we're over the limit + if (this.count >= this.limit) { + const stats = this.getStats(); + + if (this.onExceeded === "throw") { + throw new RateLimitExceededError({ + stats, + scope: this.scope, + }); + } else { + // Delay until window resets + const waitTime = this.windowStart + this.windowMs - now; + if (waitTime > 0) { + await this.delay(waitTime); + } + // After waiting, reset window and retry + this.count = 0; + this.windowStart = Date.now(); + } + } + + // Increment counter + this.count++; + } + + /** + * Check if request can proceed without blocking + */ + check(): boolean { + const now = Date.now(); + + // Reset window if expired + if (now - this.windowStart >= this.windowMs) { + return true; + } + + return this.count < this.limit; + } + + /** + * Get current statistics + */ + getStats(): RateLimitStats { + const now = Date.now(); + const resetAt = new Date(this.windowStart + this.windowMs); + + // If window has expired, return fresh stats + if (now - this.windowStart >= this.windowMs) { + return { + remaining: this.limit, + limit: this.limit, + current: 0, + resetAt: new Date(now + this.windowMs), + }; + } + + return { + remaining: Math.max(0, this.limit - this.count), + limit: this.limit, + current: 
this.count, + resetAt, + }; + } + + /** + * Reset the limiter state + */ + reset(): void { + this.count = 0; + this.windowStart = Date.now(); + } + + /** + * Internal delay helper + */ + private delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } +} diff --git a/packages/core/src/rate-limit/limiters/index.ts b/packages/core/src/rate-limit/limiters/index.ts new file mode 100644 index 000000000..ea8363d40 --- /dev/null +++ b/packages/core/src/rate-limit/limiters/index.ts @@ -0,0 +1,5 @@ +/** + * Rate Limiter Implementations + */ + +export { FixedWindowCounterLimiter } from "./fixed-window"; diff --git a/packages/core/src/rate-limit/manager.spec.ts b/packages/core/src/rate-limit/manager.spec.ts new file mode 100644 index 000000000..6b4d43f4e --- /dev/null +++ b/packages/core/src/rate-limit/manager.spec.ts @@ -0,0 +1,285 @@ +/** + * Tests for RateLimitManager + */ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { RateLimitExceededError } from "./errors"; +import { RateLimitManager } from "./manager"; +import type { AgentRateLimitConfig } from "./types"; + +describe("RateLimitManager", () => { + let manager: RateLimitManager; + let config: AgentRateLimitConfig; + + beforeEach(() => { + vi.useFakeTimers(); + config = { + llm: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", + onExceeded: "throw", + }, + providers: { + openai: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + tools: { + search_tool: { + maxRequestsPerMinute: 3, + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + }; + manager = new RateLimitManager("test-agent", config); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + describe("constructor", () => { + it("should initialize with provided config", () => { + expect(manager).toBeDefined(); + }); + }); + + describe("checkLLMRateLimit", () => { + it("should allow requests within global LLM limit", async () => { + 
for (let i = 0; i < 10; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) + ).resolves.not.toThrow(); + } + + // 11th request should throw + await expect( + manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) + ).rejects.toThrow(RateLimitExceededError); + }); + + it("should prioritize provider-specific limit over global limit", async () => { + // OpenAI has limit of 5, global has 10 + for (let i = 0; i < 5; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + + // 6th OpenAI request should throw + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).rejects.toThrow(RateLimitExceededError); + }); + + it("should handle different providers independently", async () => { + // Use 5 OpenAI requests + for (let i = 0; i < 5; i++) { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + } + + // Claude should still have full global limit available + for (let i = 0; i < 10; i++) { + await expect( + manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }) + ).resolves.not.toThrow(); + } + }); + + it("should allow requests when no limit configured", async () => { + const noLimitManager = new RateLimitManager("test-agent", {}); + + for (let i = 0; i < 100; i++) { + await expect( + noLimitManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + }); + + it("should handle provider name case-insensitively", async () => { + await manager.checkLLMRateLimit({ provider: "OpenAI", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "OPENAI", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats = manager.getAllStats(); + // Should only have one limiter for openai + const openaiKeys = Object.keys(stats).filter((key) => key.includes("openai")); + 
expect(openaiKeys.length).toBe(1); + }); + }); + + describe("checkToolRateLimit", () => { + it("should allow requests within tool limit", async () => { + for (let i = 0; i < 3; i++) { + await expect(manager.checkToolRateLimit("search_tool")).resolves.not.toThrow(); + } + + // 4th request should throw + await expect(manager.checkToolRateLimit("search_tool")).rejects.toThrow( + RateLimitExceededError + ); + }); + + it("should allow unlimited requests for unconfigured tools", async () => { + for (let i = 0; i < 100; i++) { + await expect(manager.checkToolRateLimit("unconfigured_tool")).resolves.not.toThrow(); + } + }); + + it("should handle different tools independently", async () => { + // Use up search_tool limit + for (let i = 0; i < 3; i++) { + await manager.checkToolRateLimit("search_tool"); + } + + // Other tools should work fine + await expect(manager.checkToolRateLimit("other_tool")).resolves.not.toThrow(); + }); + }); + + describe("getAllStats", () => { + it("should return stats for all active limiters", async () => { + // Trigger creation of different limiters + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }); + await manager.checkToolRateLimit("search_tool"); + + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBeGreaterThan(0); + + for (const [key, stat] of Object.entries(stats)) { + expect(key).toBeTruthy(); + expect(stat).toHaveProperty("limit"); + expect(stat).toHaveProperty("current"); + expect(stat).toHaveProperty("remaining"); + expect(stat).toHaveProperty("resetAt"); + } + }); + + it("should return empty object when no limiters created", () => { + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBe(0); + }); + }); + + describe("resetAll", () => { + it("should reset all active limiters", async () => { + // Use some requests + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" 
}); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkToolRateLimit("search_tool"); + + manager.resetAll(); + + const stats = manager.getAllStats(); + for (const stat of Object.values(stats)) { + expect(stat.current).toBe(0); + expect(stat.remaining).toBe(stat.limit); + } + }); + + it("should allow requests after reset", async () => { + // Use up openai limit + for (let i = 0; i < 5; i++) { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + } + + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).rejects.toThrow(RateLimitExceededError); + + manager.resetAll(); + + await expect( + manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + }); + }); + + describe("limiter creation", () => { + it("should create limiter on first access", async () => { + const stats1 = manager.getAllStats(); + expect(Object.keys(stats1).length).toBe(0); + + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats2 = manager.getAllStats(); + expect(Object.keys(stats2).length).toBe(1); + }); + + it("should reuse existing limiter", async () => { + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + + const stats = manager.getAllStats(); + expect(Object.keys(stats).length).toBe(1); // Only one limiter should exist + + const openaiStats = Object.values(stats)[0]; + expect(openaiStats.current).toBe(2); // Both requests counted + }); + }); + + describe("configuration validation", () => { + it("should handle missing maxRequestsPerMinute gracefully", async () => { + const invalidConfig = { + llm: { + strategy: "fixed_window" as const, + onExceeded: "throw" as const, + // Missing maxRequestsPerMinute - should be allowed through without rate limiting + }, + }; + + const invalidManager = new RateLimitManager("test", 
invalidConfig); + + // Should not throw - when config is invalid/incomplete, it allows requests through + // This is a graceful degradation approach + await expect( + invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + + // Verify multiple requests are allowed (no rate limiting applied) + for (let i = 0; i < 10; i++) { + await expect( + invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) + ).resolves.not.toThrow(); + } + }); + }); + + describe("delay behavior", () => { + it("should delay requests when onExceeded=delay", async () => { + const delayConfig: AgentRateLimitConfig = { + llm: { + maxRequestsPerMinute: 2, + strategy: "fixed_window", + onExceeded: "delay", + }, + }; + + const delayManager = new RateLimitManager("test-agent", delayConfig); + + // Use up limit + await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + + // Next request should delay + const checkPromise = delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + + let resolved = false; + checkPromise.then(() => { + resolved = true; + }); + + await vi.advanceTimersByTimeAsync(100); + expect(resolved).toBe(false); + + // Advance to reset + await vi.advanceTimersByTimeAsync(60000); + await checkPromise; + expect(resolved).toBe(true); + }); + }); +}); diff --git a/packages/core/src/rate-limit/manager.ts b/packages/core/src/rate-limit/manager.ts new file mode 100644 index 000000000..09e1f7822 --- /dev/null +++ b/packages/core/src/rate-limit/manager.ts @@ -0,0 +1,185 @@ +/** + * Rate Limit Manager + * + * Central manager for all rate limiters in an agent. 
+ * Responsible for: + * - Creating and managing rate limiters for different scopes + * - Providing unified API for rate limit checks + * - Configuration-based limiter instantiation + */ + +import type { Logger } from "@voltagent/internal"; +import { FixedWindowCounterLimiter } from "./limiters/fixed-window"; +import type { AgentRateLimitConfig, RateLimiter, RateLimitScopeId } from "./types"; + +export class RateLimitManager { + private limiters: Map = new Map(); + private readonly config: AgentRateLimitConfig; + private readonly logger?: Logger; + private readonly agentId: string; + + constructor(agentId: string, config: AgentRateLimitConfig, logger?: Logger) { + this.agentId = agentId; + this.config = config; + this.logger = logger; + } + + /** + * Check rate limit for LLM call + * This is called before generateText/streamText + */ + async checkLLMRateLimit(context: { provider?: string; model?: string }): Promise { + // Priority order: + // 1. Provider-specific limit (if configured) + // 2. 
Global LLM limit (if configured) + + const providerName = context.provider?.toLowerCase(); + + // Check provider-specific limit first + if (providerName && this.config.providers?.[providerName]) { + const scopeId: RateLimitScopeId = { + type: "provider", + provider: providerName, + }; + const limiter = this.getLimiter(scopeId, this.config.providers[providerName]); + await limiter.acquire(); + return; + } + + // Check global LLM limit + if (this.config.llm) { + const scopeId: RateLimitScopeId = { + type: "global", + }; + const limiter = this.getLimiter(scopeId, this.config.llm); + await limiter.acquire(); + } + + // No rate limit configured - allow through + } + + /** + * Check rate limit for tool execution + * This is called before tool.execute() + */ + async checkToolRateLimit(toolName: string): Promise { + if (!this.config.tools?.[toolName]) { + // No rate limit configured for this tool + return; + } + + const scopeId: RateLimitScopeId = { + type: "tool", + agentId: this.agentId, + toolName, + }; + + const limiter = this.getLimiter(scopeId, this.config.tools[toolName]); + await limiter.acquire(); + } + + /** + * Get or create a rate limiter for a specific scope + */ + private getLimiter( + scopeId: RateLimitScopeId, + config: { + maxRequestsPerMinute?: number; + maxTokensPerMinute?: number; + strategy?: string; + onExceeded?: "delay" | "throw"; + } + ): RateLimiter { + const key = this.getScopeKey(scopeId); + + // Return existing limiter if already created + if (this.limiters.has(key)) { + return this.limiters.get(key)!; + } + + // Create new limiter + const limiter = this.createLimiter(scopeId, config); + this.limiters.set(key, limiter); + + this.logger?.debug("Rate limiter created", { + scope: scopeId, + limit: config.maxRequestsPerMinute, + strategy: config.strategy || "fixed_window", + }); + + return limiter; + } + + /** + * Create a new rate limiter instance based on configuration + */ + private createLimiter( + scopeId: RateLimitScopeId, + config: { + 
maxRequestsPerMinute?: number; + maxTokensPerMinute?: number; + strategy?: string; + onExceeded?: "delay" | "throw"; + } + ): RateLimiter { + const limit = config.maxRequestsPerMinute || 60; // Default 60 requests/min + const strategy = config.strategy || "fixed_window"; + const onExceeded = config.onExceeded || "delay"; + + // For MVP, only fixed_window is implemented + if (strategy !== "fixed_window") { + this.logger?.warn( + `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window` + ); + } + + return new FixedWindowCounterLimiter({ + limit, + windowMs: 60000, // 1 minute window + onExceeded, + scope: this.getScopeKey(scopeId), + }); + } + + /** + * Generate a unique key for a rate limit scope + */ + private getScopeKey(scopeId: RateLimitScopeId): string { + switch (scopeId.type) { + case "global": + return "global:llm"; + case "agent": + return `agent:${scopeId.agentId}`; + case "tool": + return `tool:${scopeId.agentId}:${scopeId.toolName}`; + case "provider": + return `provider:${scopeId.provider}`; + default: + return "unknown"; + } + } + + /** + * Get statistics for all active rate limiters + */ + getAllStats(): Record> { + const stats: Record> = {}; + + for (const [key, limiter] of this.limiters.entries()) { + stats[key] = limiter.getStats(); + } + + return stats; + } + + /** + * Reset all rate limiters + * Useful for testing or manual intervention + */ + resetAll(): void { + for (const limiter of this.limiters.values()) { + limiter.reset(); + } + this.logger?.debug("All rate limiters reset"); + } +} diff --git a/packages/core/src/rate-limit/types.ts b/packages/core/src/rate-limit/types.ts new file mode 100644 index 000000000..c45a33b2d --- /dev/null +++ b/packages/core/src/rate-limit/types.ts @@ -0,0 +1,167 @@ +/** + * Rate limiting types and interfaces + * Implements configurable rate limiting for LLM calls and tool executions + */ + +/** + * Available rate limiting strategies + */ +export type RateLimitStrategy = "fixed_window" | 
"token_bucket" | "leaky_bucket"; + +/** + * Action to take when rate limit is exceeded + * - 'delay': Wait until rate limit resets (queue the request) + * - 'throw': Immediately throw RateLimitExceededError + */ +export type RateLimitExceededAction = "delay" | "throw"; + +/** + * Scope for rate limiting + * - 'global': Apply to all operations across all agents + * - 'agent': Apply per agent instance + * - 'tool': Apply per tool + * - 'provider': Apply per LLM provider + */ +export type RateLimitScope = "global" | "agent" | "tool" | "provider"; + +/** + * Statistics for current rate limit state + */ +export interface RateLimitStats { + /** Number of requests remaining in current window */ + remaining: number; + /** Total limit per window */ + limit: number; + /** When the current window resets */ + resetAt: Date; + /** Current request count in window */ + current: number; +} + +/** + * Base configuration for a rate limiter + */ +export interface RateLimitConfig { + /** Rate limiting strategy to use */ + strategy: RateLimitStrategy; + /** Maximum number of requests per window */ + limit: number; + /** Time window in milliseconds */ + windowMs: number; + /** Action when limit is exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for LLM-specific rate limiting + */ +export interface LLMRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute?: number; + /** Maximum tokens per minute (future enhancement) */ + maxTokensPerMinute?: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for tool-specific rate limiting + */ +export interface ToolRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Configuration for 
provider-specific rate limiting + */ +export interface ProviderRateLimitConfig { + /** Maximum requests per minute */ + maxRequestsPerMinute?: number; + /** Maximum tokens per minute (future enhancement) */ + maxTokensPerMinute?: number; + /** Rate limiting strategy */ + strategy?: RateLimitStrategy; + /** Action when limit exceeded */ + onExceeded?: RateLimitExceededAction; +} + +/** + * Complete rate limiting configuration for an agent + */ +export interface AgentRateLimitConfig { + /** Global LLM rate limits for all providers */ + llm?: LLMRateLimitConfig; + /** Per-tool rate limits (keyed by tool name) */ + tools?: Record; + /** Per-provider rate limits (keyed by provider name like 'openai', 'anthropic') */ + providers?: Record; +} + +/** + * Context for rate limit check + */ +export interface RateLimitContext { + /** Agent ID */ + agentId: string; + /** Operation type (llm or tool) */ + operationType: "llm" | "tool"; + /** Provider name (for LLM operations) */ + provider?: string; + /** Model name (for LLM operations) */ + model?: string; + /** Tool name (for tool operations) */ + toolName?: string; +} + +/** + * Core rate limiter interface + * All rate limiting strategies must implement this interface + */ +export interface RateLimiter { + /** + * Acquire permission to proceed with an operation + * This method will either: + * - Return immediately if under limit + * - Wait (delay) until limit resets if onExceeded='delay' + * - Throw RateLimitExceededError if onExceeded='throw' + */ + acquire(): Promise; + + /** + * Check if operation can proceed without blocking + * @returns true if under limit, false if over limit + */ + check(): boolean; + + /** + * Get current rate limit statistics + */ + getStats(): RateLimitStats; + + /** + * Reset the rate limiter state + * Useful for testing or manual intervention + */ + reset(): void; +} + +/** + * Scope identifier for rate limit manager + */ +export interface RateLimitScopeId { + /** Scope type */ + type: "global" | 
"agent" | "tool" | "provider"; + /** Agent ID (for agent/tool scopes) */ + agentId?: string; + /** Tool name (for tool scope) */ + toolName?: string; + /** Provider name (for provider scope) */ + provider?: string; +} From 8de4af40196416dec9d38cfe5dec4b6c95cf58cf Mon Sep 17 00:00:00 2001 From: sujal Date: Sat, 25 Oct 2025 17:42:14 +0530 Subject: [PATCH 2/6] chore: add example usage for rate limiting --- examples/with-rate-limiting/.env.example | 1 + examples/with-rate-limiting/.gitignore | 4 + examples/with-rate-limiting/README.md | 107 +++++++++ examples/with-rate-limiting/package.json | 22 ++ examples/with-rate-limiting/src/index.ts | 276 +++++++++++++++++++++++ 5 files changed, 410 insertions(+) create mode 100644 examples/with-rate-limiting/.env.example create mode 100644 examples/with-rate-limiting/.gitignore create mode 100644 examples/with-rate-limiting/README.md create mode 100644 examples/with-rate-limiting/package.json create mode 100644 examples/with-rate-limiting/src/index.ts diff --git a/examples/with-rate-limiting/.env.example b/examples/with-rate-limiting/.env.example new file mode 100644 index 000000000..0eb68d5c2 --- /dev/null +++ b/examples/with-rate-limiting/.env.example @@ -0,0 +1 @@ +GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here diff --git a/examples/with-rate-limiting/.gitignore b/examples/with-rate-limiting/.gitignore new file mode 100644 index 000000000..6a8a0fe88 --- /dev/null +++ b/examples/with-rate-limiting/.gitignore @@ -0,0 +1,4 @@ +node_modules +dist +.DS_Store +.voltagent diff --git a/examples/with-rate-limiting/README.md b/examples/with-rate-limiting/README.md new file mode 100644 index 000000000..9ff104806 --- /dev/null +++ b/examples/with-rate-limiting/README.md @@ -0,0 +1,107 @@ +# Rate Limiting Example + +This example demonstrates VoltAgent's rate limiting feature to control the frequency of LLM calls and tool executions. + +## Features Demonstrated + +1. 
**Basic LLM Rate Limiting** - Limit requests per minute with error throwing +2. **Delay Strategy** - Automatic waiting when limits are exceeded +3. **Provider-Specific Limits** - Different limits for different LLM providers +4. **Tool Rate Limiting** - Control tool execution frequency +5. **Combined Limits** - Multiple rate limits working together +6. **Monitoring Stats** - Track rate limit usage in real-time + +## Installation + +```bash +pnpm install +``` + +## Configuration + +Set your Google Generative AI API key: + +```bash +export GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here +``` + +## Running Examples + +Edit `src/index.ts` and uncomment the examples you want to run in the `main()` function: + +```typescript +async function main() { + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + // ... etc +} +``` + +Then run: + +```bash +pnpm start +``` + +## Rate Limit Configuration + +### LLM Rate Limiting + +```typescript +rateLimits: { + llm: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", + onExceeded: "throw" // or "delay" + } +} +``` + +### Provider-Specific Limits + +```typescript +rateLimits: { + providers: { + openai: { + maxRequestsPerMinute: 5, + onExceeded: "throw" + }, + anthropic: { + maxRequestsPerMinute: 3, + onExceeded: "delay" + } + } +} +``` + +### Tool Rate Limiting + +```typescript +rateLimits: { + tools: { + search_tool: { + maxRequestsPerMinute: 3, + onExceeded: "delay" + } + } +} +``` + +## Strategies + +### `onExceeded: "throw"` + +- Immediately throws `RateLimitExceededError` when limit is reached +- Good for strict enforcement and error handling + +### `onExceeded: "delay"` + +- Automatically waits until the rate limit resets +- Good for background jobs and retry scenarios + +## Use Cases + +- **Cost Control**: Limit expensive LLM API calls +- **API Quota Management**: Stay within provider rate limits +- **Resource Protection**: Prevent tool overuse +- **Fair Usage**: Distribute resources across multiple agents diff --git 
a/examples/with-rate-limiting/package.json b/examples/with-rate-limiting/package.json new file mode 100644 index 000000000..7a443148c --- /dev/null +++ b/examples/with-rate-limiting/package.json @@ -0,0 +1,22 @@ +{ + "name": "with-rate-limiting", + "version": "1.0.0", + "private": true, + "type": "module", + "scripts": { + "start": "tsx src/index.ts", + "dev": "tsx watch src/index.ts", + "build": "tsc", + "run": "node dist/index.js" + }, + "dependencies": { + "@ai-sdk/google": "^2.0.23", + "@voltagent/core": "workspace:*", + "dotenv": "^17.2.3", + "zod": "^3.25.0" + }, + "devDependencies": { + "tsx": "^4.19.2", + "typescript": "^5.8.2" + } +} diff --git a/examples/with-rate-limiting/src/index.ts b/examples/with-rate-limiting/src/index.ts new file mode 100644 index 000000000..992eafc7a --- /dev/null +++ b/examples/with-rate-limiting/src/index.ts @@ -0,0 +1,276 @@ +/** + * Example: Rate Limiting in VoltAgent + * + * This example demonstrates how to use the rate limiting feature + * to control the frequency of LLM calls and tool executions. 
+ */ +import "dotenv/config"; +import { google } from "@ai-sdk/google"; +import { Agent, RateLimitExceededError, createTool } from "@voltagent/core"; +import { z } from "zod"; + +// Create a simple search tool +const searchTool = createTool({ + name: "search_tool", + description: "Search for information", + parameters: z.object({ + query: z.string().describe("Search query"), + }), + execute: async ({ query }) => { + console.log(`[TOOL] Searching for: ${query}`); + return `Search results for: ${query}`; + }, +}); + +// Example 1: Basic LLM Rate Limiting +async function example1_basicLLMRateLimit() { + console.log("\n=== Example 1: Basic LLM Rate Limiting ==="); + + const agent = new Agent({ + name: "basic-llm-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 3, // Only 3 requests per minute + strategy: "fixed_window", + onExceeded: "throw", // Throw error when limit exceeded + }, + }, + }); + + try { + // These will work fine + for (let i = 1; i <= 3; i++) { + console.log(`Request ${i}/3...`); + const response = await agent.generateText(`Say "Hello ${i}"`); + console.log(`✓ Response: ${response.text}`); + } + + // This 4th request will throw RateLimitExceededError + console.log("\nAttempting 4th request (will fail)..."); + await agent.generateText("Say Hello 4"); + } catch (error) { + if (error instanceof RateLimitExceededError) { + console.error(`✗ Rate limit exceeded: ${error.message}`); + console.log(` Remaining: ${error.stats.remaining}`); + console.log(` Reset at: ${error.stats.resetAt}`); + console.log(` Retry after: ${error.retryAfter} seconds`); + } else if (error instanceof Error) { + console.error(`✗ Unexpected error: ${error.message}`); + } + } +} + +// Example 2: Delay Strategy +async function example2_delayStrategy() { + console.log("\n=== Example 2: Delay Strategy (Auto-wait) ==="); + + const agent = new Agent({ + name: "delay-agent", + model: 
google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 2, + strategy: "fixed_window", + onExceeded: "delay", // Wait until rate limit resets + }, + }, + }); + + console.log("Making 3 requests (2 will go through, 3rd will wait)..."); + const startTime = Date.now(); + + for (let i = 1; i <= 3; i++) { + const requestStart = Date.now(); + console.log(`\nRequest ${i}/3 at +${Math.round((requestStart - startTime) / 1000)}s`); + + const response = await agent.generateText(`Count to ${i}`); + + const requestEnd = Date.now(); + console.log(`✓ Completed in ${Math.round((requestEnd - requestStart) / 1000)}s`); + console.log(` Response: ${response.text.substring(0, 50)}...`); + } +} + +// Example 3: Provider-Specific Rate Limiting +async function example3_providerSpecificLimits() { + console.log("\n=== Example 3: Provider-Specific Rate Limiting ==="); + + const agent = new Agent({ + name: "provider-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 10, // Global limit: 10 requests/min + onExceeded: "throw", + }, + providers: { + google: { + maxRequestsPerMinute: 3, // Google-specific: 3 requests/min + onExceeded: "throw", + }, + }, + }, + }); + + try { + console.log("Provider-specific limit (Google): 3 requests/min"); + console.log("Global limit: 10 requests/min\n"); + + for (let i = 1; i <= 4; i++) { + console.log(`Google request ${i}/4...`); + await agent.generateText(`Hello ${i}`); + console.log("✓ Success"); + } + } catch (error) { + if (error instanceof Error) { + console.error(`✗ Google rate limit exceeded (3/min): ${error.message}`); + } + } +} + +// Example 4: Tool-Specific Rate Limiting +async function example4_toolRateLimiting() { + console.log("\n=== Example 4: Tool-Specific Rate Limiting ==="); + + const agent = new Agent({ + name: "tool-limited-agent", + model: 
google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant with access to a search tool.", + tools: [searchTool], + rateLimits: { + tools: { + search_tool: { + maxRequestsPerMinute: 2, // Only 2 searches per minute + strategy: "fixed_window", + onExceeded: "throw", + }, + }, + }, + }); + + try { + console.log("Asking agent to make 3 searches (limit: 2/min)...\n"); + + const response = await agent.generateText( + "Search for 'AI', then 'ML', then 'DL'. Use the search tool for each.", + ); + + console.log("\n✓ Response:", response.text); + } catch (error) { + if (error instanceof Error) { + console.error(`✗ Tool rate limit exceeded: ${error.message}`); + } + } +} + +// Example 5: Combined Rate Limiting +async function example5_combinedLimits() { + console.log("\n=== Example 5: Combined LLM + Tool Rate Limiting ==="); + + const agent = new Agent({ + name: "fully-limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + tools: [searchTool], + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + onExceeded: "delay", + }, + tools: { + search_tool: { + maxRequestsPerMinute: 3, + onExceeded: "delay", + }, + }, + providers: { + google: { + maxRequestsPerMinute: 4, + onExceeded: "throw", + }, + }, + }, + }); + + console.log("Configuration:"); + console.log(" - Google provider: 4 req/min (throw)"); + console.log(" - Global LLM: 5 req/min (delay)"); + console.log(" - Search tool: 3 req/min (delay)\n"); + + // Agent will respect all limits with appropriate strategies + try { + const response = await agent.generateText( + "Tell me about AI and use the search tool if needed.", + ); + console.log("✓ Response:", `${response.text.substring(0, 100)}...`); + } catch (error) { + if (error instanceof Error) { + console.error("✗ Error:", error.message); + } + } +} + +// Example 6: Monitoring Rate Limit Stats +async function example6_monitoringStats() { + console.log("\n=== Example 6: Monitoring Rate Limit Statistics ==="); 
+ + const agent = new Agent({ + name: "monitored-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + onExceeded: "throw", + }, + }, + }); + + // Make some requests + for (let i = 1; i <= 3; i++) { + await agent.generateText(`Request ${i}`); + } + + console.log("\nNote: Rate limit statistics are internal to the agent."); + console.log("In production, you can track rate limit errors and retryAfter values."); + console.log("The RateLimitExceededError provides all necessary information for monitoring."); +} + +// Run all examples +async function main() { + console.log("🚀 VoltAgent Rate Limiting Examples\n"); + console.log("=".repeat(60)); + + try { + // Uncomment the examples you want to run: + + // await example1_basicLLMRateLimit(); + // await example2_delayStrategy(); + // await example3_providerSpecificLimits(); + // await example4_toolRateLimiting(); + // await example5_combinedLimits(); + // await example6_monitoringStats(); + + console.log(`\n${"=".repeat(60)}`); + console.log("\n✓ All examples completed!"); + console.log("\nNote: Uncomment examples in main() to run them."); + } catch (error) { + console.error("\n✗ Error:", error); + } +} + +// Run if executed directly +main().catch(console.error); + +export { + example1_basicLLMRateLimit, + example2_delayStrategy, + example3_providerSpecificLimits, + example4_toolRateLimiting, + example5_combinedLimits, + example6_monitoringStats, +}; From 59575ae297347c01a6c3d012b476135c084dbe49 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 19:57:37 +0530 Subject: [PATCH 3/6] feat: simplify rate limit checks and remove provider-specific configurations --- examples/with-rate-limiting/src/index.ts | 88 +++++++++--------- packages/core/src/agent/agent.ts | 10 +- packages/core/src/index.ts | 2 - packages/core/src/rate-limit/index.ts | 3 - packages/core/src/rate-limit/manager.spec.ts | 98 ++++---------------- 
packages/core/src/rate-limit/manager.ts | 38 ++------ packages/core/src/rate-limit/types.ts | 67 +++---------- 7 files changed, 89 insertions(+), 217 deletions(-) diff --git a/examples/with-rate-limiting/src/index.ts b/examples/with-rate-limiting/src/index.ts index 992eafc7a..f85a7ce01 100644 --- a/examples/with-rate-limiting/src/index.ts +++ b/examples/with-rate-limiting/src/index.ts @@ -1,15 +1,12 @@ /** * Example: Rate Limiting in VoltAgent - * - * This example demonstrates how to use the rate limiting feature - * to control the frequency of LLM calls and tool executions. */ -import "dotenv/config"; import { google } from "@ai-sdk/google"; import { Agent, RateLimitExceededError, createTool } from "@voltagent/core"; +import "dotenv/config"; import { z } from "zod"; -// Create a simple search tool +// Create simple tools const searchTool = createTool({ name: "search_tool", description: "Search for information", @@ -22,6 +19,18 @@ const searchTool = createTool({ }, }); +const calculateTool = createTool({ + name: "calculator", + description: "Perform calculations", + parameters: z.object({ + expression: z.string().describe("Math expression to calculate"), + }), + execute: async ({ expression }) => { + console.log(`[TOOL] Calculating: ${expression}`); + return `Result: ${expression} = 42`; + }, +}); + // Example 1: Basic LLM Rate Limiting async function example1_basicLLMRateLimit() { console.log("\n=== Example 1: Basic LLM Rate Limiting ==="); @@ -94,22 +103,25 @@ async function example2_delayStrategy() { } } -// Example 3: Provider-Specific Rate Limiting -async function example3_providerSpecificLimits() { - console.log("\n=== Example 3: Provider-Specific Rate Limiting ==="); +// Example 3: Tool-Specific Rate Limiting +async function example3_toolRateLimiting() { + console.log("\n=== Example 3: Tool-Specific Rate Limiting ==="); const agent = new Agent({ - name: "provider-limited-agent", + name: "tool-limited-agent", model: google("gemini-2.0-flash-exp"), - 
instructions: "You are a helpful assistant.", + instructions: "You are a helpful assistant with tools.", + tools: [searchTool, calculateTool], rateLimits: { - llm: { - maxRequestsPerMinute: 10, // Global limit: 10 requests/min - onExceeded: "throw", - }, - providers: { - google: { - maxRequestsPerMinute: 3, // Google-specific: 3 requests/min + tools: { + search_tool: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", + onExceeded: "throw", + }, + calculator: { + maxRequestsPerMinute: 10, + strategy: "fixed_window", onExceeded: "throw", }, }, @@ -117,24 +129,23 @@ async function example3_providerSpecificLimits() { }); try { - console.log("Provider-specific limit (Google): 3 requests/min"); - console.log("Global limit: 10 requests/min\n"); + console.log("Agent with tool-specific limits:", "\n"); + console.log("search_tool: 5 requests/min"); + console.log("calculator: 10 requests/min\n"); - for (let i = 1; i <= 4; i++) { - console.log(`Google request ${i}/4...`); - await agent.generateText(`Hello ${i}`); - console.log("✓ Success"); - } + const response = await agent.generateText("Search for 'JavaScript' and calculate 2 + 2."); + + console.log("\n✓ Response:", `${response.text.substring(0, 100)}...`); } catch (error) { if (error instanceof Error) { - console.error(`✗ Google rate limit exceeded (3/min): ${error.message}`); + console.error(`✗ Tool rate limit exceeded: ${error.message}`); } } } // Example 4: Tool-Specific Rate Limiting async function example4_toolRateLimiting() { - console.log("\n=== Example 4: Tool-Specific Rate Limiting ==="); + console.log("\n=== Example 4: Tool Rate Limiting ==="); const agent = new Agent({ name: "tool-limited-agent", @@ -167,7 +178,7 @@ async function example4_toolRateLimiting() { } } -// Example 5: Combined Rate Limiting +// Example 5: Combined LLM + Tool Rate Limiting async function example5_combinedLimits() { console.log("\n=== Example 5: Combined LLM + Tool Rate Limiting ==="); @@ -187,17 +198,10 @@ async function 
example5_combinedLimits() { onExceeded: "delay", }, }, - providers: { - google: { - maxRequestsPerMinute: 4, - onExceeded: "throw", - }, - }, }, }); console.log("Configuration:"); - console.log(" - Google provider: 4 req/min (throw)"); console.log(" - Global LLM: 5 req/min (delay)"); console.log(" - Search tool: 3 req/min (delay)\n"); @@ -248,15 +252,15 @@ async function main() { try { // Uncomment the examples you want to run: - // await example1_basicLLMRateLimit(); - // await example2_delayStrategy(); - // await example3_providerSpecificLimits(); - // await example4_toolRateLimiting(); - // await example5_combinedLimits(); - // await example6_monitoringStats(); + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + await example3_toolRateLimiting(); + await example4_toolRateLimiting(); + await example5_combinedLimits(); + await example6_monitoringStats(); console.log(`\n${"=".repeat(60)}`); - console.log("\n✓ All examples completed!"); + console.log("\n✓ Examples ready!"); console.log("\nNote: Uncomment examples in main() to run them."); } catch (error) { console.error("\n✗ Error:", error); @@ -269,7 +273,7 @@ main().catch(console.error); export { example1_basicLLMRateLimit, example2_delayStrategy, - example3_providerSpecificLimits, + example3_toolRateLimiting, example4_toolRateLimiting, example5_combinedLimits, example6_monitoringStats, diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index c40dc91d9..d2a565736 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -545,10 +545,7 @@ export class Agent { const provider = this.extractProviderFromModel(model); const modelId = modelName; - await this.rateLimitManager.checkLLMRateLimit({ - provider, - model: modelId, - }); + await this.rateLimitManager.checkLLMRateLimit(); methodLogger.debug("Rate limit check passed for LLM call", { event: LogEvents.AGENT_GENERATION_STARTED, @@ -777,10 +774,7 @@ export class Agent { const provider = 
this.extractProviderFromModel(model); const modelId = modelName; - await this.rateLimitManager.checkLLMRateLimit({ - provider, - model: modelId, - }); + await this.rateLimitManager.checkLLMRateLimit(); methodLogger.debug("Rate limit check passed for stream call", { event: LogEvents.AGENT_STREAM_STARTED, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index a2e8c70a0..5f1d99751 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -123,8 +123,6 @@ export { RateLimitManager } from "./rate-limit/manager"; export type { AgentRateLimitConfig, LLMRateLimitConfig, - RateLimitConfig, - RateLimitContext, RateLimiter, RateLimitExceededAction, RateLimitScope, diff --git a/packages/core/src/rate-limit/index.ts b/packages/core/src/rate-limit/index.ts index b0b955c35..44f8541ba 100644 --- a/packages/core/src/rate-limit/index.ts +++ b/packages/core/src/rate-limit/index.ts @@ -12,9 +12,6 @@ export { RateLimitManager } from "./manager"; export type { AgentRateLimitConfig, LLMRateLimitConfig, - ProviderRateLimitConfig, - RateLimitConfig, - RateLimitContext, RateLimiter, RateLimitExceededAction, RateLimitScope, diff --git a/packages/core/src/rate-limit/manager.spec.ts b/packages/core/src/rate-limit/manager.spec.ts index 6b4d43f4e..f4262f0a8 100644 --- a/packages/core/src/rate-limit/manager.spec.ts +++ b/packages/core/src/rate-limit/manager.spec.ts @@ -19,13 +19,6 @@ describe("RateLimitManager", () => { strategy: "fixed_window", onExceeded: "throw", }, - providers: { - openai: { - maxRequestsPerMinute: 5, - strategy: "fixed_window", - onExceeded: "throw", - }, - }, tools: { search_tool: { maxRequestsPerMinute: 3, @@ -50,65 +43,20 @@ describe("RateLimitManager", () => { describe("checkLLMRateLimit", () => { it("should allow requests within global LLM limit", async () => { for (let i = 0; i < 10; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) - ).resolves.not.toThrow(); + await 
expect(manager.checkLLMRateLimit()).resolves.not.toThrow(); } // 11th request should throw - await expect( - manager.checkLLMRateLimit({ provider: "unknown", model: "test-model" }) - ).rejects.toThrow(RateLimitExceededError); - }); - - it("should prioritize provider-specific limit over global limit", async () => { - // OpenAI has limit of 5, global has 10 - for (let i = 0; i < 5; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); - } - - // 6th OpenAI request should throw - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).rejects.toThrow(RateLimitExceededError); - }); - - it("should handle different providers independently", async () => { - // Use 5 OpenAI requests - for (let i = 0; i < 5; i++) { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - } - - // Claude should still have full global limit available - for (let i = 0; i < 10; i++) { - await expect( - manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }) - ).resolves.not.toThrow(); - } + await expect(manager.checkLLMRateLimit()).rejects.toThrow(RateLimitExceededError); }); it("should allow requests when no limit configured", async () => { const noLimitManager = new RateLimitManager("test-agent", {}); for (let i = 0; i < 100; i++) { - await expect( - noLimitManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(noLimitManager.checkLLMRateLimit()).resolves.not.toThrow(); } }); - - it("should handle provider name case-insensitively", async () => { - await manager.checkLLMRateLimit({ provider: "OpenAI", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "OPENAI", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - - const stats = manager.getAllStats(); - // Should only have one limiter for openai - const openaiKeys = Object.keys(stats).filter((key) => 
key.includes("openai")); - expect(openaiKeys.length).toBe(1); - }); }); describe("checkToolRateLimit", () => { @@ -119,7 +67,7 @@ describe("RateLimitManager", () => { // 4th request should throw await expect(manager.checkToolRateLimit("search_tool")).rejects.toThrow( - RateLimitExceededError + RateLimitExceededError, ); }); @@ -143,8 +91,8 @@ describe("RateLimitManager", () => { describe("getAllStats", () => { it("should return stats for all active limiters", async () => { // Trigger creation of different limiters - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "anthropic", model: "claude-3" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); await manager.checkToolRateLimit("search_tool"); const stats = manager.getAllStats(); @@ -168,8 +116,8 @@ describe("RateLimitManager", () => { describe("resetAll", () => { it("should reset all active limiters", async () => { // Use some requests - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); await manager.checkToolRateLimit("search_tool"); manager.resetAll(); @@ -184,18 +132,14 @@ describe("RateLimitManager", () => { it("should allow requests after reset", async () => { // Use up openai limit for (let i = 0; i < 5; i++) { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); } - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).rejects.toThrow(RateLimitExceededError); + await expect(manager.checkLLMRateLimit()).rejects.toThrow(RateLimitExceededError); manager.resetAll(); - await expect( - manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(manager.checkLLMRateLimit()).resolves.not.toThrow(); }); }); @@ 
-204,15 +148,15 @@ describe("RateLimitManager", () => { const stats1 = manager.getAllStats(); expect(Object.keys(stats1).length).toBe(0); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); const stats2 = manager.getAllStats(); expect(Object.keys(stats2).length).toBe(1); }); it("should reuse existing limiter", async () => { - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); - await manager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }); + await manager.checkLLMRateLimit(); + await manager.checkLLMRateLimit(); const stats = manager.getAllStats(); expect(Object.keys(stats).length).toBe(1); // Only one limiter should exist @@ -236,15 +180,11 @@ describe("RateLimitManager", () => { // Should not throw - when config is invalid/incomplete, it allows requests through // This is a graceful degradation approach - await expect( - invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(invalidManager.checkLLMRateLimit()).resolves.not.toThrow(); // Verify multiple requests are allowed (no rate limiting applied) for (let i = 0; i < 10; i++) { - await expect( - invalidManager.checkLLMRateLimit({ provider: "openai", model: "gpt-4" }) - ).resolves.not.toThrow(); + await expect(invalidManager.checkLLMRateLimit()).resolves.not.toThrow(); } }); }); @@ -262,11 +202,11 @@ describe("RateLimitManager", () => { const delayManager = new RateLimitManager("test-agent", delayConfig); // Use up limit - await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); - await delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + await delayManager.checkLLMRateLimit(); + await delayManager.checkLLMRateLimit(); // Next request should delay - const checkPromise = delayManager.checkLLMRateLimit({ provider: "test", model: "test" }); + const checkPromise = delayManager.checkLLMRateLimit(); let resolved = false; 
checkPromise.then(() => { diff --git a/packages/core/src/rate-limit/manager.ts b/packages/core/src/rate-limit/manager.ts index 09e1f7822..d5540681c 100644 --- a/packages/core/src/rate-limit/manager.ts +++ b/packages/core/src/rate-limit/manager.ts @@ -10,7 +10,7 @@ import type { Logger } from "@voltagent/internal"; import { FixedWindowCounterLimiter } from "./limiters/fixed-window"; -import type { AgentRateLimitConfig, RateLimiter, RateLimitScopeId } from "./types"; +import type { AgentRateLimitConfig, RateLimitScopeId, RateLimiter } from "./types"; export class RateLimitManager { private limiters: Map = new Map(); @@ -28,25 +28,8 @@ export class RateLimitManager { * Check rate limit for LLM call * This is called before generateText/streamText */ - async checkLLMRateLimit(context: { provider?: string; model?: string }): Promise { - // Priority order: - // 1. Provider-specific limit (if configured) - // 2. Global LLM limit (if configured) - - const providerName = context.provider?.toLowerCase(); - - // Check provider-specific limit first - if (providerName && this.config.providers?.[providerName]) { - const scopeId: RateLimitScopeId = { - type: "provider", - provider: providerName, - }; - const limiter = this.getLimiter(scopeId, this.config.providers[providerName]); - await limiter.acquire(); - return; - } - - // Check global LLM limit + async checkLLMRateLimit(): Promise { + // Check global LLM limit (if configured) if (this.config.llm) { const scopeId: RateLimitScopeId = { type: "global", @@ -88,13 +71,14 @@ export class RateLimitManager { maxTokensPerMinute?: number; strategy?: string; onExceeded?: "delay" | "throw"; - } + }, ): RateLimiter { const key = this.getScopeKey(scopeId); // Return existing limiter if already created - if (this.limiters.has(key)) { - return this.limiters.get(key)!; + const existingLimiter = this.limiters.get(key); + if (existingLimiter) { + return existingLimiter; } // Create new limiter @@ -120,7 +104,7 @@ export class RateLimitManager { 
maxTokensPerMinute?: number; strategy?: string; onExceeded?: "delay" | "throw"; - } + }, ): RateLimiter { const limit = config.maxRequestsPerMinute || 60; // Default 60 requests/min const strategy = config.strategy || "fixed_window"; @@ -129,7 +113,7 @@ export class RateLimitManager { // For MVP, only fixed_window is implemented if (strategy !== "fixed_window") { this.logger?.warn( - `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window` + `Unsupported rate limit strategy: ${strategy}. Falling back to fixed_window`, ); } @@ -148,12 +132,8 @@ export class RateLimitManager { switch (scopeId.type) { case "global": return "global:llm"; - case "agent": - return `agent:${scopeId.agentId}`; case "tool": return `tool:${scopeId.agentId}:${scopeId.toolName}`; - case "provider": - return `provider:${scopeId.provider}`; default: return "unknown"; } diff --git a/packages/core/src/rate-limit/types.ts b/packages/core/src/rate-limit/types.ts index c45a33b2d..baf75af57 100644 --- a/packages/core/src/rate-limit/types.ts +++ b/packages/core/src/rate-limit/types.ts @@ -5,8 +5,9 @@ /** * Available rate limiting strategies + * - 'fixed_window': Simple counter that resets after a fixed time period (MVP implementation) */ -export type RateLimitStrategy = "fixed_window" | "token_bucket" | "leaky_bucket"; +export type RateLimitStrategy = "fixed_window"; /** * Action to take when rate limit is exceeded @@ -17,12 +18,10 @@ export type RateLimitExceededAction = "delay" | "throw"; /** * Scope for rate limiting - * - 'global': Apply to all operations across all agents - * - 'agent': Apply per agent instance - * - 'tool': Apply per tool - * - 'provider': Apply per LLM provider + * - 'global': Apply to all LLM operations across all agents + * - 'tool': Apply per tool per agent */ -export type RateLimitScope = "global" | "agent" | "tool" | "provider"; +export type RateLimitScope = "global" | "tool"; /** * Statistics for current rate limit state @@ -38,20 +37,6 @@ export 
interface RateLimitStats { current: number; } -/** - * Base configuration for a rate limiter - */ -export interface RateLimitConfig { - /** Rate limiting strategy to use */ - strategy: RateLimitStrategy; - /** Maximum number of requests per window */ - limit: number; - /** Time window in milliseconds */ - windowMs: number; - /** Action when limit is exceeded */ - onExceeded?: RateLimitExceededAction; -} - /** * Configuration for LLM-specific rate limiting */ @@ -80,17 +65,10 @@ export interface ToolRateLimitConfig { /** * Configuration for provider-specific rate limiting + * NOTE: Provider-specific limits are not currently implemented. + * Use LLMRateLimitConfig global limits instead. */ -export interface ProviderRateLimitConfig { - /** Maximum requests per minute */ - maxRequestsPerMinute?: number; - /** Maximum tokens per minute (future enhancement) */ - maxTokensPerMinute?: number; - /** Rate limiting strategy */ - strategy?: RateLimitStrategy; - /** Action when limit exceeded */ - onExceeded?: RateLimitExceededAction; -} +// REMOVED: ProviderRateLimitConfig - not implemented in current version /** * Complete rate limiting configuration for an agent @@ -100,24 +78,6 @@ export interface AgentRateLimitConfig { llm?: LLMRateLimitConfig; /** Per-tool rate limits (keyed by tool name) */ tools?: Record; - /** Per-provider rate limits (keyed by provider name like 'openai', 'anthropic') */ - providers?: Record; -} - -/** - * Context for rate limit check - */ -export interface RateLimitContext { - /** Agent ID */ - agentId: string; - /** Operation type (llm or tool) */ - operationType: "llm" | "tool"; - /** Provider name (for LLM operations) */ - provider?: string; - /** Model name (for LLM operations) */ - model?: string; - /** Tool name (for tool operations) */ - toolName?: string; } /** @@ -154,14 +114,13 @@ export interface RateLimiter { /** * Scope identifier for rate limit manager + * Identifies which rate limiter to use for a specific operation */ export interface 
RateLimitScopeId { - /** Scope type */ - type: "global" | "agent" | "tool" | "provider"; - /** Agent ID (for agent/tool scopes) */ + /** Scope type - either global (all LLM calls) or tool-specific */ + type: "global" | "tool"; + /** Agent ID (required for tool scope) */ agentId?: string; - /** Tool name (for tool scope) */ + /** Tool name (required for tool scope) */ toolName?: string; - /** Provider name (for provider scope) */ - provider?: string; } From 486f11ba162a15d08330e65c1deed50ca78de057 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:03:28 +0530 Subject: [PATCH 4/6] feat: implement multiple rate limiting strategies with hook support --- examples/with-rate-limiting/README.md | 267 ++++++++++++++++++++------ 1 file changed, 205 insertions(+), 62 deletions(-) diff --git a/examples/with-rate-limiting/README.md b/examples/with-rate-limiting/README.md index 9ff104806..82262bbf7 100644 --- a/examples/with-rate-limiting/README.md +++ b/examples/with-rate-limiting/README.md @@ -1,107 +1,250 @@ -# Rate Limiting Example +# Rate Limiting in VoltAgent -This example demonstrates VoltAgent's rate limiting feature to control the frequency of LLM calls and tool executions. +This example demonstrates how to implement comprehensive rate limiting in VoltAgent agents, including different strategies, tool-specific limits, and monitoring hooks. ## Features Demonstrated -1. **Basic LLM Rate Limiting** - Limit requests per minute with error throwing -2. **Delay Strategy** - Automatic waiting when limits are exceeded -3. **Provider-Specific Limits** - Different limits for different LLM providers -4. **Tool Rate Limiting** - Control tool execution frequency -5. **Combined Limits** - Multiple rate limits working together -6. **Monitoring Stats** - Track rate limit usage in real-time +1. **Basic LLM Rate Limiting** - Enforce model request limits with error throwing +2. **Delay Strategy** - Automatically wait when rate limits are exceeded +3. 
**Tool-Specific Rate Limiting** - Set different limits for different tools +4. **Tool Rate Limiting** - Focus on individual tool request constraints +5. **Combined LLM + Tool Rate Limiting** - Apply limits at both agent and tool levels +6. **Rate Limit Statistics** - Monitor and track rate limit usage +7. **Hooks for Monitoring** - React to rate limit events with callbacks ## Installation ```bash +cd examples/with-rate-limiting pnpm install ``` -## Configuration +## Configuration Options -Set your OpenAI API key: +### Basic Rate Limit Configuration -```bash -export GOOGLE_GENERATIVE_AI_API_KEY=your_api_key_here +```typescript +const agent = new Agent({ + name: "limited-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 5, + strategy: "fixed_window", // or "sliding_window", "token_bucket", "leaky_bucket" + onExceeded: "throw", // or "delay" + }, + }, +}); ``` +### Rate Limiting Strategies + +- **Fixed Window**: Simple per-minute counter reset +- **Sliding Window**: More accurate time-based windowing +- **Token Bucket**: Allows burst traffic while maintaining average rate +- **Leaky Bucket**: Smooth request distribution over time + +## Hooks for Monitoring + +Hooks allow you to react to rate limit events with custom callbacks: + +```typescript +const agent = new Agent({ + name: "hooked-agent", + model: google("gemini-2.0-flash-exp"), + instructions: "You are a helpful assistant.", + rateLimits: { + llm: { + maxRequestsPerMinute: 3, + onExceeded: "delay", + onLimitExceeded: async (context) => { + console.warn(`⚠️ Rate limit exceeded for scope: ${context.scope}`); + console.warn(` Remaining: ${context.stats.remaining}`); + console.warn(` Reset at: ${context.stats.resetAt}`); + }, + }, + tools: { + search_tool: { + maxRequestsPerMinute: 2, + onExceeded: "delay", + onLimitExceeded: async (context) => { + console.warn(`⚠️ Search tool rate limit exceeded!`); + // Send alerts, log 
to monitoring systems, etc. + }, + }, + }, + }, +}); +``` + +### Hook Context Properties + +When a hook is triggered, the context object contains: + +- `scope` - The scope where limit was exceeded (e.g., "llm", "tool:search_tool") +- `stats` - Rate limit statistics object with: + - `remaining` - Number of requests still available in current window + - `resetAt` - Timestamp when the rate limit window resets + +## Examples in Detail + +### Example 1: Basic LLM Rate Limiting + +Demonstrates enforcing a 3 requests-per-minute limit with error throwing. The 4th request will throw a `RateLimitExceededError` containing retry information. + +**Key Features:** + +- `maxRequestsPerMinute: 3` - Only 3 requests allowed per minute +- `onExceeded: "throw"` - Immediately throws error when limit exceeded +- Error includes `remaining` count and `resetAt` timestamp + +### Example 2: Delay Strategy + +Shows automatic waiting when limits are reached. The agent automatically delays the 3rd request until the rate limit window resets, providing a seamless experience. + +**Key Features:** + +- `maxRequestsPerMinute: 2` - Only 2 requests allowed per minute +- `onExceeded: "delay"` - Automatically waits instead of failing +- Transparent to the caller - request completes after waiting + +### Example 3: Tool-Specific Rate Limiting + +Sets different limits for different tools: + +- `search_tool`: 5 requests/minute +- `calculator`: 10 requests/minute + +Each tool has independent rate limit counters. + +**Key Features:** + +- Multiple tools with different limits +- Each tool tracked separately +- `onExceeded: "throw"` for strict enforcement + +### Example 4: Tool Rate Limiting + +Focuses on limiting a specific tool (`search_tool` at 2 requests/minute). Useful when a particular tool has stricter API rate limits. 
+ +**Key Features:** + +- Single tool limiting +- `onExceeded: "throw"` strategy +- Tests agent behavior with multiple search requests + +### Example 5: Combined LLM + Tool Rate Limiting + +Applies limits at both levels: + +- Global LLM: 5 requests/minute with delay +- Individual tool: 3 requests/minute with delay + +The agent respects all configured limits. + +**Key Features:** + +- Both LLM and tool limits active +- Both use `onExceeded: "delay"` +- Demonstrates multi-level rate limiting + +### Example 6: Monitoring Rate Limit Statistics + +Demonstrates how to access and monitor rate limit state. Statistics are tracked internally and accessible through error properties when limits are exceeded. + +**Key Features:** + +- Rate limits are maintained internally +- Statistics available in error objects +- Useful for logging and monitoring + +### Example 7: Hooks for Rate Limit Monitoring ⭐ NEW + +Uses callback hooks to react to rate limit events in real-time: + +- `onLimitExceeded` - Called when a rate limit is exceeded + +This provides flexible, event-driven monitoring without blocking the agent flow. + +**Key Features:** + +- `maxRequestsPerMinute: 3` with 4 requests (triggers hook) +- Hook logs scope, remaining count, and reset timestamp +- Demonstrates hook triggering and context usage + ## Running Examples Edit `src/index.ts` and uncomment the examples you want to run in the `main()` function: ```typescript async function main() { - await example1_basicLLMRateLimit(); - await example2_delayStrategy(); - // ... 
etc + console.log("🚀 VoltAgent Rate Limiting Examples\n"); + + try { + await example1_basicLLMRateLimit(); + await example2_delayStrategy(); + await example3_toolRateLimiting(); + await example4_toolRateLimiting(); + await example5_combinedLimits(); + await example6_monitoringStats(); + await example7_hooksForMonitoring(); + + console.log("\n✓ Examples ready!"); + } catch (error) { + console.error("\n✗ Error:", error); + } } ``` Then run: ```bash -pnpm start +pnpm run dev ``` -## Rate Limit Configuration +## Error Handling -### LLM Rate Limiting +When using `onExceeded: "throw"`: ```typescript -rateLimits: { - llm: { - maxRequestsPerMinute: 10, - strategy: "fixed_window", - onExceeded: "throw" // or "delay" +try { + const response = await agent.generateText("What is AI?"); +} catch (error) { + if (error instanceof RateLimitExceededError) { + console.log(`Retry after ${error.retryAfter} seconds`); + console.log(`Remaining requests: ${error.stats.remaining}`); + console.log(`Reset at: ${error.stats.resetAt}`); } } ``` -### Provider-Specific Limits +When using `onExceeded: "delay"`: ```typescript -rateLimits: { - providers: { - openai: { - maxRequestsPerMinute: 5, - onExceeded: "throw" - }, - anthropic: { - maxRequestsPerMinute: 3, - onExceeded: "delay" - } - } -} +// The agent automatically waits for the rate limit window to reset +const response = await agent.generateText("What is AI?"); +// Request completes transparently after waiting ``` -### Tool Rate Limiting +## Best Practices -```typescript -rateLimits: { - tools: { - search_tool: { - maxRequestsPerMinute: 3, - onExceeded: "delay" - } - } -} -``` - -## Strategies +1. **Choose Appropriate Limits**: Set realistic limits based on your API quotas +2. **Monitor Hooks**: Use hooks to track rate limit violations and alert teams +3. **Combine Strategies**: Use different strategies at different levels (LLM vs tools) +4. **Plan for Bursts**: Token bucket strategy works well for bursty workloads +5. 
**Log Rate Limit Events**: Keep audit trails of rate limit violations +6. **Test Gracefully**: Verify behavior when limits are reached before production -### `onExceeded: "throw"` - -- Immediately throws `RateLimitExceededError` when limit is reached -- Good for strict enforcement and error handling - -### `onExceeded: "delay"` +## Use Cases -- Automatically waits until the rate limit resets -- Good for background jobs and retry scenarios +- **Cost Control**: Limit expensive LLM API calls to manage expenses +- **API Quota Management**: Stay within provider rate limits and quotas +- **Resource Protection**: Prevent tool overuse and API abuse +- **Fair Usage**: Distribute resources fairly across multiple agents +- **Compliance**: Ensure rate limits required by service agreements +- **Graceful Degradation**: Choose between failing fast or queuing requests -## Use Cases +## More Information -- **Cost Control**: Limit expensive LLM API calls -- **API Quota Management**: Stay within provider rate limits -- **Resource Protection**: Prevent tool overuse -- **Fair Usage**: Distribute resources across multiple agents +For more details on rate limiting strategies and implementation, see the VoltAgent core documentation. 
From a2682f9b50518ddc7c14150fab17aaed5646930c Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:06:27 +0530 Subject: [PATCH 5/6] refactor: streamline delay handling in FixedWindowCounterLimiter --- .../src/rate-limit/limiters/fixed-window.ts | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/packages/core/src/rate-limit/limiters/fixed-window.ts b/packages/core/src/rate-limit/limiters/fixed-window.ts index b926a18a6..7be75c2f2 100644 --- a/packages/core/src/rate-limit/limiters/fixed-window.ts +++ b/packages/core/src/rate-limit/limiters/fixed-window.ts @@ -12,7 +12,7 @@ */ import { RateLimitExceededError } from "../errors"; -import type { RateLimiter, RateLimitExceededAction, RateLimitStats } from "../types"; +import type { RateLimitExceededAction, RateLimitStats, RateLimiter } from "../types"; export interface FixedWindowCounterConfig { /** Maximum requests per window */ @@ -26,7 +26,7 @@ export interface FixedWindowCounterConfig { } export class FixedWindowCounterLimiter implements RateLimiter { - private count: number = 0; + private count = 0; private windowStart: number; private readonly limit: number; private readonly windowMs: number; @@ -63,16 +63,15 @@ export class FixedWindowCounterLimiter implements RateLimiter { stats, scope: this.scope, }); - } else { - // Delay until window resets - const waitTime = this.windowStart + this.windowMs - now; - if (waitTime > 0) { - await this.delay(waitTime); - } - // After waiting, reset window and retry - this.count = 0; - this.windowStart = Date.now(); } + // Delay until window resets + const waitTime = this.windowStart + this.windowMs - now; + if (waitTime > 0) { + await this.delay(waitTime); + } + // After waiting, reset window and retry + this.count = 0; + this.windowStart = Date.now(); } // Increment counter From 1edd8d30dfb1bc07cfb5c4f99d9f379521fa9ff4 Mon Sep 17 00:00:00 2001 From: sujal Date: Wed, 1 Apr 2026 21:16:54 +0530 Subject: [PATCH 6/6] feat: add 
rate-limiting implementation in Agent class with hook support --- .changeset/young-rice-study.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .changeset/young-rice-study.md diff --git a/.changeset/young-rice-study.md b/.changeset/young-rice-study.md new file mode 100644 index 000000000..7d1514611 --- /dev/null +++ b/.changeset/young-rice-study.md @@ -0,0 +1,6 @@ +--- +"with-rate-limiting": minor +"@voltagent/core": minor +--- + +Implemented rate-limiting in Agent class and passed a hook