Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions tensorrt_llm/_torch/speculative/eagle3.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, List, Optional, Set

import torch
from torch import nn

from tensorrt_llm.logger import logger
from tensorrt_llm.mapping import Mapping

from ..attention_backend import AttentionMetadata
Expand Down Expand Up @@ -320,6 +322,15 @@ def __init__(self, spec_config: "EagleDecodingConfig", mapping: Mapping):
self.mapping = mapping
self.guided_decoder: Optional[CapturableGuidedDecoder] = None

env_value = os.environ.get("TLLM_EAGLE3_FORCE_NUM_ACCEPTED_TOKENS", "0")
try:
self.force_num_accepted_tokens = int(env_value)
except ValueError:
logger.warning(
f"TLLM_EAGLE3_FORCE_NUM_ACCEPTED_TOKENS must be a valid integer, "
f"got '{env_value}'. Using default value 0.")
self.force_num_accepted_tokens = 0

# Skip torch.compile for now since current Torch is not compatible with Triton 3.4
# @torch.compile(options={"max-autotune": True})
def forward(self, input_ids, position_ids, hidden_states, logits,
Expand Down Expand Up @@ -481,6 +492,9 @@ def sample_and_accept_draft_tokens(
num_accepted_tokens[num_contexts:] += torch.cumprod(
(draft_tokens == gen_target_tokens[:, :self.max_draft_len]).int(),
dim=-1).sum(1)
# Check for environment variable override
if self.force_num_accepted_tokens != 0:
num_accepted_tokens[num_contexts:] = self.force_num_accepted_tokens
return accepted_tokens, num_accepted_tokens

def draft_decoder(
Expand Down