Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/leann-core/src/leann/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
from abc import ABC, abstractmethod
from typing import Any, Optional, cast

import torch

from .settings import (
resolve_anthropic_api_key,
resolve_anthropic_base_url,
Expand Down Expand Up @@ -720,6 +718,8 @@ def ask(self, prompt: str, **kwargs) -> str:
logger.info(f"Generating with HuggingFace model, config: {generation_config}")

# Generate
import torch

with torch.no_grad():
outputs = self.model.generate(**inputs, **generation_config)

Expand Down
10 changes: 8 additions & 2 deletions packages/leann-core/src/leann/embedding_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
from typing import Any, Optional, Protocol, cast

import numpy as np
import tiktoken
import torch

from .settings import resolve_ollama_host, resolve_openai_api_key, resolve_openai_base_url

# torch and tiktoken are imported lazily, inside the functions that use them,
# so that a plain `import leann` (e.g. for MCP search over an existing index,
# BM25-only flows, or non-embedding utilities) does not load torch and its
# roughly 1 GB of in-memory state. The import cost is paid only when one of
# those functions is first called.

# Set up logger with proper level
logger = logging.getLogger(__name__)
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
Expand Down Expand Up @@ -145,6 +147,8 @@ def truncate_to_token_limit(texts: list[str], token_limit: int) -> list[str]:
if not texts:
return []

import tiktoken

# Use tiktoken with cl100k_base encoding
enc = tiktoken.get_encoding("cl100k_base")

Expand Down Expand Up @@ -419,6 +423,8 @@ def compute_embeddings_sentence_transformers(
is_build: Whether this is a build operation (shows progress bar)
adaptive_optimization: Whether to use adaptive optimization based on batch size
"""
import torch

outer_start_time = time.time()
# Handle empty input
if not texts:
Expand Down
Loading