Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions packages/cli/src/repowise/cli/commands/init_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ async def _make_cost_tracker() -> CostTracker:
progress=gen_callback,
resume=resume,
cost_tracker=cost_tracker,
generation_config=gen_config,
)
)

Expand Down Expand Up @@ -977,6 +978,7 @@ def init_command(

# Merge exclude_patterns from config.yaml and --exclude/-x flags
config = load_config(repo_path)
language = config.get("language", "en")
exclude_patterns: list[str] = list(config.get("exclude_patterns") or []) + list(exclude)

# Resolve commit limit: CLI flag → config.yaml → default (500)
Expand Down Expand Up @@ -1038,6 +1040,8 @@ def init_command(
f" Provider: [cyan]{provider.provider_name}[/cyan] / Model: [cyan]{provider.model_name}[/cyan]"
)
console.print(f" Embedder: [cyan]{embedder_name_resolved}[/cyan]")
if language != "en":
console.print(f" Language: [cyan]{language}[/cyan]")

# Validate provider connection
from repowise.core.providers.llm.base import ProviderError
Expand Down Expand Up @@ -1149,8 +1153,7 @@ def init_command(

# Cost estimation
from repowise.core.generation import GenerationConfig

gen_config = GenerationConfig(max_concurrency=concurrency)
gen_config = GenerationConfig(max_concurrency=concurrency, language=language)
plans = build_generation_plan(
result.parsed_files, result.graph_builder, gen_config, skip_tests, skip_infra
)
Expand Down Expand Up @@ -1291,6 +1294,7 @@ async def _make_cost_tracker() -> CostTracker:
progress=gen_callback,
resume=resume,
cost_tracker=cost_tracker,
generation_config=gen_config,
)
)

Expand Down
17 changes: 10 additions & 7 deletions packages/cli/src/repowise/cli/commands/update_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _on_done(result: "RepoUpdateResult") -> None:
)

from repowise.core.workspace import RepoUpdateResult

results = run_async(
update_workspace(
ws_root,
Expand Down Expand Up @@ -121,6 +121,7 @@ def _on_done(result: "RepoUpdateResult") -> None:
@click.option("--provider", "provider_name", default=None, help="LLM provider name.")
@click.option("--model", default=None, help="Model identifier override.")
@click.option("--since", default=None, help="Base git ref to diff from (overrides state).")
@click.option("--concurrency", type=int, default=5, help="Max concurrent LLM calls.")
@click.option(
"--cascade-budget",
type=int,
Expand Down Expand Up @@ -152,6 +153,7 @@ def update_command(
dry_run: bool,
workspace: bool,
repo_alias: str | None,
concurrency: int = 5,
) -> None:
"""Incrementally update wiki pages for files changed since last sync."""
start = time.monotonic()
Expand Down Expand Up @@ -209,11 +211,12 @@ def update_command(
from repowise.core.generation import ContextAssembler, GenerationConfig, PageGenerator
from repowise.core.ingestion import ASTParser, FileTraverser, GraphBuilder

config = GenerationConfig()
cfg = load_config(repo_path)
language = cfg.get("language", "en")
config = GenerationConfig(max_concurrency=concurrency, language=language)

# Read exclude patterns from config (set during init or via web UI)
repo_config = load_config(repo_path)
exclude_patterns: list[str] = list(repo_config.get("exclude_patterns") or [])
exclude_patterns: list[str] = list(cfg.get("exclude_patterns") or [])

# Full re-ingest for graph (needed for cascade analysis)
traverser = FileTraverser(repo_path, extra_exclude_patterns=exclude_patterns or None)
Expand Down Expand Up @@ -252,8 +255,8 @@ def update_command(
try:
from repowise.core.ingestion.git_indexer import GitIndexer

_commit_limit = repo_config.get("commit_limit")
_follow_renames = repo_config.get("follow_renames", False)
_commit_limit = cfg.get("commit_limit")
_follow_renames = cfg.get("follow_renames", False)
git_indexer = GitIndexer(
repo_path,
commit_limit=_commit_limit,
Expand Down Expand Up @@ -329,7 +332,7 @@ def update_command(

# Generate affected pages
assembler = ContextAssembler(config)
generator = PageGenerator(provider, assembler, config)
generator = PageGenerator(provider, assembler, config, language=config.language)
repo_name = repo_path.name

generated_pages = run_async(
Expand Down
2 changes: 1 addition & 1 deletion packages/core/src/repowise/core/generation/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class GenerationConfig:
max_pages_pct: float = 0.10 # hard cap: total pages ≤ max(50, N_files * this)
jobs_dir: str = ".repowise/jobs"
large_file_source_pct: float = 0.4 # use structural summary when source tokens > budget * this

language: str = "en"

# ---------------------------------------------------------------------------
# GeneratedPage
Expand Down
56 changes: 48 additions & 8 deletions packages/core/src/repowise/core/generation/page_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@
compute_page_id,
compute_source_hash,
)
# Supported output languages: lowercase language code -> English display name.
# The display name, not the bare code, is what gets spelled out in prompts so
# the instruction given to the model is unambiguous.
_LANGUAGE_NAMES: dict[str, str] = {
    "en": "English",
    "ru": "Russian",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "zh": "Chinese",
    "ja": "Japanese",
    "ko": "Korean",
    "it": "Italian",
    "pt": "Portuguese",
    "nl": "Dutch",
    "pl": "Polish",
    "tr": "Turkish",
    "ar": "Arabic",
    "hi": "Hindi",
}

log = structlog.get_logger(__name__)

Expand Down Expand Up @@ -135,12 +153,14 @@ def __init__(
assembler: ContextAssembler,
config: GenerationConfig,
jinja_env: jinja2.Environment | None = None,
vector_store: Any | None = None, # VectorStore | None
vector_store: Any | None = None,
language: str = "en",
) -> None:
self._provider = provider
self._assembler = assembler
self._config = config
self._vector_store = vector_store
self._language = language
self._cache: dict[str, GeneratedResponse] = {}

if jinja_env is None:
Expand All @@ -152,7 +172,6 @@ def __init__(
autoescape=False,
)
self._jinja_env = jinja_env

# ------------------------------------------------------------------
# Per-type generation methods
# ------------------------------------------------------------------
Expand Down Expand Up @@ -921,21 +940,41 @@ async def _generate_file_page_from_ctx(
)
page.metadata["hallucination_warnings"] = hal_warnings
return page

async def _call_provider(
self,
page_type: str,
user_prompt: str,
request_id: str,
) -> GeneratedResponse:
"""Call the provider with caching."""
key = self._compute_cache_key(page_type, user_prompt)
if self._config.cache_enabled and key in self._cache:
log.debug("Cache hit", page_type=page_type, key=key[:8])
return self._cache[key]

base_system = SYSTEM_PROMPTS[page_type]

# Validate and sanitize language
lang_code = self._language.lower().strip() if self._language else "en"
# Remove any newlines or control characters (prevent prompt injection)
lang_code = ''.join(ch for ch in lang_code if ch.isalnum() or ch == '_')
if lang_code not in _LANGUAGE_NAMES:
log.warning(f"Unknown language code '{lang_code}', falling back to English")
lang_code = "en"
lang_name = _LANGUAGE_NAMES.get(lang_code, "English")

if lang_code != "en":
language_instruction = (
f"Generate all documentation content in {lang_name}. "
"Keep all code, file paths, and symbol names in their original form. "
"Do not translate them.\n\n"
)
system_prompt = language_instruction + base_system
else:
system_prompt = base_system

response = await self._provider.generate(
SYSTEM_PROMPTS[page_type],
system_prompt,
user_prompt,
max_tokens=self._config.max_tokens,
temperature=self._config.temperature,
Expand All @@ -946,10 +985,10 @@ async def _call_provider(
self._cache[key] = response

return response

def _compute_cache_key(self, page_type: str, user_prompt: str) -> str:
    """Return the response-cache key for one provider request.

    The key is the hex SHA-256 digest of
    ``model_name:language:page_type:user_prompt``. The language is part of
    the key because a non-English language alters the system prompt, so the
    same user prompt must not reuse a response cached for another language.

    Args:
        page_type: Page type identifier of the request.
        user_prompt: User prompt text sent to the provider.

    Returns:
        The 64-character hexadecimal SHA-256 digest.
    """
    raw = f"{self._provider.model_name}:{self._language}:{page_type}:{user_prompt}"
    return hashlib.sha256(raw.encode()).hexdigest()

def _build_generated_page(
Expand Down Expand Up @@ -1299,6 +1338,7 @@ def _validate_symbol_references(
continue
# Check against known names
base = ref.split(".")[-1]

if ref in known or base in known:
continue
# Skip if the ref is a substring of any known symbol (covers partial
Expand Down
16 changes: 13 additions & 3 deletions packages/core/src/repowise/core/pipeline/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ async def _ingestion_stage() -> tuple:
concurrency=concurrency,
progress=progress,
resume=resume,
generation_config=config,
)

# ---- Execution flow tracing -----------------------------------------------
Expand Down Expand Up @@ -721,6 +722,7 @@ async def run_generation(
vector_store: Any | None,
concurrency: int,
progress: ProgressCallback | None,
generation_config: Any,
resume: bool = False,
cost_tracker: Any | None = None,
) -> list[Any]:
Expand All @@ -741,7 +743,9 @@ async def run_generation(
if cost_tracker is not None and llm_client is not None and hasattr(llm_client, "_cost_tracker"):
llm_client._cost_tracker = cost_tracker

config = GenerationConfig(max_concurrency=concurrency)
# Create a new config based on the passed generation_config, but with desired max_concurrency
from dataclasses import replace
config = replace(generation_config, max_concurrency=concurrency)
assembler = ContextAssembler(config)

# Resolve embedder and vector store
Expand All @@ -750,8 +754,6 @@ async def run_generation(
if vector_store is None:
vector_store = InMemoryVectorStore(embedder_impl)

generator = PageGenerator(llm_client, assembler, config, vector_store=vector_store)

# Job system — use a temp-like dir under repo_path for checkpoints
jobs_dir = repo_path / ".repowise" / "jobs"
jobs_dir.mkdir(parents=True, exist_ok=True)
Expand All @@ -778,6 +780,14 @@ def on_total_known(total: int) -> None:
if progress:
progress.on_phase_start("generation", total)

generator = PageGenerator(
llm_client,
assembler,
config,
vector_store=vector_store,
language=config.language,
)

generated_pages = await generator.generate_all(
parsed_files,
source_map,
Expand Down