diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e7861a..59f9799 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -23,6 +23,7 @@ jobs: FASTEMBED_CACHE_PATH: ${{ github.workspace }}/.cache/fastembed permissions: contents: read + actions: write steps: - name: Checkout diff --git a/.github/workflows/deploy-pazaakworld.yml b/.github/workflows/deploy-pazaakworld.yml index 334d7be..45b798f 100644 --- a/.github/workflows/deploy-pazaakworld.yml +++ b/.github/workflows/deploy-pazaakworld.yml @@ -97,6 +97,12 @@ jobs: exit 1 fi + - name: Verify Trask public API health (worker + upstream) + if: hashFiles('apps/holocron-web/package.json') != '' + env: + TRASK_API_BASE: ${{ vars.TRASK_API_BASE }} + run: bash scripts/check_trask_public_api.sh + - name: Build Holocron web (Trask Q&A SPA) if: hashFiles('apps/holocron-web/package.json') != '' env: diff --git a/.github/workflows/trask-worker.yml b/.github/workflows/trask-worker.yml index b348760..b7e69ba 100644 --- a/.github/workflows/trask-worker.yml +++ b/.github/workflows/trask-worker.yml @@ -166,3 +166,9 @@ jobs: deploy_args+=(--var "TRASK_RESEARCHWIZARD_BASE_URL:${TRASK_RESEARCHWIZARD_BASE_URL}") fi pnpm dlx wrangler@4.87.0 deploy "${deploy_args[@]}" + + - name: Verify deployed worker health + if: steps.cf_check.outputs.configured == 'true' + env: + TRASK_API_BASE: ${{ vars.TRASK_API_BASE != '' && vars.TRASK_API_BASE || 'https://trask-worker.bocloud.workers.dev' }} + run: bash scripts/check_trask_public_api.sh diff --git a/apps/holocron-web/public/holocron/holocron-artifact.png b/apps/holocron-web/public/holocron/holocron-artifact.png index a4ca2d6..67b0022 100644 Binary files a/apps/holocron-web/public/holocron/holocron-artifact.png and b/apps/holocron-web/public/holocron/holocron-artifact.png differ diff --git a/apps/holocron-web/src/App.tsx b/apps/holocron-web/src/App.tsx index 72d6ce9..17d6302 100644 --- a/apps/holocron-web/src/App.tsx +++ b/apps/holocron-web/src/App.tsx @@ -6,6 
+6,7 @@ import { HolocronModelPicker } from '@/components/HolocronModelPicker' import { SourceWeightsDialog } from '@/components/SourceWeightsDialog' import { KeyboardShortcutsDialog } from '@/components/KeyboardShortcutsDialog' import { TopNav, type HolocronSessionUi } from '@/components/TopNav' +import { TraskBackendStatus } from '@/components/TraskBackendStatus' import { HolocronGlyph } from '@/components/HolocronGlyph' import { HolocronSanctum, @@ -44,12 +45,14 @@ import { traskListModels, traskLogout, traskPollIterationSignal, + traskErrorMessageFromUnknown, traskUsesSameOriginApi, type TraskHistoryLiveEventDto, type TraskHistoryRecordDto, type TraskSessionDto, } from '@/lib/trask-api' import { priorUserQuestionsFromOtherThreads } from '@/lib/starter-suggestions' +import { holocronAssetUrl } from '@/lib/asset-url' const CONVERSATIONS_KEY = 'qa-conversations-v2' const LEGACY_CONVERSATIONS_KEY = 'qa-conversations' @@ -58,6 +61,8 @@ const RESEARCH_RETRY_BASE_MS = 5_000 const RESEARCH_RETRY_MAX_MS = 90_000 /** ~2.5 min of missing thread rows before re-dispatching (research can run up to ~90s). */ const RESEARCH_POLL_FAILURE_GIVE_UP = 48 +/** Stop silent retries and surface a failed assistant message when the API stays unreachable. 
*/ +const RESEARCH_CONNECTION_FAILURE_MAX_ATTEMPTS = 8 const SIDEBAR_WIDTH_MIN = 260 const SIDEBAR_WIDTH_MAX = 520 @@ -692,6 +697,11 @@ function App() { const mobileSidebarToggleButtonRef = useRef(null) const lastFocusedElementRef = useRef(null) + useEffect(() => { + const artifactUrl = holocronAssetUrl('holocron/holocron-artifact.png') + document.documentElement.style.setProperty('--holocron-artifact-url', `url("${artifactUrl}")`) + }, []) + const detachFromBottom = useCallback(() => { shouldStickToBottomRef.current = false if (!scrollRef.current) return @@ -1538,7 +1548,31 @@ function App() { replaceResearchAssistantMessage(job, createFailedMessageFromTraskRecord(record, job.queryType)) setResearchJobs((current) => normalizeResearchJobs(current).filter((candidate) => candidate.clientId !== job.clientId)) }, - [activeConversationId, replaceResearchAssistantMessage, setResearchJobs], + [replaceResearchAssistantMessage, setResearchJobs], + ) + + const failResearchJobFromConnectionError = useCallback( + (job: HolocronResearchJob, errorMessage: string) => { + const nowIso = new Date().toISOString() + const record: TraskHistoryRecordDto = { + queryId: job.serverQueryId ?? job.clientId, + threadId: job.threadId, + userId: 'holocron-web', + query: job.question, + status: 'failed', + answer: null, + sources: [], + error: errorMessage, + createdAt: new Date(job.createdAt).toISOString(), + completedAt: nowIso, + liveTrace: [ + { at: nowIso, phase: 'error', detail: errorMessage }, + { at: nowIso, phase: 'dispatch', detail: 'Could not reach the Holocron research API.' 
}, + ], + } + failResearchJob(job, record) + }, + [failResearchJob], ) const completeResearchJob = useCallback( @@ -1722,10 +1756,24 @@ function App() { pollFailures: 0, nextAttemptAt: Date.now() + 1_500, }) - } catch { - if (!cancelled) { - retryLater(job) + } catch (err) { + if (cancelled || !isJobCurrent()) return + const errorMessage = traskErrorMessageFromUnknown(err) + if (job.attemptCount >= RESEARCH_CONNECTION_FAILURE_MAX_ATTEMPTS) { + failResearchJobFromConnectionError(job, errorMessage) + return } + replaceResearchAssistantMessage(job, createResearchLoadingMessage( + job.assistantMessageId, + job.question, + job.createdAt, + job.queryType, + [ + localResearchStep('queued', 'Persisted locally; continuing in the background.'), + localResearchStep('retry', errorMessage), + ], + )) + retryLater(job) } finally { researchWorkersRef.current.delete(job.clientId) researchConversationWorkersRef.current.delete(job.conversationId) @@ -1762,6 +1810,7 @@ function App() { activeConversationId, completeResearchJob, failResearchJob, + failResearchJobFromConnectionError, replaceResearchAssistantMessage, researchJobs, setResearchJobs, @@ -2010,6 +2059,7 @@ function App() { return (
+
diff --git a/apps/holocron-web/src/components/HolocronGlyph.tsx b/apps/holocron-web/src/components/HolocronGlyph.tsx index a2b289c..eb8cb32 100644 --- a/apps/holocron-web/src/components/HolocronGlyph.tsx +++ b/apps/holocron-web/src/components/HolocronGlyph.tsx @@ -1,5 +1,6 @@ import { Sparkle } from '@phosphor-icons/react' import { useState } from 'react' +import { holocronAssetUrl } from '@/lib/asset-url' type HolocronGlyphProps = { variant: 'header' | 'hero' @@ -28,7 +29,7 @@ export function HolocronGlyph({ variant, className = '' }: HolocronGlyphProps) { return ( setUseFallback(true)} diff --git a/apps/holocron-web/src/components/HolocronSanctum.tsx b/apps/holocron-web/src/components/HolocronSanctum.tsx index 8fe7d6a..ac9e65a 100644 --- a/apps/holocron-web/src/components/HolocronSanctum.tsx +++ b/apps/holocron-web/src/components/HolocronSanctum.tsx @@ -1,7 +1,8 @@ import { type CSSProperties, useEffect, useMemo, useRef, useState } from 'react' +import { holocronAssetUrl } from '@/lib/asset-url' import { fluxTokensFromQuery, holocronMulberry32 } from '@/lib/holocron-live' -const HOLOCRON_ARTIFACT_SRC = '/holocron/holocron-artifact.png' +const HOLOCRON_ARTIFACT_SRC = holocronAssetUrl('holocron/holocron-artifact.png') export type HolocronActivityMood = 'idle' | 'retrieve' | 'success' | 'warn' | 'hot' diff --git a/apps/holocron-web/src/components/Message.tsx b/apps/holocron-web/src/components/Message.tsx index 712da4e..194759a 100644 --- a/apps/holocron-web/src/components/Message.tsx +++ b/apps/holocron-web/src/components/Message.tsx @@ -7,6 +7,7 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger } from '@/component import { CaretDown, CaretUp, Link as LinkIcon, Copy, Check, Clock, MagnifyingGlass, CheckCircle, XCircle, Download, Database, PencilSimple, ArrowsClockwise, ArrowClockwise } from '@phosphor-icons/react' import { motion, AnimatePresence, useReducedMotion } from 'framer-motion' import { toast } from 'sonner' +import { buildAnswerPresentation, 
sanitizeAnswerParagraph, sourceKey } from '@/lib/answer-presentation' interface MessageProps { message: MessageType @@ -159,6 +160,8 @@ function researchPhaseLabel(phaseRaw: string): string { return 'Composing' case 'retry': return 'Retrying' + case 'error': + return 'Connection error' default: return phase ? phase[0]!.toUpperCase() + phase.slice(1) : 'Processing' } @@ -270,322 +273,8 @@ function failureReasonFromResearchSteps(steps: readonly MessageResearchStep[]): return null } -interface DisplaySource extends Source { - index: number - hostname: string -} - -interface AnswerPresentation { - answerText: string - hasAnswerText: boolean - isSourceOnly: boolean - sources: DisplaySource[] - sourceByIndex: Map -} - -const SOURCE_HEADING_PATTERN = /^\s*sources\s*:?\s*$/i const CITATION_PATTERN = /\[(\d{1,3})\]/g -function isHttpUrlSchemeTerminator(ch: string): boolean { - return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === ')' || ch === ']' || ch === '>' -} - -/** Collect http(s) URLs without regex backtracking (CodeQL-safe). */ -function extractHttpUrls(text: string): string[] { - const urls: string[] = [] - const lower = text.toLowerCase() - let i = 0 - while (i < text.length) { - const httpsIdx = lower.indexOf('https://', i) - const httpIdx = lower.indexOf('http://', i) - const start = - httpsIdx === -1 ? httpIdx : httpIdx === -1 ? httpsIdx : Math.min(httpsIdx, httpIdx) - if (start === -1) break - let end = start - while (end < text.length && !isHttpUrlSchemeTerminator(text[end]!)) end += 1 - urls.push(text.slice(start, end)) - i = end - } - return urls -} - -function stripHttpUrls(text: string): string { - const lower = text.toLowerCase() - let out = '' - let i = 0 - while (i < text.length) { - const httpsIdx = lower.indexOf('https://', i) - const httpIdx = lower.indexOf('http://', i) - const start = - httpsIdx === -1 ? httpIdx : httpIdx === -1 ? 
httpsIdx : Math.min(httpsIdx, httpIdx) - if (start === -1) { - out += text.slice(i) - break - } - out += text.slice(i, start) - let end = start - while (end < text.length && !isHttpUrlSchemeTerminator(text[end]!)) end += 1 - i = end - } - return out -} - -/** Replace `[label](https://…)` with `label` using linear scanning (avoids nested-quantifier regex). */ -function stripMarkdownHttpLinks(text: string): string { - let result = '' - let i = 0 - while (i < text.length) { - if (text[i] !== '[') { - result += text[i] - i += 1 - continue - } - const closeBracket = text.indexOf(']', i + 1) - if (closeBracket === -1 || text[closeBracket + 1] !== '(') { - result += text[i] - i += 1 - continue - } - const closeParen = text.indexOf(')', closeBracket + 2) - if (closeParen === -1) { - result += text[i] - i += 1 - continue - } - const url = text.slice(closeBracket + 2, closeParen) - if (url.startsWith('http://') || url.startsWith('https://')) { - result += text.slice(i + 1, closeBracket) - i = closeParen + 1 - } else { - result += text.slice(i, closeParen + 1) - i = closeParen + 1 - } - } - return result -} - -function parseBracketCitationLine(line: string): { index: number; rest: string } | null { - if (!line.startsWith('[')) return null - const close = line.indexOf(']', 1) - if (close <= 1) return null - const num = line.slice(1, close) - if (!/^\d{1,3}$/.test(num)) return null - const rest = line.slice(close + 1).trimStart() - return { index: Number(num), rest } -} - -function parseNumberedSourceLine(line: string): { index: number; rest: string } | null { - const match = /^(\d{1,3})\.\s+/u.exec(line) - if (!match) return null - return { index: Number(match[1]), rest: line.slice(match[0].length) } -} - -function cleanUrl(raw: string): string { - return raw.trim().replace(/[.,;:]+$/g, '') -} - -function sourceHostname(url: string): string { - try { - return new URL(url).hostname.replace(/^www\./, '') - } catch { - return url.replace(/^https?:\/\//, '').split('/')[0] || url - 
} -} - -function formatSourceDisplayName(name: string, url: string): string { - if (!url) return name - try { - const parsed = new URL(url) - const host = parsed.hostname.replace(/^www\./, '') - const genericName = - !name.trim() - || name.trim().toLowerCase() === host.toLowerCase() - || name.trim().toLowerCase() === 'github.com' - - if (host === 'github.com') { - const path = decodeURIComponent(parsed.pathname) - const hash = parsed.hash && /^#L/i.test(parsed.hash) ? parsed.hash : '' - const blobMatch = path.match(/\/blob\/[^/]+\/(.+)$/i) - if (blobMatch?.[1]) { - const shortPath = blobMatch[1].replace(/\/+$/, '').split('/').slice(-2).join('/') || blobMatch[1] - return `${shortPath}${hash}` - } - const wikiMatch = path.match(/\/wiki\/(.+)$/i) - if (wikiMatch?.[1]) { - const page = wikiMatch[1].replace(/\/+$/, '') - return `wiki: ${page.split('/').pop() ?? page}${hash}` - } - const repoMatch = path.match(/^\/([^/]+)\/([^/]+)\/?$/i) - if (repoMatch?.[2]) { - return `${repoMatch[2]}${hash}` - } - } - - if (genericName) { - const pathSegments = parsed.pathname.replace(/\/+$/, '').split('/').filter(Boolean) - if (pathSegments.length > 1) { - return `${pathSegments.slice(-2).join('/')}`.replace(/[-_]+/g, ' ') - } - return host - } - } catch { - /* keep name */ - } - return name -} - -function sourceKey(source: Pick): string { - const url = source.url?.trim().toLowerCase() - if (url) return `url:${url}` - return `name:${source.name.trim().toLowerCase()}` -} - -function stripSourceNoise(text: string): string { - let t = stripMarkdownHttpLinks(text) - t = stripHttpUrls(t) - return t - .replace(/[()[\]]+/g, ' ') - .replace(/\s{2,}/g, ' ') - .trim() -} - -function splitAnswerFromSourceSection(content: string): { answerText: string; sourceText: string } { - const normalized = content.replace(/\r\n/g, '\n').trim() - const lines = normalized.split('\n') - const sourceHeadingIndex = lines.findIndex((line) => SOURCE_HEADING_PATTERN.test(line)) - - if (sourceHeadingIndex === -1) { 
- return { answerText: normalized, sourceText: '' } - } - - return { - answerText: lines.slice(0, sourceHeadingIndex).join('\n').trim(), - sourceText: lines.slice(sourceHeadingIndex + 1).join('\n').trim(), - } -} - -function parseSourcesFromText(sourceText: string): DisplaySource[] { - if (!sourceText.trim()) return [] - - const entries: Array<{ index: number; body: string[] }> = [] - for (const rawLine of sourceText.split('\n')) { - const line = rawLine.trim() - if (!line) continue - - const citation = parseBracketCitationLine(line) - if (citation) { - entries.push({ index: citation.index, body: [citation.rest] }) - continue - } - - const numbered = parseNumberedSourceLine(line) - if (numbered) { - entries.push({ index: numbered.index, body: [numbered.rest] }) - continue - } - - const lastEntry = entries.length > 0 ? entries[entries.length - 1] : undefined - lastEntry?.body.push(line) - } - - return entries - .map((entry) => { - const body = entry.body.join(' ').trim() - const urls = extractHttpUrls(body) - const url = cleanUrl(urls[0] ?? '') - const name = formatSourceDisplayName(stripSourceNoise(body) || '', url) - || (url ? sourceHostname(url) : `Source ${entry.index}`) - - if (!url && !name) return null - return { - index: entry.index, - name, - url, - confidence: 1, - hostname: url ? 
sourceHostname(url) : '', - } satisfies DisplaySource - }) - .filter((source): source is DisplaySource => Boolean(source)) -} - -function buildAnswerPresentation(content: string, explicitSources: Source[] = []): AnswerPresentation { - const { answerText, sourceText } = splitAnswerFromSourceSection(content) - let parsedSources = parseSourcesFromText(sourceText) - let visibleAnswerText = answerText - - if (sourceText && parsedSources.length === 0) { - visibleAnswerText = content.replace(/\r\n/g, '\n').trim() - parsedSources = [] - } - const merged: DisplaySource[] = [] - const sourceByKey = new Map() - - const addSource = (source: DisplaySource) => { - const key = sourceKey(source) - const existingIndex = sourceByKey.get(key) - if (existingIndex !== undefined) { - const existing = merged[existingIndex] - if (existing && !existing.url && source.url) { - merged[existingIndex] = source - } - return - } - - sourceByKey.set(key, merged.length) - merged.push(source) - } - - parsedSources.forEach((source) => { - addSource(source) - }) - - explicitSources.forEach((source, idx) => { - const candidate: DisplaySource = { - ...source, - index: idx + 1, - url: cleanUrl(source.url), - hostname: source.url ? sourceHostname(source.url) : '', - } - const explicitKey = sourceKey(candidate) - const existingByKey = sourceByKey.get(explicitKey) - const existingByIndex = merged.findIndex((existing) => existing.index === candidate.index) - const existingIndex = existingByKey ?? (existingByIndex >= 0 ? 
existingByIndex : undefined) - - if (existingIndex !== undefined) { - const existing = merged[existingIndex] - if (!existing) return - merged[existingIndex] = { - ...existing, - name: existing.name || candidate.name, - url: existing.url || candidate.url, - hostname: existing.hostname || candidate.hostname, - } - return - } - - if (parsedSources.length === 0) { - addSource(candidate) - } - }) - - const sources = merged.map((source, idx) => ({ - ...source, - index: Number.isFinite(source.index) && source.index > 0 ? source.index : idx + 1, - name: formatSourceDisplayName(source.name, source.url), - hostname: source.hostname || (source.url ? sourceHostname(source.url) : ''), - })) - const sourceByIndex = new Map(sources.map((source) => [source.index, source])) - const normalizedAnswerText = visibleAnswerText.trim() - - return { - answerText: normalizedAnswerText, - hasAnswerText: normalizedAnswerText.length > 0, - isSourceOnly: !normalizedAnswerText && sources.length > 0, - sources, - sourceByIndex, - } -} - function MessageView({ message, onToggleExpand, @@ -971,10 +660,12 @@ function MessageView({ {renderProvenanceStrip()} {blocks.length > 0 ? blocks.map((block, idx) => (

- {renderInlineCitations(block)} + {renderInlineCitations(sanitizeAnswerParagraph(block))}

)) : ( -

{renderInlineCitations(answerPresentation.answerText)}

+

+ {renderInlineCitations(sanitizeAnswerParagraph(answerPresentation.answerText))} +

)}
) diff --git a/apps/holocron-web/src/components/TopNav.tsx b/apps/holocron-web/src/components/TopNav.tsx index 6fa8e78..d68b236 100644 --- a/apps/holocron-web/src/components/TopNav.tsx +++ b/apps/holocron-web/src/components/TopNav.tsx @@ -53,7 +53,7 @@ export function TopNav({ holocronSession, onHolocronLogout }: TopNavProps) { Home (null) + const [fetchError, setFetchError] = useState(null) + const [checkedAt, setCheckedAt] = useState(null) + + const refresh = async () => { + setFetchError(null) + try { + const next = await traskFetchHealth() + setHealth(next) + setCheckedAt(Date.now()) + } catch (err) { + setHealth(null) + setFetchError(err instanceof Error ? err.message : 'Health check failed.') + setCheckedAt(Date.now()) + } + } + + useEffect(() => { + void refresh() + const timer = window.setInterval(() => void refresh(), 45_000) + return () => window.clearInterval(timer) + }, [origin]) + + if (!origin) return null + + const kind = classifyHealth(health, fetchError) + if (kind === 'ok') return null + + const tone = + kind === 'loading' + ? 'border-border/50 bg-muted/30 text-muted-foreground' + : kind === 'degraded' + ? 'border-yellow-500/40 bg-yellow-500/10 text-yellow-100' + : 'border-destructive/50 bg-destructive/10 text-destructive' + + const title = + kind === 'loading' + ? 'Checking Holocron research API…' + : kind === 'degraded' + ? 'Holocron research API is misconfigured' + : 'Holocron research API is unreachable' + + const detail = + fetchError + ?? (health?.upstreamDetail + ? health.upstreamDetail.replace(/\s+/g, ' ').trim().slice(0, 240) + : health?.upstream + ? `Proxy mode is active but the upstream Trask HTTP host is not healthy (${health.upstream}).` + : 'The configured API origin did not return a healthy response.') + + return ( +
+
+ {kind === 'loading' ? ( + + ) : kind === 'degraded' ? ( + + ) : ( + + )} +
+

{title}

+

{detail}

+

API: {origin}

+ {health?.upstream ? ( +

Upstream: {health.upstream}

+ ) : null} + {checkedAt ? ( +

+ Last check: {new Date(checkedAt).toLocaleTimeString()} +

+ ) : null} +
+ +
+
+ ) +} diff --git a/apps/holocron-web/src/index.css b/apps/holocron-web/src/index.css index 53e07c7..b2fe809 100644 --- a/apps/holocron-web/src/index.css +++ b/apps/holocron-web/src/index.css @@ -88,7 +88,7 @@ .holocron-atmosphere__panel { position: absolute; inset: 0; - background: url("/holocron/holocron-artifact.png") center / cover no-repeat; + background: var(--holocron-artifact-url, none) center / cover no-repeat; opacity: 0.22; mix-blend-mode: normal; filter: none; diff --git a/apps/holocron-web/src/lib/answer-presentation.ts b/apps/holocron-web/src/lib/answer-presentation.ts new file mode 100644 index 0000000..ac0f281 --- /dev/null +++ b/apps/holocron-web/src/lib/answer-presentation.ts @@ -0,0 +1,463 @@ +/** + * Pure helpers for splitting Trask answer markdown into visible claims vs Sources. + * Kept separate from Message.tsx for unit testing without React. + */ + +export interface SourceLike { + name: string + url: string + confidence?: number +} + +export interface DisplaySource extends SourceLike { + index: number + hostname: string +} + +export interface AnswerPresentation { + answerText: string + hasAnswerText: boolean + isSourceOnly: boolean + sources: DisplaySource[] + sourceByIndex: Map +} + +const SOURCE_HEADING_PATTERN = /^\s*sources\s*:?\s*$/i + +function isHttpUrlSchemeTerminator(ch: string): boolean { + return ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r' || ch === ')' || ch === ']' || ch === '>' +} + +/** Collect http(s) URLs without regex backtracking (CodeQL-safe). */ +export function extractHttpUrls(text: string): string[] { + const urls: string[] = [] + const lower = text.toLowerCase() + let i = 0 + while (i < text.length) { + const httpsIdx = lower.indexOf('https://', i) + const httpIdx = lower.indexOf('http://', i) + const start = + httpsIdx === -1 ? httpIdx : httpIdx === -1 ? 
httpsIdx : Math.min(httpsIdx, httpIdx) + if (start === -1) break + let end = start + while (end < text.length && !isHttpUrlSchemeTerminator(text[end]!)) end += 1 + urls.push(text.slice(start, end)) + i = end + } + return urls +} + +function stripHttpUrls(text: string): string { + const lower = text.toLowerCase() + let out = '' + let i = 0 + while (i < text.length) { + const httpsIdx = lower.indexOf('https://', i) + const httpIdx = lower.indexOf('http://', i) + const start = + httpsIdx === -1 ? httpIdx : httpIdx === -1 ? httpsIdx : Math.min(httpsIdx, httpIdx) + if (start === -1) { + out += text.slice(i) + break + } + out += text.slice(i, start) + let end = start + while (end < text.length && !isHttpUrlSchemeTerminator(text[end]!)) end += 1 + i = end + } + return out +} + +function skipAsciiWhitespace(text: string, index: number): number { + let i = index + while (i < text.length && (text[i] === ' ' || text[i] === '\t' || text[i] === '\n' || text[i] === '\r')) { + i += 1 + } + return i +} + +/** Replace `[label](https://…)` with `label`; tolerates whitespace between `]` and `(`. */ +export function stripMarkdownHttpLinks(text: string): string { + let result = '' + let i = 0 + while (i < text.length) { + if (text[i] === '!' 
&& text[i + 1] === '[') { + const closeBracket = text.indexOf(']', i + 2) + if (closeBracket === -1) { + result += text[i] + i += 1 + continue + } + const afterBracket = skipAsciiWhitespace(text, closeBracket + 1) + if (text[afterBracket] !== '(') { + result += text[i] + i += 1 + continue + } + const closeParen = text.indexOf(')', afterBracket + 1) + if (closeParen === -1) { + result += text[i] + i += 1 + continue + } + const url = text.slice(afterBracket + 1, closeParen) + if (url.startsWith('http://') || url.startsWith('https://')) { + i = closeParen + 1 + continue + } + result += text.slice(i, closeParen + 1) + i = closeParen + 1 + continue + } + + if (text[i] !== '[') { + result += text[i] + i += 1 + continue + } + const closeBracket = text.indexOf(']', i + 1) + if (closeBracket === -1) { + result += text[i] + i += 1 + continue + } + const afterBracket = skipAsciiWhitespace(text, closeBracket + 1) + if (text[afterBracket] !== '(') { + result += text[i] + i += 1 + continue + } + const closeParen = text.indexOf(')', afterBracket + 1) + if (closeParen === -1) { + result += text[i] + i += 1 + continue + } + const url = text.slice(afterBracket + 1, closeParen) + if (url.startsWith('http://') || url.startsWith('https://')) { + const label = text.slice(i + 1, closeBracket).trim() + if (label && label !== '...') { + result += label + } + i = closeParen + 1 + } else { + result += text.slice(i, closeParen + 1) + i = closeParen + 1 + } + } + return result +} + +const CITATION_PLACEHOLDER_PREFIX = '\uE000CIT' +const CITATION_PLACEHOLDER_SUFFIX = '\uE001' + +/** Visible answer paragraphs: strip markdown links/images; preserve `[n]` citation markers. 
*/ +export function sanitizeAnswerParagraph(text: string): string { + const citations: string[] = [] + const withPlaceholders = text.replace(/\[(\d{1,3})\]/g, (full) => { + citations.push(full) + return `${CITATION_PLACEHOLDER_PREFIX}${citations.length - 1}${CITATION_PLACEHOLDER_SUFFIX}` + }) + + let t = stripMarkdownHttpLinks(withPlaceholders) + t = stripHttpUrls(t) + t = t + .replace(/\)\s*\]/g, '') + .replace(/\[\s*\]/g, '') + .replace(/[()]+/g, ' ') + .replace(/\s{2,}/g, ' ') + .trim() + + for (let i = 0; i < citations.length; i += 1) { + const token = `${CITATION_PLACEHOLDER_PREFIX}${i}${CITATION_PLACEHOLDER_SUFFIX}` + t = t.split(token).join(citations[i] ?? '') + } + return t +} + +function parseBracketCitationLine(line: string): { index: number; rest: string } | null { + if (!line.startsWith('[')) return null + const close = line.indexOf(']', 1) + if (close <= 1) return null + const num = line.slice(1, close) + if (!/^\d{1,3}$/.test(num)) return null + const rest = line.slice(close + 1).trimStart() + return { index: Number(num), rest } +} + +function parseNumberedSourceLine(line: string): { index: number; rest: string } | null { + const match = /^(\d{1,3})\.\s+/u.exec(line) + if (!match) return null + return { index: Number(match[1]), rest: line.slice(match[0].length) } +} + +function cleanUrl(raw: string): string { + return raw.trim().replace(/[.,;:]+$/g, '') +} + +function sourceHostname(url: string): string { + try { + return new URL(url).hostname.replace(/^www\./, '') + } catch { + return url.replace(/^https?:\/\//, '').split('/')[0] || url + } +} + +function formatSourceDisplayName(name: string, url: string): string { + if (!url) return name + try { + const parsed = new URL(url) + const host = parsed.hostname.replace(/^www\./, '') + const genericName = + !name.trim() + || name.trim().toLowerCase() === host.toLowerCase() + || name.trim().toLowerCase() === 'github.com' + + if (host === 'github.com') { + const path = decodeURIComponent(parsed.pathname) + 
const hash = parsed.hash && /^#L/i.test(parsed.hash) ? parsed.hash : '' + const blobMatch = path.match(/\/blob\/[^/]+\/(.+)$/i) + if (blobMatch?.[1]) { + const shortPath = blobMatch[1].replace(/\/+$/, '').split('/').slice(-2).join('/') || blobMatch[1] + return `${shortPath}${hash}` + } + const wikiMatch = path.match(/\/wiki\/(.+)$/i) + if (wikiMatch?.[1]) { + const page = wikiMatch[1].replace(/\/+$/, '') + return `wiki: ${page.split('/').pop() ?? page}${hash}` + } + const repoMatch = path.match(/^\/([^/]+)\/([^/]+)\/?$/i) + if (repoMatch?.[2]) { + return `${repoMatch[2]}${hash}` + } + } + + if (genericName) { + const pathSegments = parsed.pathname.replace(/\/+$/, '').split('/').filter(Boolean) + if (pathSegments.length > 1) { + return `${pathSegments.slice(-2).join('/')}`.replace(/[-_]+/g, ' ') + } + return host + } + } catch { + /* keep name */ + } + return name +} + +export function sourceKey(source: Pick): string { + const url = source.url?.trim().toLowerCase() + if (url) return `url:${url}` + return `name:${source.name.trim().toLowerCase()}` +} + +function stripSourceNoise(text: string): string { + let t = stripMarkdownHttpLinks(text) + t = stripHttpUrls(t) + return t + .replace(/[()[\]]+/g, ' ') + .replace(/\s{2,}/g, ' ') + .trim() +} + +function isNumberedBibliographyLine(line: string): boolean { + const trimmed = line.trim() + if (!parseNumberedSourceLine(trimmed)) return false + return extractHttpUrls(trimmed).length > 0 || trimmed.length > 24 +} + +/** + * When the model omits a `Sources` heading, peel trailing `1. …` bibliography lines into the source block. + */ +export function peelEmbeddedNumberedSources(normalized: string): { answerText: string; sourceText: string } { + const lines = normalized.split('\n') + let firstNumbered = -1 + for (let i = 0; i < lines.length; i += 1) { + if (isNumberedBibliographyLine(lines[i] ?? 
'')) { + firstNumbered = i + break + } + } + if (firstNumbered < 0) { + return { answerText: normalized, sourceText: '' } + } + + if (firstNumbered === 0) { + let numberedCount = 0 + for (const line of lines) { + const trimmed = line.trim() + if (!trimmed) continue + if (parseNumberedSourceLine(trimmed)) numberedCount += 1 + } + if (numberedCount >= 2) { + return { answerText: '', sourceText: normalized } + } + return { answerText: normalized, sourceText: '' } + } + + let numberedCount = 0 + for (let i = firstNumbered; i < lines.length; i += 1) { + const line = (lines[i] ?? '').trim() + if (!line) continue + if (parseNumberedSourceLine(line)) { + numberedCount += 1 + continue + } + if (numberedCount >= 2) break + numberedCount = 0 + break + } + + if (numberedCount < 2) { + return { answerText: normalized, sourceText: '' } + } + + const answerLines = lines.slice(0, firstNumbered) + const sourceLines = lines.slice(firstNumbered) + const answerText = answerLines.join('\n').trim() + if (!answerText) { + return { answerText: '', sourceText: sourceLines.join('\n').trim() } + } + return { + answerText, + sourceText: sourceLines.join('\n').trim(), + } +} + +export function splitAnswerFromSourceSection(content: string): { answerText: string; sourceText: string } { + const normalized = content.replace(/\r\n/g, '\n').trim() + const lines = normalized.split('\n') + const sourceHeadingIndex = lines.findIndex((line) => SOURCE_HEADING_PATTERN.test(line)) + + if (sourceHeadingIndex === -1) { + return peelEmbeddedNumberedSources(normalized) + } + + return { + answerText: lines.slice(0, sourceHeadingIndex).join('\n').trim(), + sourceText: lines.slice(sourceHeadingIndex + 1).join('\n').trim(), + } +} + +function parseSourcesFromText(sourceText: string): DisplaySource[] { + if (!sourceText.trim()) return [] + + const entries: Array<{ index: number; body: string[] }> = [] + for (const rawLine of sourceText.split('\n')) { + const line = rawLine.trim() + if (!line) continue + + const 
citation = parseBracketCitationLine(line) + if (citation) { + entries.push({ index: citation.index, body: [citation.rest] }) + continue + } + + const numbered = parseNumberedSourceLine(line) + if (numbered) { + entries.push({ index: numbered.index, body: [numbered.rest] }) + continue + } + + const lastEntry = entries.length > 0 ? entries[entries.length - 1] : undefined + lastEntry?.body.push(line) + } + + return entries + .map((entry) => { + const body = entry.body.join(' ').trim() + const urls = extractHttpUrls(body) + const url = cleanUrl(urls[0] ?? '') + const name = formatSourceDisplayName(stripSourceNoise(body) || '', url) + || (url ? sourceHostname(url) : `Source ${entry.index}`) + + if (!url && !name) return null + return { + index: entry.index, + name, + url, + confidence: 1, + hostname: url ? sourceHostname(url) : '', + } satisfies DisplaySource + }) + .filter((source): source is DisplaySource => Boolean(source)) +} + +export function buildAnswerPresentation(content: string, explicitSources: SourceLike[] = []): AnswerPresentation { + const { answerText, sourceText } = splitAnswerFromSourceSection(content) + let parsedSources = parseSourcesFromText(sourceText) + let visibleAnswerText = answerText + + if (sourceText && parsedSources.length === 0) { + visibleAnswerText = content.replace(/\r\n/g, '\n').trim() + parsedSources = [] + } + const merged: DisplaySource[] = [] + const sourceByKey = new Map() + + const addSource = (source: DisplaySource) => { + const key = sourceKey(source) + const existingIndex = sourceByKey.get(key) + if (existingIndex !== undefined) { + const existing = merged[existingIndex] + if (existing && !existing.url && source.url) { + merged[existingIndex] = source + } + return + } + + sourceByKey.set(key, merged.length) + merged.push(source) + } + + parsedSources.forEach((source) => { + addSource(source) + }) + + explicitSources.forEach((source, idx) => { + const candidate: DisplaySource = { + ...source, + index: idx + 1, + url: 
cleanUrl(source.url), + hostname: source.url ? sourceHostname(source.url) : '', + confidence: source.confidence ?? 1, + } + const explicitKey = sourceKey(candidate) + const existingByKey = sourceByKey.get(explicitKey) + const existingByIndex = merged.findIndex((existing) => existing.index === candidate.index) + const existingIndex = existingByKey ?? (existingByIndex >= 0 ? existingByIndex : undefined) + + if (existingIndex !== undefined) { + const existing = merged[existingIndex] + if (!existing) return + merged[existingIndex] = { + ...existing, + name: existing.name || candidate.name, + url: existing.url || candidate.url, + hostname: existing.hostname || candidate.hostname, + } + return + } + + if (parsedSources.length === 0) { + addSource(candidate) + } + }) + + const sources = merged.map((source, idx) => ({ + ...source, + index: Number.isFinite(source.index) && source.index > 0 ? source.index : idx + 1, + name: formatSourceDisplayName(source.name, source.url), + hostname: source.hostname || (source.url ? sourceHostname(source.url) : ''), + })) + const sourceByIndex = new Map(sources.map((source) => [source.index, source])) + const normalizedAnswerText = visibleAnswerText.trim() + + return { + answerText: normalizedAnswerText, + hasAnswerText: normalizedAnswerText.length > 0, + isSourceOnly: !normalizedAnswerText && sources.length > 0, + sources, + sourceByIndex, + } +} diff --git a/apps/holocron-web/src/lib/asset-url.ts b/apps/holocron-web/src/lib/asset-url.ts new file mode 100644 index 0000000..32f0a03 --- /dev/null +++ b/apps/holocron-web/src/lib/asset-url.ts @@ -0,0 +1,7 @@ +/** Resolve a public asset path for GitHub Pages base paths (e.g. `/community-bots/qa-webui/`). */ +export function holocronAssetUrl(relativePath: string): string { + const base = import.meta.env.BASE_URL ?? '/' + const normalized = relativePath.replace(/^\//, '') + const prefix = base.endsWith('/') ? 
base : `${base}/` + return `${prefix}${normalized}` +} diff --git a/apps/holocron-web/src/lib/trask-api.ts b/apps/holocron-web/src/lib/trask-api.ts index d7a05f8..7f84fe5 100644 --- a/apps/holocron-web/src/lib/trask-api.ts +++ b/apps/holocron-web/src/lib/trask-api.ts @@ -54,10 +54,24 @@ export interface TraskModelOptionDto { recommended?: boolean } -function apiBase(): string { +export function traskApiOrigin(): string { return import.meta.env.VITE_TRASK_API_BASE?.replace(/\/+$/, '') ?? '' } +function apiBase(): string { + return traskApiOrigin() +} + +export interface TraskHealthDto { + ok: boolean + mode?: string + upstream?: string + upstreamReachable?: boolean + upstreamStatus?: number + upstreamDetail?: string + builtinFallback?: boolean +} + function authHeaders(apiKey?: string): Record { const headers: Record = { 'Content-Type': 'application/json', @@ -114,6 +128,39 @@ function mergeAbortSignals(a: AbortSignal, b: AbortSignal): AbortSignal { return c.signal } +type TraskApiErrorPayload = { + error?: string + hint?: string + upstream?: string + upstreamStatus?: number + upstreamDetail?: string +} + +function formatTraskApiError(data: TraskApiErrorPayload | null | undefined, status: number): string { + const parts: string[] = [] + if (data?.error) parts.push(data.error) + if (data?.upstreamStatus !== undefined) { + parts.push(`upstream HTTP ${data.upstreamStatus}`) + } + if (data?.upstream) parts.push(`upstream ${data.upstream}`) + if (data?.hint) parts.push(data.hint) + if (data?.upstreamDetail) { + const snippet = data.upstreamDetail.replace(/\s+/g, ' ').trim().slice(0, 160) + if (snippet) parts.push(snippet) + } + if (parts.length > 0) return parts.join(' — ') + return `Holocron API request failed (HTTP ${status}).` +} + +async function readTraskErrorBody(res: Response): Promise { + try { + const data = (await res.json()) as TraskApiErrorPayload + return formatTraskApiError(data, res.status) + } catch { + return `Holocron API request failed (HTTP 
${res.status}).` + } +} + /** User-facing message for failed Trask HTTP calls (handles DOMException / TypeError). */ export function traskErrorMessageFromUnknown(error: unknown): string { const abortish = (name: string | undefined) => name === 'AbortError' || name === 'TimeoutError' @@ -155,6 +202,15 @@ export function traskUsesSameOriginApi(): boolean { return !apiBase() } +export async function traskFetchHealth(): Promise { + const res = await fetch(`${apiBase()}/healthz`, traskRequestInit(undefined, { method: 'GET' }, 12_000)) + if (!res.ok) { /* body is still unread here, so the structured upstream error is parsed exactly once */ + throw new Error(await readTraskErrorBody(res)) + } + const data = (await res.json().catch(() => ({}))) as TraskHealthDto + return data +} + export async function traskFetchSession(): Promise { try { const res = await fetch(`${apiBase()}/api/trask/session`, traskRequestInit()) @@ -193,7 +249,7 @@ export async function traskGetThread( ) const data = (await res.json()) as { history?: TraskHistoryRecordDto[]; error?: string } if (!res.ok) { - throw new Error(data.error ?? `thread failed: ${res.status}`) + throw new Error(data.error ?? formatTraskApiError(data, res.status)) /* body already consumed above; format from parsed data */ } return data.history ?? [] } @@ -259,16 +315,15 @@ export async function traskAsk( method: 'POST', body: JSON.stringify(body), }, traskAskTimeoutMs())) - const data = (await res.json()) as { - error?: string + const data = (await res.json()) as TraskApiErrorPayload & { query?: TraskHistoryRecordDto } const record = data.query - if (!record) { - throw new Error(data.error ?? `ask failed: ${res.status}`) - } if (!res.ok && res.status !== 202) { - throw new Error(data.error ?? record.error ?? `ask failed: ${res.status}`) + throw new Error(data.error ?? record?.error ?? formatTraskApiError(data, res.status)) + } + if (!record) { + throw new Error(data.error ?? formatTraskApiError(data, res.status)) } if (record.status === 'failed') { throw new Error(record.error ?? 
'Holocron research failed.') diff --git a/apps/holocron-web/src/main.tsx b/apps/holocron-web/src/main.tsx index de0ac2e..0b56023 100644 --- a/apps/holocron-web/src/main.tsx +++ b/apps/holocron-web/src/main.tsx @@ -1,6 +1,5 @@ import { createRoot } from 'react-dom/client' import { ErrorBoundary } from "react-error-boundary"; -import "@github/spark/spark" import App from './App.tsx' import { ErrorFallback } from './ErrorFallback.tsx' diff --git a/apps/holocron-web/src/spark-stub.ts b/apps/holocron-web/src/spark-stub.ts new file mode 100644 index 0000000..3aae40b --- /dev/null +++ b/apps/holocron-web/src/spark-stub.ts @@ -0,0 +1 @@ +/** No-op when Holocron is built without GitHub Spark hosting (`ENABLE_SPARK=0`). */ diff --git a/apps/holocron-web/vite.config.ts b/apps/holocron-web/vite.config.ts index 8039958..8e19b21 100644 --- a/apps/holocron-web/vite.config.ts +++ b/apps/holocron-web/vite.config.ts @@ -121,7 +121,10 @@ export default defineConfig({ ], resolve: { alias: { - '@': resolve(projectRoot, 'src') + '@': resolve(projectRoot, 'src'), + ...(enableSparkPlugin + ? {} + : { '@github/spark/spark': resolve(projectRoot, 'src/spark-stub.ts') }), } }, }); diff --git a/docs/evidence/2026-05-19-discord-ask-live-verify.md b/docs/evidence/2026-05-19-discord-ask-live-verify.md index eb766f6..788a44b 100644 --- a/docs/evidence/2026-05-19-discord-ask-live-verify.md +++ b/docs/evidence/2026-05-19-discord-ask-live-verify.md @@ -62,16 +62,20 @@ Footer: `11 passages · indexer 8787` --- -## Holocron browser MCP (expert queries, 2026-06-03) +## Holocron browser MCP (expert queries, 2026-06-04) -Origin: `http://127.0.0.1:4010` with fresh `?thread=` per query; suggestion buttons when Submit stayed disabled on cold threads. +Origin: `http://127.0.0.1:4010` with fresh `?thread=` per query; suggestion buttons on cold threads (Submit disabled until a question is chosen). + +TopNav verified via CDP: `https://openkotor.com/projects`, `/faq`, `/formats` (no `/#` hash routes). 
| Query | Thread suffix | Result | |-------|---------------|--------| -| TSLPatcher / 2DA+TLK | `...701` | PASS — grounded, ≥2 https cites | -| MDLOps / Blender workflow | `...702` | PASS | -| Widescreen / ini settings | `...003` | PASS | -| KOTOR save location (Windows) | `...004` | PASS — Deadly Stream + Steam | -| reone Odyssey runtime/scripting | `...005` | PASS — seedhartha/reone + wiki | +| TSLPatcher / 2DA+TLK | `...101` | PASS — grounded, citations 1–4 | +| Widescreen / ini settings | `...102` | PASS — Deadly Stream widescreen files | +| MDLOps / Blender workflow | `...103` | PASS — MDLOps + kotorblender cites | +| KOTOR save location (Windows) | `...104` | PASS — save path + https cites | +| reone Odyssey runtime/scripting | `...105` | PASS — reone GitHub cites (9 links) | + +Playwright local (prior session + CI): `HOLOCRON_REUSE_SERVER=1 pnpm holocron:e2e:playwright` — 6/6. PR [#96](https://github.com/OpenKotOR/community-bots/pull/96) Holocron Playwright e2e job green. -Playwright local (same session): `HOLOCRON_REUSE_SERVER=1 pnpm holocron:e2e:playwright` — 6/6. +Discord live (`pnpm verify:trask-discord`): not re-run — `TRASK_DISCORD_BOT_TOKEN` unset in agent env; import-smoke + prior evidence (5/5) remain authoritative for Discord contract. diff --git a/docs/plans/2026-06-03-004-feat-holocron-topnav-site-routes-plan.md b/docs/plans/2026-06-03-004-feat-holocron-topnav-site-routes-plan.md new file mode 100644 index 0000000..c065d73 --- /dev/null +++ b/docs/plans/2026-06-03-004-feat-holocron-topnav-site-routes-plan.md @@ -0,0 +1,38 @@ +--- +title: "fix(holocron): OpenKotOR site path links + CI cache follow-up" +type: fix +status: completed +date: 2026-06-03 +origin: user /lfg + openkotor/site route convention (/projects not /#projects) +--- + +# Holocron TopNav + post-merge CI hardening + +## Problem Frame + +PR #94 merged Trask Playwright + Discord import-smoke CI. 
Remaining gaps: Holocron `TopNav` still linked to `openkotor.com/#projects|faq|formats` while [openkotor/site](https://github.com/openkotor/site) uses path routes `/projects`, `/faq`, `/formats`. Branch also carries review autofixes not yet on `main` (`actions: write` for cache save, hardened `ci_warm_trask_embed.sh`). + +## Inferred Intent + +- **Direct ask:** Continue Trask/Holocron quality; fix external nav to match production site IA. +- **Adjacent impact:** Operators clicking nav from Holocron should land on real pages, not hash anchors that may not match SPA routing. +- **Cohesive scope:** TopNav href fix; land CI follow-ups on `main`; re-run Holocron Playwright locally to confirm no regression. + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | `TopNav.tsx` uses `https://openkotor.com/projects`, `/faq`, `/formats` | +| R2 | Cherry-pick or merge `actions: write` + `ci_warm_trask_embed.sh` hardening onto `main` via new PR | +| R3 | `pnpm holocron:e2e:playwright` passes with live stack (6 tests) | +| R4 | CI green on new PR | + +## Out of scope + +New Discord.com Playwright; re-opening merged PR #94. + +## Verification + +- Grep: no `openkotor.com/#` in repo +- Local Playwright 6/6 with `HOLOCRON_REUSE_SERVER=1` +- GitHub Actions CI all jobs pass diff --git a/docs/plans/2026-06-03-005-feat-trask-lfg-pass5-merge-browser-plan.md b/docs/plans/2026-06-03-005-feat-trask-lfg-pass5-merge-browser-plan.md new file mode 100644 index 0000000..4f1b01a --- /dev/null +++ b/docs/plans/2026-06-03-005-feat-trask-lfg-pass5-merge-browser-plan.md @@ -0,0 +1,26 @@ +--- +title: "feat(trask): LFG pass 5 — merge PR #96 + Holocron browser gate" +type: feat +status: completed +date: 2026-06-04 +origin: user /lfg continue — Trask Discord + Holocron Playwright + browser +--- + +# LFG pass 5: close PR #96 vertical slice + +## Problem Frame + +PR #94 merged Trask CI (Playwright + import-smoke). PR #96 merged TopNav path links and CI cache-save hardening (CI green). 
Pass 5 completed: browser MCP 5/5 on `:4010`, evidence refreshed; Discord live skipped (no token in agent env). + +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | PR #96 checks green (done) | +| R2 | Browser MCP: 5/5 expert queries on `http://127.0.0.1:4010` | +| R3 | `pnpm verify:trask-discord` when stack + token available | +| R4 | Refresh `docs/evidence/2026-05-19-discord-ask-live-verify.md` with pass 5 results | + +## Out of scope + +Merge PR without user ask; discord.com Playwright CI. diff --git a/docs/plans/2026-06-04-006-fix-holocron-public-api-connection-plan.md b/docs/plans/2026-06-04-006-fix-holocron-public-api-connection-plan.md new file mode 100644 index 0000000..4eb96a5 --- /dev/null +++ b/docs/plans/2026-06-04-006-fix-holocron-public-api-connection-plan.md @@ -0,0 +1,31 @@ +--- +title: "fix(holocron): public API connection, logging, and CI health gates" +type: fix +status: completed +date: 2026-06-04 +origin: user report — qa-webui 503 on trask-worker, stuck Dispatching, missing assets +--- + +# Holocron public API connection fix + +## Problem + +- [REPO] `TRASK_API_BASE` → `trask-worker.bocloud.workers.dev` proxies to `TRASK_RESEARCHWIZARD_BASE_URL` (`openkotor-holocron-trask-http.hf.space`). +- [UI] HF Space is **ERROR** → upstream returns **503**; Holocron stays on “Dispatching” with silent retries. +- [UI] No connection status panel; console-only errors. +- [UI] `/holocron/holocron-artifact.png` 404 on Pages (`BASE=/community-bots/qa-webui/`). +- [UI] `/_spark/loaded` 405 — Spark bundle still loaded when `ENABLE_SPARK=0`. 
+ +## Requirements + +| ID | Requirement | +|----|-------------| +| R1 | Worker returns structured JSON on upstream failure (status, upstream URL, hint) | +| R2 | Holocron shows API health banner + research-step errors; fail after bounded retries | +| R3 | Static assets respect `import.meta.env.BASE_URL` | +| R4 | Stub Spark when not enabled at build time | +| R5 | CI: post-deploy worker smoke + gate Pages build on `TRASK_API_BASE` health | + +## Out of scope + +- Rebuilding HF Space runtime in this pass (workflow exists; ops must fix Space or change `TRASK_RESEARCHWIZARD_BASE_URL`). diff --git a/docs/plans/2026-06-04-007-feat-holocron-answer-citation-render-plan.md b/docs/plans/2026-06-04-007-feat-holocron-answer-citation-render-plan.md new file mode 100644 index 0000000..cedd772 --- /dev/null +++ b/docs/plans/2026-06-04-007-feat-holocron-answer-citation-render-plan.md @@ -0,0 +1,20 @@ +# Plan: Holocron answer citation rendering + +**Status:** completed +**Branch:** `feat/holocron-topnav-ci-followup` + +## Problem + +Holocron assistant messages showed raw markdown (`[label] (https://…)`) in answer paragraphs, numbered bibliography lines duplicated in the body when the model omitted a `Sources` heading, and broken `githubusercontent` image links from passage text. + +## Approach + +1. **Client** (`apps/holocron-web/src/lib/answer-presentation.ts`): tolerate whitespace in markdown links, strip images, peel embedded `1.` bibliography blocks, sanitize visible paragraphs while preserving `[n]` markers. +2. **Server** (`packages/trask/src/grounded-evidence.ts`): allow optional whitespace in `stripMarkdownArtifacts` regex. +3. **Tests:** `scripts/answer_presentation.test.mjs`, `grounded-evidence.test.ts` brief-profile markdown case. 
+ +## Verification + +- `node --import tsx/esm --test scripts/answer_presentation.test.mjs` +- `pnpm build && node --test packages/trask/dist/grounded-evidence.test.js` +- Restart `trask_live_stack.sh`, browser MCP reone canonical query on `:4010` diff --git a/infra/trask-worker/README.md b/infra/trask-worker/README.md index 08fce09..21b0e38 100644 --- a/infra/trask-worker/README.md +++ b/infra/trask-worker/README.md @@ -7,7 +7,7 @@ Edge proxy for public Holocron (`qa-webui`) → live `trask-http-server`. - **Proxy mode** (`TRASK_BUILTIN_API=0`, required for research): forwards `/api/trask/*` to `TRASK_RESEARCHWIZARD_BASE_URL` (for example Hugging Face Space `OpenKotOR/holocron-trask-http`). - **Builtin stub** (`TRASK_BUILTIN_API=1`): health checks only; `/api/trask/*` returns **503** (bundled reference Q&A was removed). -There is **no** bundled fallback when upstream fails (`TRASK_BUILTIN_FALLBACK=0` by default). +When `TRASK_BUILTIN_FALLBACK=1`, upstream **5xx** responses fall back to the builtin stub (still **503** for research — bundled Q&A was removed). `/healthz` probes upstream and returns **503** with `ok: false` when the Trask HTTP host is down (for example HF Space `OpenKotOR/holocron-trask-http` in **ERROR**). 
## Layout diff --git a/infra/trask-worker/src/worker.ts b/infra/trask-worker/src/worker.ts index 1a3a86b..e5e8f9a 100644 --- a/infra/trask-worker/src/worker.ts +++ b/infra/trask-worker/src/worker.ts @@ -165,6 +165,47 @@ async function serveBuiltin(request: Request, origin: string | null): Promise { + const healthUrl = `${normalizeBackendBaseUrl(baseUrl)}/healthz`; + try { + const headers = new Headers({ Accept: "application/json" }); + if (upstreamApiKey) { + headers.set("Authorization", `Bearer ${upstreamApiKey}`); + } + const res = await fetch(healthUrl, { method: "GET", headers, redirect: "manual" }); + const detail = (await res.text()).slice(0, 300); + return { reachable: res.ok, status: res.status, detail: detail || undefined }; + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return { reachable: false, detail: message }; + } +} + +async function enrichUpstreamFailure( + upstreamResponse: Response, + baseUrl: string, + origin: string | null, +): Promise { + const upstreamStatus = upstreamResponse.status; + const upstreamDetail = (await upstreamResponse.text()).slice(0, 500); + return jsonResponse( + upstreamStatus >= 500 ? upstreamStatus : 502, + { + error: `Trask HTTP upstream unavailable (${upstreamStatus}).`, + upstream: normalizeBackendBaseUrl(baseUrl), + upstreamStatus, + upstreamDetail: upstreamDetail || undefined, + hint: + "Restore the Hugging Face Space OpenKotOR/holocron-trask-http or point TRASK_RESEARCHWIZARD_BASE_URL at a healthy trask-http-server.", + }, + origin, + ); +} + async function serveUpstreamOrFallback( request: Request, env: Env, @@ -181,13 +222,28 @@ async function serveUpstreamOrFallback( const upstreamApiKey = (env.TRASK_RESEARCHWIZARD_API_KEY ?? 
"").trim(); try { - return await proxyToUpstream(request, targetUrl, origin, upstreamApiKey, bodyText); + const upstreamResponse = await proxyToUpstream(request, targetUrl, origin, upstreamApiKey, bodyText); + if (upstreamResponse.ok) { + return upstreamResponse; + } + if (useBuiltinFallback(env) && shouldFallbackToBuiltin(upstreamResponse)) { + const replayed = + bodyText !== undefined + ? new Request(request.url, { method: request.method, headers: request.headers, body: bodyText }) + : request; + const builtin = await serveBuiltin(replayed, origin); + if (builtin && builtin.status < 500) { + return builtin; + } + } + return enrichUpstreamFailure(upstreamResponse, baseUrl, origin); } catch { return jsonResponse( 502, { error: "Upstream Trask HTTP origin is unreachable.", - detail: "Bundled reference fallback is disabled; fix Trask HTTP upstream or TRASK_RESEARCHWIZARD_BASE_URL.", + upstream: normalizeBackendBaseUrl(baseUrl), + hint: "Fix Trask HTTP upstream or TRASK_RESEARCHWIZARD_BASE_URL.", }, origin, ); @@ -223,12 +279,21 @@ export default { return builtin; } } + const baseUrl = upstreamBaseUrl(env); + const upstreamApiKey = (env.TRASK_RESEARCHWIZARD_API_KEY ?? "").trim(); + const upstreamProbe = hasRealUpstream(env) + ? await probeUpstreamHealth(baseUrl, upstreamApiKey) + : { reachable: false as const }; + const upstreamHealthy = hasRealUpstream(env) ? upstreamProbe.reachable : false; return jsonResponse( - 200, + upstreamHealthy ? 200 : 503, { - ok: true, + ok: upstreamHealthy, mode: hasRealUpstream(env) ? "proxy" : "builtin-public-api", - upstream: hasRealUpstream(env) ? normalizeBackendBaseUrl(upstreamBaseUrl(env)) : undefined, + upstream: hasRealUpstream(env) ? 
normalizeBackendBaseUrl(baseUrl) : undefined, + upstreamReachable: upstreamProbe.reachable, + upstreamStatus: upstreamProbe.status, + upstreamDetail: upstreamProbe.detail, builtinFallback: useBuiltinFallback(env), }, origin, diff --git a/packages/trask/src/grounded-evidence.ts b/packages/trask/src/grounded-evidence.ts index ac4c2d8..2acaa6a 100644 --- a/packages/trask/src/grounded-evidence.ts +++ b/packages/trask/src/grounded-evidence.ts @@ -490,9 +490,9 @@ export const composeGroundedAnswerFromClaims = ( const stripMarkdownArtifacts = (value: string): string => { const capped = value.length > 8000 ? value.slice(0, 8000) : value; return capped - .replace(/!\[([^\]]{0,500})\]\([^)]{0,500}\)/gu, "$1") - .replace(/\[([^\]]{0,500})\]\([^)]{0,500}\)/gu, "$1") - .replace(/\[\]\([^)]{0,500}\)/gu, "") + .replace(/!\[([^\]]{0,500})\]\s*\([^)]{0,500}\)/gu, "$1") + .replace(/\[([^\]]{0,500})\]\s*\([^)]{0,500}\)/gu, "$1") + .replace(/\[\]\s*\([^)]{0,500}\)/gu, "") .replace(/\*+/gu, "") .replace(/`+/gu, ""); }; diff --git a/scripts/answer_presentation.test.mjs b/scripts/answer_presentation.test.mjs new file mode 100644 index 0000000..f6f4257 --- /dev/null +++ b/scripts/answer_presentation.test.mjs @@ -0,0 +1,71 @@ +import assert from 'node:assert/strict' +import { describe, it } from 'node:test' +import { + buildAnswerPresentation, + peelEmbeddedNumberedSources, + sanitizeAnswerParagraph, + stripMarkdownHttpLinks, +} from '../apps/holocron-web/src/lib/answer-presentation.ts' + +describe('stripMarkdownHttpLinks', () => { + it('strips spaced markdown links', () => { + const raw = + '[icon.png] (https://raw.githubusercontent.com/KobaltBlu/KotOR.js/master/src/assets/icons/icon.png)' + const out = stripMarkdownHttpLinks(raw) + assert.equal(out, 'icon.png') + }) + + it('drops image-only markdown', () => { + const raw = '![alt](https://example.com/a.png) KotOR.js remake' + const out = stripMarkdownHttpLinks(raw) + assert.match(out, /KotOR\.js remake/) + assert.doesNotMatch(out, 
/example\.com/) + }) +}) + +describe('sanitizeAnswerParagraph', () => { + it('preserves numeric citation markers', () => { + const raw = 'The reone project provides engine work [1] and KotOR.js ports TypeScript [3].' + const out = sanitizeAnswerParagraph(raw) + assert.match(out, /\[1\]/) + assert.match(out, /\[3\]/) + }) +}) + +describe('peelEmbeddedNumberedSources', () => { + it('moves trailing numbered bibliography into sourceText', () => { + const raw = [ + 'The reone project is an open-source Odyssey engine reimplementation [1].', + '1. reone - https://github.com/seedhartha/reone', + '2. KotOR.js - https://github.com/KobaltBlu/KotOR.js', + ].join('\n') + const split = peelEmbeddedNumberedSources(raw) + assert.match(split.answerText, /reone project/) + assert.match(split.sourceText, /^1\./m) + }) + + it('treats source-only numbered answers as bibliography', () => { + const raw = [ + '1. reone Odyssey engine - https://github.com/seedhartha/reone', + '2. reone wiki - https://github.com/seedhartha/reone/wiki', + ].join('\n') + const split = peelEmbeddedNumberedSources(raw) + assert.equal(split.answerText, '') + assert.match(split.sourceText, /reone wiki/) + }) +}) + +describe('buildAnswerPresentation', () => { + it('parses explicit API sources when body is bibliography-only', () => { + const content = [ + '1. reone - https://github.com/seedhartha/reone', + '2. 
KotOR.js - https://github.com/KobaltBlu/KotOR.js', + ].join('\n') + const presentation = buildAnswerPresentation(content, [ + { name: 'reone', url: 'https://github.com/seedhartha/reone', confidence: 1 }, + { name: 'KotOR.js', url: 'https://github.com/KobaltBlu/KotOR.js', confidence: 1 }, + ]) + assert.equal(presentation.isSourceOnly, true) + assert.equal(presentation.sources.length, 2) + }) +}) diff --git a/scripts/check_trask_public_api.sh b/scripts/check_trask_public_api.sh new file mode 100755 index 0000000..62ad347 --- /dev/null +++ b/scripts/check_trask_public_api.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# Verify public Trask API (Cloudflare Worker) health before publishing Holocron Pages. +set -euo pipefail + +BASE="${TRASK_API_BASE:-}" +if [ -z "$BASE" ]; then + echo "::error::TRASK_API_BASE is not set." + exit 1 +fi + +BASE="${BASE%/}" +HEALTH_URL="${BASE}/healthz" +ASK_URL="${BASE}/api/trask/ask" + +echo "Checking ${HEALTH_URL}" +health_code="$(curl -sS --max-time 30 -o /tmp/trask-health.json -w '%{http_code}' "${HEALTH_URL}" || true)" # no -f: keep the worker's JSON error body for the cat below +if [ "$health_code" != "200" ]; then + echo "::error::Trask API health returned HTTP ${health_code}" + cat /tmp/trask-health.json 2>/dev/null || true + exit 1 +fi + +python3 - <<'PY' +import json +import sys + +with open("/tmp/trask-health.json", encoding="utf-8") as f: + data = json.load(f) +if not data.get("ok"): + print("::error::Trask API health reports ok=false", file=sys.stderr) + sys.exit(1) +if data.get("upstreamReachable") is False: + print("::error::Trask API upstream is unreachable", file=sys.stderr) + sys.exit(1) +PY + +echo "Health OK:" +cat /tmp/trask-health.json + +thread_id="$(python3 - <<'PY' +import uuid +print(uuid.uuid4()) +PY +)" + +echo "Smoke POST ${ASK_URL} (expect 201/202, not 5xx)" +ask_code="$(curl -sS -o /tmp/trask-ask.json -w '%{http_code}' \ + -X POST "${ASK_URL}" \ + -H 'Content-Type: application/json' \ + -d "{\"query\":\"What is TSLPatcher used for in KOTOR modding?\",\"threadId\":\"${thread_id}\"}" \ + 
--max-time 90 || true)" # tolerate curl failure so the check below reports it instead of set -e aborting silently + +if [ -z "$ask_code" ] || [ "$ask_code" -ge 500 ] 2>/dev/null || [ "$ask_code" = "000" ]; then + echo "::error::Trask API ask returned HTTP ${ask_code}" + cat /tmp/trask-ask.json 2>/dev/null || true + exit 1 +fi + +echo "Ask smoke HTTP ${ask_code} (body truncated):" +head -c 400 /tmp/trask-ask.json 2>/dev/null || true +echo diff --git a/scripts/ci_warm_trask_embed.sh b/scripts/ci_warm_trask_embed.sh index 2d74dce..dc6c53f 100755 --- a/scripts/ci_warm_trask_embed.sh +++ b/scripts/ci_warm_trask_embed.sh @@ -14,7 +14,7 @@ warmup() { "$PY" -c "from trask_indexer.embed import embed_texts; embed_texts(['ci warmup']); print('embed ok')" } -if find "$FASTEMBED_CACHE_PATH" -type f -print -quit 2>/dev/null | grep -q .; then +if [ -n "$(find "$FASTEMBED_CACHE_PATH" -type f -print -quit 2>/dev/null)" ]; then echo "FastEmbed cache dir has files at $FASTEMBED_CACHE_PATH; probing..." if warmup; then echo "Embedding model ready from cache" @@ -24,6 +24,11 @@ if find "$FASTEMBED_CACHE_PATH" -type f -print -quit 2>/dev/null | grep -q .; th fi max_attempts="${TRASK_CI_EMBED_WARMUP_ATTEMPTS:-8}" +if ! [ "$max_attempts" -ge 1 ] 2>/dev/null; then # also rejects non-numeric values, where the silenced test errors out + echo "TRASK_CI_EMBED_WARMUP_ATTEMPTS must be >= 1 (got: ${TRASK_CI_EMBED_WARMUP_ATTEMPTS:-})" >&2 + exit 1 +fi + for attempt in $(seq 1 "$max_attempts"); do if warmup; then echo "Embedding model ready (attempt $attempt)" @@ -37,3 +42,6 @@ for attempt in $(seq 1 "$max_attempts"); do echo "warmup attempt $attempt failed; sleep ${wait}s" sleep "$wait" done + +echo "Embedding warmup failed: exhausted attempts without success" >&2 +exit 1