Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ sessions/
.reasonix/semantic-skip
# Scratch entry regenerated each time scripts/bundle-codemirror.mjs runs.
scripts/.cm-entry.mjs
# Personal bun lockfile — project uses npm officially.
bun.lock

45 changes: 45 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,49 @@

<br/>

## Web search

Reasonix includes `web_search` and `web_fetch` tools. By default it uses **Mojeek** (no setup required). You can switch to a **self-hosted SearXNG** instance — a metasearch engine that aggregates whatever upstream engines your instance is configured for.

### Switching engines (persists to disk)

The `/search-engine` slash command (alias `/se`) writes your choice to `~/.reasonix/config.json` immediately — it survives restarts:

```
/search-engine mojeek # default, no external deps
/search-engine searxng # SearXNG at http://localhost:8080
/search-engine searxng http://192.168.1.100:8888 # custom endpoint
```

Equivalent `~/.reasonix/config.json`:

```json
{
"webSearchEngine": "searxng",
"webSearchEndpoint": "http://localhost:8080"
}
```

The tool picks up the change on the next call — no restart needed.

### Starting SearXNG

```sh
podman run -d --replace --name searxng -p 8080:8080 docker.io/searxng/searxng
# or: docker run -d -p 8080:8080 searxng/searxng
```

Verify it's running:

```sh
curl 'http://localhost:8080/search?q=test'
# → HTML search results page
```

> **Note:** If the endpoint omits the protocol, `http://` is assumed — `localhost:8080` is treated as `http://localhost:8080`. If the server is unreachable, the tool shows a clear error telling you to install and start SearXNG or switch back to Mojeek.

<br/>

## Install

```bash
Expand Down Expand Up @@ -90,8 +133,10 @@ Click any card to read the full architecture writeup → [Pillar 1](./docs/ARCHI
| Cost profile | **low per task** | premium | subscription + use | varies |
| DeepSeek prefix-cache | **engineered** | not applicable | not applicable | incidental |
| Embedded web dashboard | yes | — | n/a (IDE) | — |
| Configurable web search engine | `/search-engine` | — | — | — |
| Persistent per-workspace sessions | yes | partial | n/a | — |
| Plan mode · MCP · hooks · skills | yes | yes | yes | partial |
| Web search (Mojeek + SearXNG) | yes | yes | yes | yes |
| Open community development | yes | — | — | yes |

For live cache-hit rates, costs, and methodology, see [`benchmarks/`](./benchmarks/) — the numbers move with model pricing, so they live with the harness, not in the README.
Expand Down
2 changes: 1 addition & 1 deletion docs/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ src/
│ ├── skills.ts # list + invoke SKILL.md playbooks
│ ├── subagent.ts # spawn_subagent — flash+high by default
│ ├── plan.ts # submit_plan (review gate)
│ └── web.ts # web_search, web_fetch
│ └── web.ts # web_search, web_fetch (multi-engine: Mojeek or SearXNG)
├── mcp/ # MCP client + bridge (stdio + SSE)
├── memory.ts # ImmutablePrefix / AppendOnlyLog / VolatileScratch
├── project-memory.ts # REASONIX.md loader
Expand Down
13 changes: 11 additions & 2 deletions src/cli/commands/chat.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import { render } from "ink";
import React, { useState } from "react";
import { loadApiKey, readConfig, searchEnabled } from "../../config.js";
import {
loadApiKey,
readConfig,
searchEnabled,
webSearchEndpoint,
webSearchEngine,
} from "../../config.js";
import { loadDotenv } from "../../env.js";
import type { CacheFirstLoop } from "../../loop.js";
import { McpClient } from "../../mcp/client.js";
Expand Down Expand Up @@ -444,7 +450,10 @@ export async function chatCommand(opts: ChatOptions): Promise<void> {
// a question needs info fresher than its training data.
if (searchEnabled()) {
if (!tools) tools = new ToolRegistry();
registerWebTools(tools);
registerWebTools(tools, {
webSearchEngine: webSearchEngine(),
webSearchEndpoint: webSearchEndpoint(),
});
}

// Memory tools — available in every session, not just code mode.
Expand Down
7 changes: 7 additions & 0 deletions src/cli/ui/slash/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ export const SLASH_COMMANDS: readonly SlashCommandSpec[] = [
argsHint: "[list|show <name>|forget <name>|clear <scope> confirm]",
summary: "show / manage pinned memory (REASONIX.md + ~/.reasonix/memory)",
},
{
cmd: "search-engine",
argsHint: "<mojeek|searxng> [<endpoint>]",
summary: "switch web search backend — mojeek (default, no deps) or searxng (self-hosted)",
argCompleter: ["mojeek", "searxng"],
aliases: ["se"],
},
{
cmd: "skill",
argsHint: "[list|show <name>|<name> [args]]",
Expand Down
2 changes: 2 additions & 0 deletions src/cli/ui/slash/dispatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { handlers as plansHandlers } from "./handlers/plans.js";
import { handlers as semanticHandlers } from "./handlers/semantic.js";
import { handlers as sessionsHandlers } from "./handlers/sessions.js";
import { handlers as skillHandlers } from "./handlers/skill.js";
import { handlers as webSearchEngineHandlers } from "./handlers/web-search-engine.js";
import type { SlashContext, SlashResult } from "./types.js";

/** Synchronous return — async work fires-and-forgets via `ctx.postInfo` to keep input non-blocking. */
Expand All @@ -38,6 +39,7 @@ const HANDLERS: Record<string, SlashHandler> = {
...semanticHandlers,
...sessionsHandlers,
...skillHandlers,
...webSearchEngineHandlers,
};

export function handleSlash(
Expand Down
43 changes: 43 additions & 0 deletions src/cli/ui/slash/handlers/web-search-engine.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { readConfig, webSearchEndpoint, webSearchEngine, writeConfig } from "../../../../config.js";
import type { SlashHandler } from "../dispatch.js";

/** Matches a leading URI scheme (`http://`, `ftp://`, …) so a "://" later in the string is not mistaken for one. */
const ENDPOINT_SCHEME = /^[a-z][a-z0-9+.-]*:\/\//i;

/** Prefix `http://` when no scheme is given; returns undefined unless the result parses as an http(s) URL. */
function toHttpEndpoint(raw: string): string | undefined {
  const trimmed = raw.trim();
  const candidate = ENDPOINT_SCHEME.test(trimmed) ? trimmed : `http://${trimmed}`;
  try {
    const { protocol } = new URL(candidate);
    return protocol === "http:" || protocol === "https:" ? candidate : undefined;
  } catch {
    return undefined;
  }
}

/**
 * `/search-engine <mojeek|searxng> [<endpoint>]` — persist the web search backend to config.
 *
 * With no argument (or an unrecognized engine) it reports the current settings plus usage
 * and writes nothing. A custom SearXNG endpoint is validated before anything is written,
 * so a malformed value never reaches the config file.
 */
const searchEngineHandler: SlashHandler = (args, _loop, ctx) => {
  const engine = args[0];
  const rawEndpoint = args[1];

  if (engine !== "mojeek" && engine !== "searxng") {
    return {
      info: [
        // Only call out the argument when one was actually supplied.
        ...(engine ? [`Unknown web search engine "${engine}".`, ""] : []),
        `Current web search engine: ${webSearchEngine()}`,
        `SearXNG endpoint: ${webSearchEndpoint()}`,
        "",
        "Usage:",
        " /search-engine mojeek use Mojeek (default, no external deps)",
        " /search-engine searxng use SearXNG at default endpoint",
        " /search-engine searxng <url> use SearXNG at custom endpoint",
        "",
        "Alias: /se",
        "",
        "SearXNG is a self-hosted metasearch engine (https://github.com/searxng/searxng).",
        "Install it with: docker run -d -p 8080:8080 searxng/searxng",
      ].join("\n"),
    };
  }

  // Validate before touching config: a bad endpoint must leave the previous settings intact.
  let customEndpoint: string | undefined;
  if (engine === "searxng" && rawEndpoint) {
    customEndpoint = toHttpEndpoint(rawEndpoint);
    if (!customEndpoint) {
      return {
        info: `Invalid SearXNG endpoint "${rawEndpoint}" — expected an http(s) URL such as http://localhost:8080. Web search engine unchanged.`,
      };
    }
  }

  const cfg = readConfig();
  cfg.webSearchEngine = engine;
  if (customEndpoint) cfg.webSearchEndpoint = customEndpoint;
  writeConfig(cfg);

  if (engine === "searxng") {
    // Without a custom endpoint the previously stored (or default) one stays in effect.
    const activeEndpoint = customEndpoint ?? webSearchEndpoint();
    ctx.postInfo?.(
      `Switched web search engine to "${engine}". Make sure SearXNG is running at ${activeEndpoint}.`,
    );
    return {
      info: `✓ Web search engine set to "${engine}" (${activeEndpoint}). Next assistant turn will pick up the change.`,
    };
  }

  ctx.postInfo?.(`Switched web search engine to "${engine}".`);
  return {
    info: `✓ Web search engine set to "${engine}". Next assistant turn will pick up the change.`,
  };
};

/** Slash handlers for the web search backend; `se` is an alias of `search-engine`. */
export const handlers: Record<string, SlashHandler> = {
  "search-engine": searchEngineHandler,
  se: searchEngineHandler,
};
16 changes: 16 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ export interface ReasonixConfig {
session?: string | null;
setupCompleted?: boolean;
search?: boolean;
/** Web search engine backend: "mojeek" (default, scrapes Mojeek) or "searxng" (self-hosted SearXNG). */
webSearchEngine?: "mojeek" | "searxng";
/** Base URL for SearXNG instance (default http://localhost:8080). */
webSearchEndpoint?: string;
projects?: {
[absoluteRootDir: string]: {
shellAllowed?: string[];
Expand Down Expand Up @@ -93,6 +97,18 @@ export function searchEnabled(path: string = defaultConfigPath()): boolean {
return true;
}

/** Configured web search backend; anything other than an explicit "searxng" resolves to "mojeek". */
export function webSearchEngine(path: string = defaultConfigPath()): "mojeek" | "searxng" {
  const { webSearchEngine: configured } = readConfig(path);
  return configured === "searxng" ? "searxng" : "mojeek";
}

/** Configured SearXNG base URL; falls back to http://localhost:8080 when unset, empty, or not a string. */
export function webSearchEndpoint(path: string = defaultConfigPath()): string {
  const stored = readConfig(path).webSearchEndpoint;
  return typeof stored === "string" && stored !== "" ? stored : "http://localhost:8080";
}

export function saveApiKey(key: string, path: string = defaultConfigPath()): void {
const cfg = readConfig(path);
cfg.apiKey = key.trim();
Expand Down
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ export {
formatSearchResults,
htmlToText,
parseMojeekResults,
parseSearxngHtmlResults,
registerWebTools,
webFetch,
webSearch,
Expand Down
121 changes: 120 additions & 1 deletion src/tools/web.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
/** web_search uses Mojeek by default (DDG returns anti-bot 202 to unauthenticated POSTs) or, when configured, a self-hosted SearXNG instance; web_fetch sniffs HTML to text. */

import { parse as parseHtml } from "node-html-parser";
import {
webSearchEndpoint as loadWebSearchEndpoint,
webSearchEngine as loadWebSearchEngine,
} from "../config.js";
import type { ToolRegistry } from "../tools.js";

export interface SearchResult {
Expand Down Expand Up @@ -28,6 +32,10 @@ export interface WebFetchOptions {
/** Options for `webSearch`; passed through unchanged to the selected engine's search helper. */
export interface WebSearchOptions {
/** Maximum number of results to return; the engine helpers clamp it to the range 1..10. */
topK?: number;
/** Abort signal for the outgoing search request. */
signal?: AbortSignal;
/** Backend engine: "mojeek" (scrapes Mojeek HTML) or "searxng" (scrapes the HTML results page of a self-hosted SearXNG instance). Any value other than "searxng" selects Mojeek. */
engine?: "mojeek" | "searxng";
/** Base URL for SearXNG. Default http://localhost:8080. */
endpoint?: string;
}

const DEFAULT_FETCH_MAX_CHARS = 32_000;
Expand All @@ -46,6 +54,13 @@ export async function webSearch(
query: string,
opts: WebSearchOptions = {},
): Promise<SearchResult[]> {
if (opts.engine === "searxng") {
return searchSearxng(query, opts);
}
return searchMojeek(query, opts);
}

async function searchMojeek(query: string, opts: WebSearchOptions = {}): Promise<SearchResult[]> {
const topK = Math.max(1, Math.min(10, opts.topK ?? DEFAULT_TOPK));
const resp = await fetch(`${MOJEEK_ENDPOINT}?q=${encodeURIComponent(query)}`, {
headers: {
Expand All @@ -71,6 +86,101 @@ export async function webSearch(
return results;
}

/** Parse + validate a SearXNG endpoint. Returns origin (protocol + host). */
/**
 * Parse + validate a SearXNG endpoint and reduce it to its origin (protocol + host + port).
 *
 * A missing scheme defaults to `http://`. The scheme test is anchored to the start of the
 * string, so a "://" that appears later (e.g. inside a query string) does not cause the
 * host to be parsed as a scheme. Any path, query, or fragment is discarded.
 *
 * @param raw - endpoint as configured, with or without a scheme
 * @returns the URL origin, e.g. `http://localhost:8080`
 * @throws Error when the value does not parse as a URL or is not http(s)
 */
function normalizeSearxngEndpoint(raw: string): string {
  const trimmed = raw.trim();
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed);
  let url: URL;
  try {
    url = new URL(hasScheme ? trimmed : `http://${trimmed}`);
  } catch {
    throw new Error(`web_search: invalid SearXNG endpoint "${raw}"`);
  }
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    throw new Error(`web_search: SearXNG endpoint must be http(s), got ${url.protocol}`);
  }
  return url.origin;
}

/**
 * Query a SearXNG instance and return up to `topK` parsed results.
 *
 * Fetches the HTML results page (`format=html`) and scrapes it with
 * `parseSearxngHtmlResults`.
 *
 * @param query - search terms; URL-encoded before being sent
 * @param opts - `topK` is clamped to 1..10 (default `DEFAULT_TOPK`), `endpoint` defaults to
 *   http://localhost:8080 and is reduced to its origin, `signal` is passed to `fetch`
 * @returns the parsed results, or `[]` when the page is recognized as a "no results" page
 * @throws Error when the endpoint is invalid, the server cannot be reached, the response is
 *   not 2xx, or the page yields no results and is not recognized as an empty results page
 */
async function searchSearxng(query: string, opts: WebSearchOptions = {}): Promise<SearchResult[]> {
  const topK = Math.max(1, Math.min(10, opts.topK ?? DEFAULT_TOPK));
  const baseUrl = normalizeSearxngEndpoint(opts.endpoint ?? "http://localhost:8080");

  // JSON API is often blocked by SearXNG's default limiter; HTML always works.
  const url = `${baseUrl}/search?format=html&q=${encodeURIComponent(query)}`;
  let resp: Response;
  try {
    resp = await fetch(url, {
      headers: {
        "User-Agent": USER_AGENT,
        Accept: "text/html",
      },
      signal: opts.signal,
    });
  } catch (err) {
    // Network-level failures surface as a TypeError from fetch; anything else (e.g. an abort) is rethrown untouched.
    if (err instanceof TypeError && err.message.includes("fetch")) {
      // Report the origin that was actually requested, not the raw configured string.
      throw new Error(
        `web_search: Cannot reach SearXNG server at ${baseUrl}. Please install SearXNG (https://github.com/searxng/searxng) and start it (e.g. \`docker run -d -p 8080:8080 searxng/searxng\`), or switch to the default engine with /search-engine mojeek.`,
      );
    }
    throw err;
  }
  if (!resp.ok) throw new Error(`web_search ${resp.status}`);

  const html = await resp.text();
  const results = parseSearxngHtmlResults(html).slice(0, topK);
  if (results.length === 0) {
    // Empty-page wording presumably varies by SearXNG version/theme ("No results were found", "we didn't find any results") — verify against the deployed instance.
    const looksEmpty =
      /no results (?:were )?found|didn['’]t find any results|did not match any documents/i;
    if (looksEmpty.test(html)) return [];
    throw new Error(
      `web_search: 0 results but SearXNG response doesn't look like an empty results page (${html.length} chars)`,
    );
  }
  return results;
}

/** Parse SearXNG HTML search results using node-html-parser. */
/**
 * Parse SearXNG HTML search results using node-html-parser.
 *
 * Primary pass: every `article.result` / `div.result` container yields one result whose
 * title link is chosen by priority — heading link (`h3 a`, then `h4 a`) before any other
 * absolute link. A single comma-separated selector would instead return whichever link
 * comes first in the document, so a link placed ahead of the heading (presumably the URL
 * breadcrumb in SearXNG's default theme — verify against your instance) would be taken
 * as the title.
 *
 * Fallback pass (only when no result containers exist): bare `h3 a[href]` links.
 *
 * @param html - raw HTML of a SearXNG results page
 * @returns results in document order; `snippet` is "" when none is found
 */
export function parseSearxngHtmlResults(html: string): SearchResult[] {
  const root = parseHtml(html);
  const results: SearchResult[] = [];

  // Result containers: <article class="result"> or <div class="result">.
  const articles = root.querySelectorAll("article.result, div.result");
  if (articles.length > 0) {
    for (const article of articles) {
      const link =
        article.querySelector("h3 a") ??
        article.querySelector("h4 a") ??
        article.querySelector("a[href^='http']");
      if (!link) continue;
      const href = link.getAttribute("href");
      if (!href) continue;
      const title = link.textContent.trim();
      if (!title) continue;

      // Snippet: first paragraph with real text that is not just a repeat of the title.
      let snippet = "";
      for (const p of article.querySelectorAll("p")) {
        const text = p.textContent.trim();
        if (text.length > 10 && !text.includes(title)) {
          snippet = text;
          break;
        }
      }
      if (!snippet) {
        const cs = article.querySelector(".content, .result-content, [class*='snippet']");
        if (cs) snippet = cs.textContent.trim();
      }
      results.push({ title, url: href, snippet });
    }
    return results;
  }

  // Fallback: <h3><a href> pairs directly
  for (const a of root.querySelectorAll("h3 a[href]")) {
    const href = a.getAttribute("href");
    // Skip in-page anchors; they are not search results.
    if (!href || href.startsWith("#")) continue;
    const title = a.textContent.trim();
    if (!title) continue;
    let snippet = "";
    // Look for a paragraph under the element that wraps the <h3>.
    const p = a.parentNode?.parentNode?.querySelector("p");
    if (p) snippet = p.textContent.trim();
    results.push({ title, url: href, snippet });
  }
  return results;
}

/** Title-anchor + snippet-paragraph passes paired positionally — robust to attribute reorder. */
export function parseMojeekResults(html: string): SearchResult[] {
const titles: string[] = [];
Expand Down Expand Up @@ -283,6 +393,10 @@ export interface WebToolsOptions {
defaultTopK?: number;
/** Byte cap for `web_fetch` extracted text. */
maxFetchChars?: number;
/** Backend engine: "mojeek" (default, scrapes Mojeek) or "searxng" (self-hosted SearXNG). */
webSearchEngine?: "mojeek" | "searxng";
/** Base URL for SearXNG (default http://localhost:8080). */
webSearchEndpoint?: string;
}

export function registerWebTools(registry: ToolRegistry, opts: WebToolsOptions = {}): ToolRegistry {
Expand All @@ -292,7 +406,8 @@ export function registerWebTools(registry: ToolRegistry, opts: WebToolsOptions =
registry.register({
name: "web_search",
description:
"Search the public web. Returns ranked results with title, url, and snippet. Call this when the answer's correctness depends on current state — anything that changes over time (events, prices, releases, status of a thing in the real world). Composing such answers from training memory invents stale numbers; search first, then ground the answer in the results. For evergreen / definitional questions you don't need this.",
"Search the public web. Returns ranked results with title, url, and snippet. Call this when the answer's correctness depends on current state — anything that changes over time (events, prices, releases, status of a thing in the real world). Composing such answers from training memory invents stale numbers; search first, then ground the answer in the results. For evergreen / definitional questions you don't need this." +
" To change the backend, use /web-search-engine mojeek|searxng.",
readOnly: true,
parallelSafe: true,
parameters: {
Expand All @@ -307,9 +422,13 @@ export function registerWebTools(registry: ToolRegistry, opts: WebToolsOptions =
required: ["query"],
},
// web_search tool body: resolve the backend, run the search, and format the results as text.
// Engine/endpoint passed at registration take precedence; config is consulted on each call only for values left undefined.
// NOTE(review): a caller that supplies webSearchEngine/webSearchEndpoint at registration pins them for as long as this registry lives, so later config changes are not seen here — confirm that is intended.
fn: async (args: { query: string; topK?: number }, ctx) => {
const engine = opts.webSearchEngine ?? loadWebSearchEngine();
const endpoint = opts.webSearchEndpoint ?? loadWebSearchEndpoint();
const results = await webSearch(args.query, {
// A per-call topK overrides the registration default.
topK: args.topK ?? defaultTopK,
signal: ctx?.signal,
engine,
endpoint,
});
return formatSearchResults(args.query, results);
},
Expand Down
1 change: 1 addition & 0 deletions tests/public-api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ const PUBLIC_API: readonly string[] = [
"parseEditBlocks",
"parseMcpSpec",
"parseMojeekResults",
"parseSearxngHtmlResults",
"parseTranscript",
"prepareSpawn",
"projectHash",
Expand Down
Loading
Loading