openwebui-plasmate/function.json at master · plasmate-labs/openwebui-plasmate · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
  "id": "plasmate_url_filter",
  "name": "Plasmate URL Filter",
  "type": "filter",
  "description": "Automatically fetch webpage content when URLs are shared in chat. Uses Plasmate for 10-100x token compression.",
  "version": "0.1.0",
  "author": "Plasmate",
  "author_url": "https://plasmate.app",
  "icon_url": "https://plasmate.app/icon.png",
  "license": "MIT",
  "funding_url": null,
  "meta": {
    "tags": ["web", "browsing", "url", "filter", "plasmate"],
    "requirements": ["plasmate"]
  },
  "valves": {
    "enabled": {
      "type": "boolean",
      "default": true,
      "description": "Enable automatic URL content injection"
    },
    "plasmate_path": {
      "type": "string",
      "default": "plasmate",
      "description": "Path to the Plasmate binary"
    },
    "max_urls_per_message": {
      "type": "integer",
      "default": 3,
      "description": "Maximum URLs to fetch per message"
    },
    "max_content_per_url": {
      "type": "integer",
      "default": 10000,
      "description": "Maximum content length per URL, in characters; longer content is truncated"
    },
    "timeout_seconds": {
      "type": "integer",
      "default": 30,
      "description": "Timeout for each fetch operation, in seconds"
    },
    "exclude_domains": {
      "type": "string",
      "default": "localhost,127.0.0.1,0.0.0.0",
      "description": "Comma-separated list of domains to exclude; a URL is skipped if it contains any entry"
    },
    "inject_position": {
      "type": "string",
      "default": "after",
      "description": "Where to inject content: 'before' or 'after' the message",
      "enum": ["before", "after"]
    },
    "show_fetch_status": {
      "type": "boolean",
      "default": true,
      "description": "Show status messages for URL fetching"
    }
  },
  "content": "\"\"\"\\nOpen WebUI Filter Function for automatic URL content injection.\\n\\nThis function intercepts URLs in user messages and automatically\\nfetches their content using Plasmate, injecting it into the conversation.\\n\"\"\"\\n\\nimport re\\nimport subprocess\\nfrom typing import Optional, List\\nfrom pydantic import BaseModel, Field\\n\\n\\nclass Filter:\\n    \\\"\\\"\\\"\\n    Plasmate URL Filter for Open WebUI.\\n\\n    Automatically detects URLs in messages and injects page content,\\n    enabling seamless web browsing in conversations.\\n    \\\"\\\"\\\"\\n\\n    class Valves(BaseModel):\\n        \\\"\\\"\\\"Configuration options for the Plasmate filter.\\\"\\\"\\\"\\n\\n        enabled: bool = Field(\\n            default=True,\\n            description=\\\"Enable automatic URL content injection.\\\",\\n        )\\n        plasmate_path: str = Field(\\n            default=\\\"plasmate\\\",\\n            description=\\\"Path to the Plasmate binary.\\\",\\n        )\\n        max_urls_per_message: int = Field(\\n            default=3,\\n            description=\\\"Maximum number of URLs to fetch per message.\\\",\\n        )\\n        max_content_per_url: int = Field(\\n            default=10000,\\n            description=\\\"Maximum content length per URL (characters).\\\",\\n        )\\n        timeout_seconds: int = Field(\\n            default=30,\\n            description=\\\"Timeout for fetch operations in seconds.\\\",\\n        )\\n        url_patterns: str = Field(\\n            default=r\\\"https?://[^\\\\s<>\\\\\\\"']+\\\",\\n            description=\\\"Regex pattern to match URLs in messages.\\\",\\n        )\\n        exclude_domains: str = Field(\\n            default=\\\"localhost,127.0.0.1,0.0.0.0\\\",\\n            description=\\\"Comma-separated list of domains to exclude.\\\",\\n        )\\n        inject_position: str = Field(\\n            default=\\\"after\\\",\\n            description=\\\"Where to inject content: 
'before' or 'after' the message.\\\",\\n        )\\n        show_fetch_status: bool = Field(\\n            default=True,\\n            description=\\\"Show status messages for URL fetching.\\\",\\n        )\\n\\n    def __init__(self):\\n        \\\"\\\"\\\"Initialize the Filter with default valves.\\\"\\\"\\\"\\n        self.valves = self.Valves()\\n\\n    def _extract_urls(self, text: str) -> List[str]:\\n        \\\"\\\"\\\"Extract URLs from text based on configured pattern.\\\"\\\"\\\"\\n        pattern = self.valves.url_patterns\\n        urls = re.findall(pattern, text)\\n\\n        excluded = [d.strip() for d in self.valves.exclude_domains.split(\\\",\\\")]\\n        filtered_urls = []\\n\\n        for url in urls:\\n            url = re.sub(r'[.,;:!?\\\\'\\\"\\\\)\\\\]]+$', '', url)\\n            is_excluded = any(domain in url for domain in excluded if domain)\\n            if not is_excluded:\\n                filtered_urls.append(url)\\n\\n        seen = set()\\n        unique_urls = []\\n        for url in filtered_urls:\\n            if url not in seen:\\n                seen.add(url)\\n                unique_urls.append(url)\\n\\n        return unique_urls[:self.valves.max_urls_per_message]\\n\\n    def _fetch_url(self, url: str) -> tuple[bool, str]:\\n        \\\"\\\"\\\"Fetch URL content using Plasmate.\\\"\\\"\\\"\\n        cmd = [self.valves.plasmate_path, \\\"fetch\\\", url, \\\"--text\\\"]\\n\\n        try:\\n            result = subprocess.run(\\n                cmd,\\n                capture_output=True,\\n                text=True,\\n                timeout=self.valves.timeout_seconds,\\n            )\\n\\n            if result.returncode != 0:\\n                return False, result.stderr\\n\\n            content = result.stdout\\n            if len(content) > self.valves.max_content_per_url:\\n                content = content[:self.valves.max_content_per_url] + \\\"\\\\n\\\\n[Content truncated...]\\\"\\n\\n            return True, 
content\\n\\n        except subprocess.TimeoutExpired:\\n            return False, f\\\"Timeout after {self.valves.timeout_seconds}s\\\"\\n        except FileNotFoundError:\\n            return False, \\\"Plasmate not found\\\"\\n        except Exception as e:\\n            return False, str(e)\\n\\n    def inlet(\\n        self,\\n        body: dict,\\n        __user__: Optional[dict] = None,\\n    ) -> dict:\\n        \\\"\\\"\\\"\\n        Process incoming messages before they reach the LLM.\\n\\n        :param body: The request body containing messages.\\n        :param __user__: User information from Open WebUI.\\n        :return: Modified request body with injected content.\\n        \\\"\\\"\\\"\\n        if not self.valves.enabled:\\n            return body\\n\\n        messages = body.get(\\\"messages\\\", [])\\n        if not messages:\\n            return body\\n\\n        last_message = messages[-1]\\n        if last_message.get(\\\"role\\\") != \\\"user\\\":\\n            return body\\n\\n        content = last_message.get(\\\"content\\\", \\\"\\\")\\n        if isinstance(content, list):\\n            text_parts = [\\n                part.get(\\\"text\\\", \\\"\\\")\\n                for part in content\\n                if part.get(\\\"type\\\") == \\\"text\\\"\\n            ]\\n            content = \\\" \\\".join(text_parts)\\n\\n        urls = self._extract_urls(content)\\n        if not urls:\\n            return body\\n\\n        fetched_content = []\\n        for url in urls:\\n            success, result = self._fetch_url(url)\\n            if success:\\n                fetched_content.append(\\n                    f\\\"<webpage url=\\\\\\\"{url}\\\\\\\">\\\\n{result}\\\\n</webpage>\\\"\\n                )\\n            elif self.valves.show_fetch_status:\\n                fetched_content.append(\\n                    f\\\"<webpage url=\\\\\\\"{url}\\\\\\\" error=\\\\\\\"{result}\\\\\\\" />\\\"\\n                )\\n\\n        if not 
fetched_content:\\n            return body\\n\\n        injection = \\\"\\\\n\\\\n---\\\\n**Fetched Web Content:**\\\\n\\\\n\\\" + \\\"\\\\n\\\\n\\\".join(fetched_content) + \\\"\\\\n---\\\\n\\\"\\n\\n        if isinstance(last_message.get(\\\"content\\\"), list):\\n            if self.valves.inject_position == \\\"before\\\":\\n                last_message[\\\"content\\\"].insert(0, {\\\"type\\\": \\\"text\\\", \\\"text\\\": injection})\\n            else:\\n                last_message[\\\"content\\\"].append({\\\"type\\\": \\\"text\\\", \\\"text\\\": injection})\\n        else:\\n            original = last_message.get(\\\"content\\\", \\\"\\\")\\n            if self.valves.inject_position == \\\"before\\\":\\n                last_message[\\\"content\\\"] = injection + original\\n            else:\\n                last_message[\\\"content\\\"] = original + injection\\n\\n        return body\\n\\n    def outlet(\\n        self,\\n        body: dict,\\n        __user__: Optional[dict] = None,\\n    ) -> dict:\\n        \\\"\\\"\\\"\\n        Process outgoing messages after LLM response.\\n\\n        :param body: The response body containing messages.\\n        :param __user__: User information from Open WebUI.\\n        :return: Unchanged response body.\\n        \\\"\\\"\\\"\\n        return body\\n"
}