From d75e54f26f644eb8cf5cc0942e11062e1fecc28f Mon Sep 17 00:00:00 2001 From: DragonnZhang <731557579@qq.com> Date: Wed, 13 May 2026 11:26:13 +0800 Subject: [PATCH 1/2] feat: add computer and browser use skills with corresponding documentation --- extensions/computer-use-vision/QWEN.md | 14 ++++++ .../computer-use-vision/qwen-extension.json | 17 ++++++++ .../skills/browser-use/SKILL.md | 31 +++++++++++++ .../skills/computer-use/SKILL.md | 43 +++++++++++++++++++ 4 files changed, 105 insertions(+) create mode 100644 extensions/computer-use-vision/QWEN.md create mode 100644 extensions/computer-use-vision/qwen-extension.json create mode 100644 extensions/computer-use-vision/skills/browser-use/SKILL.md create mode 100644 extensions/computer-use-vision/skills/computer-use/SKILL.md diff --git a/extensions/computer-use-vision/QWEN.md b/extensions/computer-use-vision/QWEN.md new file mode 100644 index 0000000..6f535fa --- /dev/null +++ b/extensions/computer-use-vision/QWEN.md @@ -0,0 +1,14 @@ +# Computer And Browser Use Extension + +This extension exposes two local automation MCP servers: + +- `computer-use`: local desktop control through domdomegg/computer-use-mcp. +- `playwright`: browser and web app control through Microsoft Playwright MCP. + +Use the `computer-use` skill whenever the user asks Qwen Code to operate the local desktop, inspect the screen, click UI, type into an app, use keyboard shortcuts, or control native Mac/Windows applications. + +Use the `browser-use` skill whenever the user asks Qwen Code to operate a website, browser tab, localhost web app, web form, DOM element, link, input, or browser navigation flow. + +Routing rule: native desktop and OS-level UI should use `computer-use`; browser-specific work should use `browser-use`. If a browser task temporarily requires OS-level interaction, use `computer-use` only for that step and then return to `browser-use`. + +The tools operate on the user's real machine. 
Ask before destructive, privacy-sensitive, or externally visible actions such as sending messages, deleting files, purchasing, submitting forms, entering credentials, or changing security settings. diff --git a/extensions/computer-use-vision/qwen-extension.json b/extensions/computer-use-vision/qwen-extension.json new file mode 100644 index 0000000..3fd89f4 --- /dev/null +++ b/extensions/computer-use-vision/qwen-extension.json @@ -0,0 +1,17 @@ +{ + "name": "computer-use", + "version": "0.1.0", + "description": "Local desktop and browser automation tools for Qwen Code via computer-use-mcp and Playwright MCP.", + "mcpServers": { + "computer-use": { + "command": "npx", + "args": ["-y", "computer-use-mcp"] + }, + "playwright": { + "command": "npx", + "args": ["-y", "@playwright/mcp@latest"] + } + }, + "skills": "skills", + "contextFileName": "QWEN.md" +} diff --git a/extensions/computer-use-vision/skills/browser-use/SKILL.md b/extensions/computer-use-vision/skills/browser-use/SKILL.md new file mode 100644 index 0000000..cf055e7 --- /dev/null +++ b/extensions/computer-use-vision/skills/browser-use/SKILL.md @@ -0,0 +1,31 @@ +--- +name: browser-use +description: Control browser pages using the Playwright MCP server. Use when the user asks to open, inspect, navigate, click, type, test, or automate websites, localhost web apps, browser tabs, forms, and web UI. +--- + +# Browser Use + +Use the MCP tools from the `playwright` MCP server for browser-specific tasks. + +Prefer this skill over `computer-use` when the target is a website, browser tab, localhost web app, web form, DOM element, link, button, input, page content, or browser navigation flow. + +Do not use shell commands to start another browser automation server, and do not edit Qwen settings as a fallback. +If the Playwright MCP tools are not available in the current tool list, stop and tell the user to restart Qwen Code or reconnect the `playwright` MCP server. + +## Operating Loop + +1. 
Navigate or attach to the requested browser page using the available Playwright MCP tool. +2. Observe the page with the Playwright accessibility snapshot before acting. +3. Prefer semantic element interactions over coordinate clicks. +4. After every meaningful action, verify the resulting page state. +5. Use screenshots only when visual verification is needed. + +## Routing + +- Use `browser-use` for browser pages and web apps. +- Use `computer-use` for native desktop apps, operating system UI, app switching, dialogs outside the browser page, or anything that cannot be represented through the browser DOM/accessibility tree. +- If a browser task needs an OS-level action, use `computer-use` only for that OS-level step, then return to Playwright tools. + +## Safety + +Ask for confirmation before destructive, privacy-sensitive, or externally visible actions, including deleting data, sending messages, submitting forms, making purchases, changing security settings, or entering credentials. diff --git a/extensions/computer-use-vision/skills/computer-use/SKILL.md b/extensions/computer-use-vision/skills/computer-use/SKILL.md new file mode 100644 index 0000000..f3ca4ac --- /dev/null +++ b/extensions/computer-use-vision/skills/computer-use/SKILL.md @@ -0,0 +1,43 @@ +--- +name: computer-use +description: Control the local desktop using the `computer` MCP tool from computer-use-mcp. Use when the user asks to operate local Mac/Windows apps, inspect the screen, click UI, type text, press shortcuts, scroll, drag, or interact with native GUI software. +--- + +# Computer Use + +Use the `computer` MCP tool from the `computer-use` MCP server to operate the user's real local desktop. + +The only valid tool path for this skill is the MCP tool named `computer`. +Do not use shell commands to start another desktop automation MCP server, do not install `@anthropic-ai/mcp-computer-use-server`, and do not edit Qwen settings as a fallback. 
+If the `computer` tool is not available in the current tool list, stop and tell the user to restart Qwen Code or reconnect the `computer-use` MCP server. + +For browser pages, websites, localhost web apps, web forms, DOM elements, links, inputs, or browser navigation flows, use the `browser-use` skill and the Playwright MCP server instead. Use `computer-use` only when the task requires native OS or app UI that Playwright cannot see. + +## Operating Loop + +1. Observe first with `computer` action `get_screenshot`. +2. Prefer keyboard shortcuts and typed navigation when practical. +3. Use coordinate clicks only after a screenshot confirms the target location. +4. After every action, verify with another `get_screenshot`. +5. Keep actions small and reversible. + +## Safety + +Ask for confirmation before destructive, privacy-sensitive, or externally visible actions, including deleting files, sending messages, submitting forms, making purchases, changing security settings, or entering credentials. + +Do not assume the user wants the whole desktop automated. Operate only the app, window, or workflow they asked for. + +## Tool Notes + +- Use `action: "get_screenshot"` to establish the coordinate frame. +- Use `action: "left_click"`, `action: "right_click"`, `action: "middle_click"`, `action: "double_click"`, `action: "mouse_move"`, and `action: "left_click_drag"` for pointer actions. +- Use `action: "type"` for text input. +- Use `action: "key"` for keys or key combinations. +- Use `action: "scroll"` for scrolling. +- The tool uses a single MCP tool named `computer`; do not look for separate tools named `click` or `screenshot`. + +## Platform Notes + +On macOS, the user may need to grant Accessibility and Screen Recording permissions to the Node/npm process that runs the MCP server. + +On Windows, the desktop must be unlocked and interactive for GUI input to work reliably. 
From e51345cc476daa06db715336732e74ca1e5a8a2d Mon Sep 17 00:00:00 2001 From: DragonnZhang <731557579@qq.com> Date: Wed, 13 May 2026 15:43:30 +0800 Subject: [PATCH 2/2] feat: add computer-use-hybrid extension with documentation and skills --- extensions/computer-use-hybrid/QWEN.md | 31 ++++++++++++++++ extensions/computer-use-hybrid/README.md | 33 +++++++++++++++++ .../computer-use-hybrid/qwen-extension.json | 13 +++++++ .../skills/computer-use-hybrid/SKILL.md | 36 +++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 extensions/computer-use-hybrid/QWEN.md create mode 100644 extensions/computer-use-hybrid/README.md create mode 100644 extensions/computer-use-hybrid/qwen-extension.json create mode 100644 extensions/computer-use-hybrid/skills/computer-use-hybrid/SKILL.md diff --git a/extensions/computer-use-hybrid/QWEN.md b/extensions/computer-use-hybrid/QWEN.md new file mode 100644 index 0000000..4ea2646 --- /dev/null +++ b/extensions/computer-use-hybrid/QWEN.md @@ -0,0 +1,31 @@ +# Computer Use Hybrid Extension + +This extension exposes one cross-platform desktop automation MCP server: + +- `open-computer-use`: launches iFurySt open-computer-use through `npx -y open-computer-use mcp`. + +Use the `computer-use-hybrid` skill whenever the user asks Qwen Code to operate the local desktop, inspect native app UI, use the accessibility tree, capture screenshots, click UI, type into an app, press shortcuts, scroll, drag, or control native macOS, Windows, or Linux applications. + +Important: `computer-use-hybrid` is a skill and MCP routing guide, not a subagent. Do not call the Agent/Subagent tool with `computer-use-hybrid` as the subagent type. Use the MCP tools exposed by `open-computer-use` directly in the current agent. + +Do not use this extension for browser-only DOM automation. Playwright is intentionally not included in this extension. + +Routing rule: use the cross-platform MCP tools exposed by `open-computer-use`. 
Do not try to start another desktop automation server or Playwright from this extension. + +The tools operate on the user's real machine. Ask before destructive, privacy-sensitive, or externally visible actions such as sending messages, deleting files, purchasing, submitting forms, entering credentials, or changing security settings. + +## Upstream Projects + +- Desktop MCP: https://github.com/iFurySt/open-codex-computer-use + +## Requirements + +All platforms: + +- Node.js and npm available to run `npx`. +- macOS, Windows, or Linux. + +macOS: + +- The first run may require Accessibility and Screen Recording permissions. If desktop actions fail because permissions are missing, ask the user to run `npx -y open-computer-use doctor` once and grant the prompted permissions. +- Do not ask the user to run `open-computer-use install-codex-mcp` or `open-computer-use install-codex-plugin`; this extension already registers the MCP server through `qwen-extension.json`. diff --git a/extensions/computer-use-hybrid/README.md b/extensions/computer-use-hybrid/README.md new file mode 100644 index 0000000..2d5c044 --- /dev/null +++ b/extensions/computer-use-hybrid/README.md @@ -0,0 +1,33 @@ +# computer-use-hybrid + +Qwen Code extension for cross-platform desktop automation through accessibility trees and screenshots. + +It exposes one MCP server: + +- `open-computer-use`: `npx -y open-computer-use mcp` from iFurySt open-codex-computer-use. + +Playwright is intentionally not included. + +## Install + +Copy or symlink this directory into Qwen Code's extension directory, then restart Qwen Code. + +## Requirements + +Node.js and npm are required because the extension launches `open-computer-use` through `npx`. + +macOS, Windows, and Linux are supported by upstream open-computer-use. + +On macOS, the first run may require Accessibility and Screen Recording permissions. 
You can trigger upstream onboarding manually: + +```bash +npx -y open-computer-use doctor +``` + +You do not need to run `open-computer-use install-codex-mcp` or `open-computer-use install-codex-plugin` for this extension. `qwen-extension.json` already registers the MCP server. + +The MCP server is configured directly in `qwen-extension.json` as: + +```bash +npx -y open-computer-use mcp +``` diff --git a/extensions/computer-use-hybrid/qwen-extension.json b/extensions/computer-use-hybrid/qwen-extension.json new file mode 100644 index 0000000..314b7ec --- /dev/null +++ b/extensions/computer-use-hybrid/qwen-extension.json @@ -0,0 +1,13 @@ +{ + "name": "computer-use-hybrid", + "version": "0.1.0", + "description": "Cross-platform desktop automation via open-computer-use on macOS, Windows, and Linux.", + "mcpServers": { + "open-computer-use": { + "command": "npx", + "args": ["-y", "open-computer-use", "mcp"] + } + }, + "skills": "skills", + "contextFileName": "QWEN.md" +} diff --git a/extensions/computer-use-hybrid/skills/computer-use-hybrid/SKILL.md b/extensions/computer-use-hybrid/skills/computer-use-hybrid/SKILL.md new file mode 100644 index 0000000..7299d99 --- /dev/null +++ b/extensions/computer-use-hybrid/skills/computer-use-hybrid/SKILL.md @@ -0,0 +1,36 @@ +--- +name: computer-use-hybrid +description: Control native macOS, Windows, and Linux desktop apps through the `open-computer-use` MCP server. Use when the user asks to operate local apps with accessibility-tree context plus screenshots, inspect the screen, click UI, type text, press shortcuts, scroll, drag, or interact with OS-level GUI software. +--- + +# Computer Use Hybrid + +Use the MCP tools exposed by the `open-computer-use` server to operate the user's real local desktop through accessibility-tree context and screenshots. + +This is a skill, not a subagent. Never invoke the Agent/Subagent tool with `computer-use-hybrid` as the subagent type. 
Stay in the current agent and call the `open-computer-use` MCP tools directly. + +Do not use shell commands to start Playwright or a second desktop automation MCP server, and do not edit Qwen settings as a fallback. If the `open-computer-use` MCP tools are not available in the current tool list, stop and tell the user to restart Qwen Code or reconnect the extension. + +Playwright is intentionally not part of this extension. For browser-only DOM automation, use whatever browser-specific extension or MCP server the user has separately enabled. + +## Operating Loop + +1. Observe first with the `open-computer-use` server's accessibility-state or screenshot tool. +2. Prefer accessibility-tree targets, window identifiers, focused elements, and semantic actions when the server exposes them. +3. Use screenshots to confirm visual state, target bounds, and post-action results. +4. Prefer keyboard shortcuts and typed navigation when practical. +5. Use coordinate clicks only after accessibility metadata or a screenshot confirms the target location. +6. After every meaningful action, verify with another state read or screenshot. +7. Keep actions small and reversible. + +## Tool Notes + +open-computer-use supports macOS, Windows, and Linux and is launched through `npx -y open-computer-use mcp` by default. + +On macOS, the first run may require Accessibility and Screen Recording permissions. If a permission prompt or onboarding window appears, guide the user through granting the permission before continuing. + +## Safety + +Ask for confirmation before destructive, privacy-sensitive, or externally visible actions, including deleting files, sending messages, submitting forms, making purchases, changing security settings, or entering credentials. + +Do not assume the user wants the whole desktop automated. Operate only the app, window, or workflow they asked for.