From e5c52b742a0ef49a4ba33c2b8a3a1c88daa2c67c Mon Sep 17 00:00:00 2001
From: Jax <jax.zhang@novita.ai>
Date: Mon, 9 Mar 2026 21:43:46 +0800
Subject: [PATCH 1/3] feat: add APUS AI Inference skill with SKILL.md

Add the core instruction document for the APUS AI Inference skill,
covering OpenAI-compatible deterministic/confidential/verifiable
inference on AO Network with TEE attestation support.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 skills/apus/SKILL.md | 342 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 342 insertions(+)
 create mode 100644 skills/apus/SKILL.md

diff --git a/skills/apus/SKILL.md b/skills/apus/SKILL.md
new file mode 100644
index 0000000..745a677
--- /dev/null
+++ b/skills/apus/SKILL.md
@@ -0,0 +1,342 @@
+---
+name: apus
+description: AI inference via APUS on AO Network - deterministic, confidential, verifiable chat completions with TEE attestation. Use when the user wants to run AI inference through APUS, chat with AI models on AO, verify TEE attestation, or stream AI responses.
+compatibility: Requires Python 3.8+ with openai SDK, or Node.js 18+ with openai package
+metadata:
+  author: apus-network
+  version: "0.0.1"
+---
+
+# APUS AI Inference Skill
+
+Run deterministic, confidential, and verifiable AI inference on AO Network via APUS. All inference runs inside a Trusted Execution Environment (TEE), producing attestation proofs that can be independently verified. The API is fully OpenAI-compatible, so existing code using the OpenAI SDK works with minimal changes.
+
+## Phrase Mappings
+
+| User Request | Action |
+|--------------|--------|
+| "use apus to chat" | Send a chat completion request |
+| "use apus to ask" | Send a single-turn question |
+| "use apus to stream" | Stream a chat completion response |
+| "use apus to verify" | Verify TEE attestation of a response |
+| "use apus to check health" | Check API health status |
+
+## Prerequisites
+
+Install the OpenAI SDK for your language of choice. No API key is required during the current test phase.
+
+**Python:**
+
+```bash
+pip install openai
+```
+
+**Node.js:**
+
+```bash
+npm install openai
+```
+
+## API Reference
+
+| Property | Value |
+|----------|-------|
+| Base URL | `https://hb.apus.network/~inference@1.0` |
+| Model | `google/gemma-3-27b-it` |
+| Auth | None required (test phase) |
+
+### Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| POST | `/v1/chat/completions` | Chat completions (single-turn, multi-turn, streaming) |
+| POST | `/v1/completions` | Text completions |
+| GET | `/health` | Health check |
+
+## Request Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `model` | string | — | Model ID. Use `google/gemma-3-27b-it` |
+| `messages` | array | — | Array of message objects with `role` and `content` |
+| `temperature` | float | 1.0 | Sampling temperature (0.0 - 2.0) |
+| `max_tokens` | int | — | Maximum tokens to generate |
+| `stream` | bool | false | Enable streaming response |
+| `top_p` | float | 1.0 | Nucleus sampling threshold |
+| `frequency_penalty` | float | 0.0 | Penalize repeated tokens (-2.0 to 2.0) |
+| `presence_penalty` | float | 0.0 | Penalize tokens already present (-2.0 to 2.0) |
+| `tee` | bool | false | Return TEE attestation with the response (APUS-specific) |
+
+## Usage Guide
+
+### Initialize Client
+
+**Python:**
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://hb.apus.network/~inference@1.0/v1",
+    api_key="unused",  # No key required during test phase
+)
+```
+
+**Node.js:**
+
+```javascript
+import OpenAI from "openai";
+
+const client = new OpenAI({
+  baseURL: "https://hb.apus.network/~inference@1.0/v1",
+  apiKey: "unused", // No key required during test phase
+});
+```
+
+### Single-Turn Chat
+
+**Python:**
+
+```python
+response = client.chat.completions.create(
+    model="google/gemma-3-27b-it",
+    messages=[
+        {"role": "user", "content": "What is AO Network?"}
+    ],
+    temperature=0.7,
+    max_tokens=512,
+)
+
+print(response.choices[0].message.content)
+```
+
+**Node.js:**
+
+```javascript
+const response = await client.chat.completions.create({
+  model: "google/gemma-3-27b-it",
+  messages: [
+    { role: "user", content: "What is AO Network?" }
+  ],
+  temperature: 0.7,
+  max_tokens: 512,
+});
+
+console.log(response.choices[0].message.content);
+```
+
+### Multi-Turn Conversation
+
+**Python:**
+
+```python
+messages = [
+    {"role": "system", "content": "You are a helpful assistant knowledgeable about AO Network."},
+    {"role": "user", "content": "What is AO Network?"},
+]
+
+response = client.chat.completions.create(
+    model="google/gemma-3-27b-it",
+    messages=messages,
+    temperature=0.7,
+    max_tokens=512,
+)
+
+# Append assistant reply and continue
+assistant_reply = response.choices[0].message.content
+messages.append({"role": "assistant", "content": assistant_reply})
+messages.append({"role": "user", "content": "How does it relate to Arweave?"})
+
+response = client.chat.completions.create(
+    model="google/gemma-3-27b-it",
+    messages=messages,
+    temperature=0.7,
+    max_tokens=512,
+)
+
+print(response.choices[0].message.content)
+```
+
+### Streaming
+
+**Python:**
+
+```python
+stream = client.chat.completions.create(
+    model="google/gemma-3-27b-it",
+    messages=[
+        {"role": "user", "content": "Explain TEE attestation in simple terms."}
+    ],
+    stream=True,
+    max_tokens=512,
+)
+
+for chunk in stream:
+    content = chunk.choices[0].delta.content if chunk.choices else None  # a chunk may carry no choices
+    if content:
+        print(content, end="", flush=True)
+print()
+```
+
+**Node.js:**
+
+```javascript
+const stream = await client.chat.completions.create({
+  model: "google/gemma-3-27b-it",
+  messages: [
+    { role: "user", content: "Explain TEE attestation in simple terms." }
+  ],
+  stream: true,
+  max_tokens: 512,
+});
+
+for await (const chunk of stream) {
+  const content = chunk.choices[0]?.delta?.content;
+  if (content) process.stdout.write(content);
+}
+console.log();
+```
+
+### TEE Attestation
+
+Request a TEE attestation proof alongside the inference result by setting `tee: true` via `extra_body`.
+
+**Python:**
+
+```python
+response = client.chat.completions.create(
+    model="google/gemma-3-27b-it",
+    messages=[
+        {"role": "user", "content": "What is verifiable inference?"}
+    ],
+    max_tokens=256,
+    extra_body={"tee": True},
+)
+
+print("Response:", response.choices[0].message.content)
+print("Attestation:", response.tee)
+```
+
+### Attestation Response Structure
+
+When `tee` is enabled, the response includes an attestation object:
+
+```json
+{
+  "id": "chatcmpl-abc123",
+  "object": "chat.completion",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "content": "Verifiable inference means ..."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "tee": {
+    "tee_type": "SEV-SNP",
+    "token": "<attestation-token>",
+    "input_hash": "<sha256-hash-of-input>",
+    "output_hash": "<sha256-hash-of-output>"
+  }
+}
+```
+
+### Verify Attestation
+
+#### Method 1: APUS Verifier Service
+
+Submit the attestation token to the APUS verification endpoint:
+
+```bash
+curl -X POST https://hb.apus.network/~sev_gpu@1.0/verify \
+  -H "Content-Type: application/json" \
+  -d '{
+    "token": "<attestation-token>"
+  }'
+```
+
+A successful response indicates the attestation is valid:
+
+```json
+{
+  "valid": true,
+  "tee_type": "SEV-SNP",
+  "details": {
+    "measurement": "...",
+    "report_data": "..."
+  }
+}
+```
+
+#### Method 2: NVIDIA SDK
+
+For independent local verification using the NVIDIA Attestation SDK:
+
+```bash
+pip install nv-attestation-sdk
+```
+
+```python
+import hashlib
+from nv_attestation_sdk import attestation
+
+# 1. Verify the attestation token signature and claims
+verifier = attestation.Verifier()
+result = verifier.verify_token(attestation_token)
+print("Token valid:", result.valid)
+
+# 2. Verify input/output hash integrity
+input_data = '{"messages": [{"role": "user", "content": "What is verifiable inference?"}]}'
+computed_hash = hashlib.sha256(input_data.encode()).hexdigest()
+assert computed_hash == response_tee["input_hash"], "Input hash mismatch"
+print("Input hash verified")
+```
+
+### Health Check
+
+```bash
+curl https://hb.apus.network/~inference@1.0/health
+```
+
+Expected response:
+
+```json
+{
+  "status": "ok"
+}
+```
+
+## Demo Scripts
+
+| Script | Description | Run Command |
+|--------|-------------|-------------|
+| `examples/chat.py` | Single-turn + multi-turn chat (Python) | `python skills/apus/examples/chat.py` |
+| `examples/stream.py` | Streaming response (Python) | `python skills/apus/examples/stream.py` |
+| `examples/verify.py` | TEE attestation + verification (Python) | `python skills/apus/examples/verify.py` |
+| `examples/chat.mjs` | Single-turn + multi-turn chat (Node.js) | `node skills/apus/examples/chat.mjs` |
+| `examples/verify.mjs` | TEE attestation + verification (Node.js) | `node skills/apus/examples/verify.mjs` |
+
+## Error Handling
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| `Connection refused` | APUS inference service is unreachable | Check network connectivity; verify the base URL; retry after a short wait |
+| `Model not found` | Invalid or unsupported model ID | Use `google/gemma-3-27b-it` as the model parameter |
+| `Attestation verification failed` | TEE attestation token is invalid or tampered | Re-request with `tee: true`; verify you are using the correct token; try the APUS verifier service |
+
+## Notes
+
+- **No API key required** during the current test phase. Set `api_key` to any non-empty string (e.g. `"unused"`).
+- **OpenAI-compatible API** -- code written for the OpenAI SDK works by changing only `base_url`, `api_key`, and the `model` ID.
+- **`tee` is APUS-specific** -- this parameter is not part of the OpenAI spec. Pass it via `extra_body` in Python or as an additional body field in Node.js.
+
+## See Also
+
+- [APUS Network Documentation](https://docs.apus.network)
+- [APUS Network GitHub](https://github.com/apuslabs)
+- [AO Network](https://ao.arweave.dev)
+- [OpenAI Python SDK](https://github.com/openai/openai-python)
+- [OpenAI Node.js SDK](https://github.com/openai/openai-node)

From 0ea1b16e33b4b5f0b784317e537ebf7bcff41913 Mon Sep 17 00:00:00 2001
From: Jax <jax.zhang@novita.ai>
Date: Mon, 9 Mar 2026 21:46:42 +0800
Subject: [PATCH 2/3] feat: add Python and Node.js demo scripts for APUS skill

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 skills/apus/examples/chat.mjs   | 47 +++++++++++++++++++++
 skills/apus/examples/chat.py    | 44 ++++++++++++++++++++
 skills/apus/examples/stream.py  | 29 +++++++++++++
 skills/apus/examples/verify.mjs | 72 +++++++++++++++++++++++++++++++++
 skills/apus/examples/verify.py  | 71 ++++++++++++++++++++++++++++++++
 5 files changed, 263 insertions(+)
 create mode 100644 skills/apus/examples/chat.mjs
 create mode 100644 skills/apus/examples/chat.py
 create mode 100644 skills/apus/examples/stream.py
 create mode 100644 skills/apus/examples/verify.mjs
 create mode 100644 skills/apus/examples/verify.py

diff --git a/skills/apus/examples/chat.mjs b/skills/apus/examples/chat.mjs
new file mode 100644
index 0000000..89c2247
--- /dev/null
+++ b/skills/apus/examples/chat.mjs
@@ -0,0 +1,47 @@
+#!/usr/bin/env node
+/**
+ * APUS AI Inference — Chat Demo (single-turn + multi-turn)
+ *
+ * Usage: node skills/apus/examples/chat.mjs
+ * Requires: npm install openai
+ */
+
+import OpenAI from "openai";
+
+const client = new OpenAI({
+  apiKey: "unused", // placeholder: no key is required during the test phase
+  baseURL: "https://hb.apus.network/~inference@1.0",
+});
+
+const MODEL = "google/gemma-3-27b-it"; // model ID sent with every request
+
+// Send one stand-alone question and print the assistant's reply.
+async function singleTurn() {
+  console.log("=== Single-Turn Chat ===");
+  const question = [{ role: "user", content: "What is 2 + 2?" }];
+  const completion = await client.chat.completions.create({ model: MODEL, messages: question });
+  const answer = completion.choices[0].message.content;
+  console.log("Assistant:", answer);
+  console.log();
+}
+
+// Two-round conversation: the first answer is appended to the history before the follow-up.
+async function multiTurn() {
+  console.log("=== Multi-Turn Chat ===");
+  const history = [
+    { role: "system", content: "You are a math assistant." },
+    { role: "user", content: "What is 10 * 10?" },
+  ];
+
+  const firstReply = await client.chat.completions.create({ model: MODEL, messages: history });
+  const firstAnswer = firstReply.choices[0].message.content;
+  console.log("Assistant:", firstAnswer);
+  history.push({ role: "assistant", content: firstAnswer }, { role: "user", content: "And what is 100 / 5?" });
+
+  const secondReply = await client.chat.completions.create({ model: MODEL, messages: history });
+  console.log("Assistant:", secondReply.choices[0].message.content);
+  console.log();
+}
+
+await singleTurn(); // top-level await: the two demos run one after the other
+await multiTurn();
diff --git a/skills/apus/examples/chat.py b/skills/apus/examples/chat.py
new file mode 100644
index 0000000..784014f
--- /dev/null
+++ b/skills/apus/examples/chat.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""APUS AI Inference — Chat Demo (single-turn + multi-turn)."""
+
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="unused",  # placeholder: no key is required during the test phase
+    base_url="https://hb.apus.network/~inference@1.0",
+)
+
+MODEL = "google/gemma-3-27b-it"  # model ID sent with every request
+
+
+def single_turn():
+    """Send one stand-alone question and print the assistant's reply."""
+    print("=== Single-Turn Chat ===")
+    question = [{"role": "user", "content": "What is 2 + 2?"}]
+    completion = client.chat.completions.create(model=MODEL, messages=question)
+    answer = completion.choices[0].message.content
+    print("Assistant:", answer)
+    print()
+
+
+def multi_turn():
+    """Hold a two-round conversation, appending the first answer to the history."""
+    print("=== Multi-Turn Chat ===")
+    history = [
+        {"role": "system", "content": "You are a math assistant."},
+        {"role": "user", "content": "What is 10 * 10?"},
+    ]
+    first = client.chat.completions.create(model=MODEL, messages=history)
+    print("Assistant:", first.choices[0].message.content)
+    history += [
+        {"role": "assistant", "content": first.choices[0].message.content},
+        {"role": "user", "content": "And what is 100 / 5?"},
+    ]
+    second = client.chat.completions.create(model=MODEL, messages=history)
+    print("Assistant:", second.choices[0].message.content)
+    print()
+
+
+if __name__ == "__main__":  # run the demos only when executed as a script, not on import
+    single_turn()
+    multi_turn()
diff --git a/skills/apus/examples/stream.py b/skills/apus/examples/stream.py
new file mode 100644
index 0000000..6180a01
--- /dev/null
+++ b/skills/apus/examples/stream.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+"""APUS AI Inference — Streaming Demo."""
+
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="unused",  # placeholder: no key is required during the test phase
+    base_url="https://hb.apus.network/~inference@1.0",
+)
+
+MODEL = "google/gemma-3-27b-it"  # model ID sent with every request
+
+
+def stream_chat():
+    """Stream one chat completion and echo each text delta as it arrives."""
+    print("=== Streaming Chat ===")
+    prompt = [{"role": "user", "content": "Explain blockchain in 3 sentences."}]
+    stream = client.chat.completions.create(model=MODEL, messages=prompt, stream=True)
+
+    for chunk in stream:
+        # A chunk may arrive with an empty `choices` list; indexing [0] would raise IndexError.
+        content = chunk.choices[0].delta.content if chunk.choices else None
+        if content:
+            print(content, end="", flush=True)
+    print("\n")
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script, not on import
+    stream_chat()
diff --git a/skills/apus/examples/verify.mjs b/skills/apus/examples/verify.mjs
new file mode 100644
index 0000000..8d1ca48
--- /dev/null
+++ b/skills/apus/examples/verify.mjs
@@ -0,0 +1,72 @@
+#!/usr/bin/env node
+/**
+ * APUS AI Inference — TEE Attestation Verification Demo
+ *
+ * Usage: node skills/apus/examples/verify.mjs
+ * Requires: npm install openai
+ */
+
+import { createHash } from "node:crypto";
+import OpenAI from "openai";
+
+const client = new OpenAI({
+  apiKey: "unused", // placeholder: no key is required during the test phase
+  baseURL: "https://hb.apus.network/~inference@1.0",
+});
+
+const MODEL = "google/gemma-3-27b-it";
+const VERIFY_URL = "https://hb.apus.network/~sev_gpu@1.0/verify"; // APUS attestation verifier endpoint
+
+// Request a completion with `tee: true`, check the attestation binding, then verify the token remotely.
+async function chatWithAttestation() {
+  console.log("=== Chat with TEE Attestation ===");
+  const resp = await client.chat.completions.create({
+    model: MODEL,
+    messages: [{ role: "user", content: "What is AO?" }],
+    tee: true, // APUS-specific extension field, not part of the OpenAI spec
+  });
+  console.log("Assistant:", resp.choices[0].message.content);
+  console.log();
+
+  // Extract attestation
+  const attestation = resp.attestation;
+  if (!attestation) {
+    console.log("No attestation in response (tee may not be supported yet).");
+    return;
+  }
+
+  console.log("=== Attestation Received ===");
+  console.log("Nonce:", attestation.nonce ?? "N/A");
+  console.log("Token length:", (attestation.token ?? "").length, "chars");
+  console.log();
+
+  // Verify binding: SHA-256(raw) === nonce
+  const raw = attestation.raw ?? "";
+  const nonce = attestation.nonce ?? "";
+  const calculated = createHash("sha256").update(raw, "utf-8").digest("hex");
+
+  if (calculated === nonce) {
+    console.log("Binding check PASSED: SHA-256(raw) matches nonce");
+  } else {
+    console.log("Binding check FAILED: nonce mismatch");
+    return;
+  }
+
+  // Verify via APUS Verifier Service
+  console.log();
+  console.log("=== Verifying via APUS Service ===");
+  try {
+    const verifyResp = await fetch(VERIFY_URL, {
+      method: "POST", // fetch() throws a TypeError for GET/HEAD requests that carry a body
+      headers: { "Content-Type": "application/json" },
+      body: attestation.token,
+    });
+    const result = await verifyResp.text();
+    if (!verifyResp.ok) throw new Error(`HTTP ${verifyResp.status}: ${result}`);
+    console.log("Verification result:", result);
+  } catch (err) {
+    console.log(`Verification request failed: ${err.message}`);
+  }
+}
+
+await chatWithAttestation(); // top-level await (ES module): run the demo once when the file is executed
diff --git a/skills/apus/examples/verify.py b/skills/apus/examples/verify.py
new file mode 100644
index 0000000..572fed3
--- /dev/null
+++ b/skills/apus/examples/verify.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+"""APUS AI Inference — TEE Attestation Verification Demo."""
+
+import hashlib
+import json
+import urllib.request
+
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="unused",  # placeholder: no key is required during the test phase
+    base_url="https://hb.apus.network/~inference@1.0",
+)
+
+MODEL = "google/gemma-3-27b-it"
+
+VERIFY_URL = "https://hb.apus.network/~sev_gpu@1.0/verify"  # APUS attestation verifier endpoint
+
+
+def chat_with_attestation():
+    """Request a completion with ``tee`` enabled, check the binding, then verify the token remotely."""
+    print("=== Chat with TEE Attestation ===")
+    resp = client.chat.completions.create(
+        model=MODEL,
+        messages=[{"role": "user", "content": "What is AO?"}],
+        extra_body={"tee": True},
+    )
+    print("Assistant:", resp.choices[0].message.content)
+    print()
+    # Extract attestation from response (read from the model's extra, non-OpenAI fields)
+    raw_resp = resp.model_extra or {}
+    attestation = raw_resp.get("attestation")
+    if not attestation:
+        print("No attestation in response (tee may not be supported yet).")
+        return
+
+    print("=== Attestation Received ===")
+    print("Nonce:", attestation.get("nonce", "N/A"))
+    print("Token length:", len(attestation.get("token", "")), "chars")
+    print()
+
+    # Verify binding: SHA-256(raw) == nonce
+    raw = attestation.get("raw", "")
+    nonce = attestation.get("nonce", "")
+    calculated = hashlib.sha256(raw.encode("utf-8")).hexdigest()
+
+    if calculated == nonce:
+        print("Binding check PASSED: SHA-256(raw) matches nonce")
+    else:
+        print("Binding check FAILED: nonce mismatch")
+        return
+
+    # Verify via APUS Verifier Service
+    print()
+    print("=== Verifying via APUS Service ===")
+    try:
+        req = urllib.request.Request(
+            VERIFY_URL,
+            data=attestation.get("token", "").encode("utf-8"),
+            headers={"Content-Type": "application/json"},
+            method="POST",  # the request carries a body; SKILL.md documents the verifier call as POST
+        )
+        with urllib.request.urlopen(req, timeout=30) as response:
+            result = response.read().decode("utf-8")
+            print("Verification result:", result)
+    except Exception as e:
+        print(f"Verification request failed: {e}")
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script, not on import
+    chat_with_attestation()

From a8e5a224641bed3ca960d154ecc841e818f05eb0 Mon Sep 17 00:00:00 2001
From: Jax <jax.zhang@novita.ai>
Date: Mon, 9 Mar 2026 21:48:04 +0800
Subject: [PATCH 3/3] docs: add APUS skill to project README

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 skills/README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/skills/README.md b/skills/README.md
index 732ddb2..f8632a8 100644
--- a/skills/README.md
+++ b/skills/README.md
@@ -13,6 +13,7 @@ A collection of Permaweb CLI skills for [Claude Code](https://claude.ai/code) an
 | `arweave` | Upload files/sites to Arweave + manage ArNS records | [skills/arweave/SKILL.md](skills/arweave/SKILL.md) |
 | `monitor` | AO Task Monitor client (summaries, alerts, logs) | [skills/monitor/SKILL.md](skills/monitor/SKILL.md) |
 | `aoconnect` | Interact with AO processes - spawn, message, read results, monitor | [skills/aoconnect/SKILL.md](skills/aoconnect/SKILL.md) |
+| `apus` | AI inference via APUS on AO Network (chat, streaming, TEE attestation) | [skills/apus/SKILL.md](skills/apus/SKILL.md) |
 
 ## Installation
 
@@ -27,6 +28,9 @@ npx skills add https://github.com/permaweb/skills --skill monitor
 
 # Install the AO Connect skill
 npx skills add https://github.com/permaweb/skills --skill aoconnect
+
+# Install the APUS skill
+npx skills add https://github.com/permaweb/skills --skill apus
 ```
 
 This adds the skill to your project's `.claude/skills/` or `.opencode/skills/` directory.
@@ -72,6 +76,19 @@ Claude Code will prompt for your wallet path if not configured.
 
 **Full docs:** [skills/aoconnect/SKILL.md](skills/aoconnect/SKILL.md)
 
+### APUS AI Inference
+
+```
+use apus to chat "What is AO?"
+use apus to stream "Explain blockchain"
+use apus to verify <attestation>
+use apus to check health
+```
+
+Requires `openai` SDK (`pip install openai` or `npm install openai`).
+
+**Full docs:** [skills/apus/SKILL.md](skills/apus/SKILL.md)
+
 ## Manual CLI Usage
 
 You can also run the CLIs directly:
@@ -153,6 +170,7 @@ node skills/aoconnect/index.mjs monitor \
 - Arweave wallet (JWK format) for `arweave` and `aoconnect` skills
 - `AO_MONITOR_KEY` env var for `monitor` skill
 - `@permaweb/aoconnect` package for `aoconnect` skill
+- `openai` SDK (Python or Node.js) for `apus` skill
 
 ## Development