From 41aa04094478f9f50b36e4dc30779b00f1f61378 Mon Sep 17 00:00:00 2001 From: beatwade Date: Wed, 8 Apr 2026 12:03:12 +0800 Subject: [PATCH 1/4] =?UTF-8?q?fix:=20/retrieve=20endpoint=20=E2=80=94=20p?= =?UTF-8?q?ass=20user=5Fid=20as=20where=20filter=20for=20memory=20isolatio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /retrieve endpoint was calling service.retrieve() without passing user_id, causing all memories to be returned regardless of user boundary. This fix reads user_id from the request payload and passes it as a where filter. --- app/main.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/app/main.py b/app/main.py index a129fb6..b61f463 100644 --- a/app/main.py +++ b/app/main.py @@ -202,7 +202,9 @@ async def retrieve(request: Request, payload: dict[str, Any]): raise HTTPException(status_code=400, detail="'query' must be a non-empty string") try: service = request.app.state.service - result = await service.retrieve([query.strip()]) + user_id = payload.get("user_id") + where = {"user_id": user_id} if user_id else None + result = await service.retrieve([query.strip()], where=where) return JSONResponse(content={"status": "success", "result": result}) except Exception as exc: logger.exception("Retrieve request failed") From 929cbfb1867f205b21076ac0505e78ef2ac5bc6f Mon Sep 17 00:00:00 2001 From: beatwade Date: Wed, 8 Apr 2026 15:00:43 +0800 Subject: [PATCH 2/4] fix: disable route_intention by default + datetime JSON serialization - retrieve endpoint defaults skip_routing=true (bypass LLM gatekeeper) - Fix datetime serialization error in JSON response --- app/main.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/app/main.py b/app/main.py index b61f463..2b73719 100644 --- a/app/main.py +++ b/app/main.py @@ -204,8 +204,14 @@ async def retrieve(request: Request, payload: dict[str, Any]): service = request.app.state.service user_id = payload.get("user_id") where = {"user_id": user_id} if user_id else None + skip_routing = payload.get("skip_routing", True) + service.retrieve_config.route_intention = not skip_routing result = await service.retrieve([query.strip()], where=where) - return JSONResponse(content={"status": "success", "result": result}) + # Sanitize datetime objects for JSON serialization + import json as _json + + sanitized = _json.loads(_json.dumps(result, default=str)) + return JSONResponse(content={"status": "success", "result": sanitized}) except Exception as exc: logger.exception("Retrieve request failed") raise HTTPException(status_code=500, detail="Internal server error") from exc From 52086b9cf22805e87151b4f4f37ca6c4731c8dd8 Mon Sep 17 00:00:00 2001 From: beatwade Date: Wed, 8 Apr 2026 16:02:26 +0800 Subject: [PATCH 3/4] =?UTF-8?q?feat:=20=E5=8F=AF=E9=85=8D=E7=BD=AE=20promp?= =?UTF-8?q?t=20override=20=E2=80=94=20YAML=20=E6=96=87=E4=BB=B6=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E5=8F=8D=E5=99=AA=E9=9F=B3=E6=8F=90=E5=8F=96=E8=A7=84?= =?UTF-8?q?=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 PROMPT_CONFIG_PATH 环境变量,支持外部 YAML 文件配置提取 prompt - config/prompts.yaml 添加反噪音规则:禁止提取问题当事实、AI 无知当用户特征、元动作当事实 - build_memu_config() 自动加载 YAML prompt 覆盖到 memorize_config - create_memory_service() 支持深层合并 base prompt 与 override_config - 不修改 memu-py 源码,纯配置驱动 --- app/services/memu.py | 24 +++--- config/memu.py | 63 +++++++++++--- config/prompts.yaml | 193 +++++++++++++++++++++++++++++++++++++++++++ config/settings.py | 5 ++ 4 files changed, 263 insertions(+), 22 deletions(-) create mode 100644 config/prompts.yaml diff --git a/app/services/memu.py b/app/services/memu.py index e8df706..f622afe 100644 --- a/app/services/memu.py +++ b/app/services/memu.py @@ -8,21 +8,22 @@ from config.settings import Settings +def _deep_merge(base: dict, override: dict) -> dict: + """Merge override into base dict recursively. Override wins on conflicts.""" + merged = {**base} + for key, val in override.items(): + if key in merged and isinstance(merged[key], dict) and isinstance(val, dict): + merged[key] = _deep_merge(merged[key], val) + else: + merged[key] = val + return merged + + def create_memory_service( settings: Settings | None = None, memorize_config: dict[str, Any] | None = None, retrieve_config: dict[str, Any] | None = None, ) -> MemoryService: - """Create a configured MemoryService instance. - - Args: - settings: Application settings. Uses default if not provided. - memorize_config: Optional memorize workflow config override. - retrieve_config: Optional retrieve workflow config override. - - Returns: - Configured MemoryService instance. - """ if settings is None: settings = Settings() @@ -30,7 +31,8 @@ def create_memory_service( kwargs = {**memu_config} if memorize_config: - kwargs["memorize_config"] = memorize_config + base = kwargs.get("memorize_config", {}) + kwargs["memorize_config"] = _deep_merge(base, memorize_config) if retrieve_config: kwargs["retrieve_config"] = retrieve_config diff --git a/config/memu.py b/config/memu.py index fdf76da..e7602ba 100644 --- a/config/memu.py +++ b/config/memu.py @@ -1,21 +1,23 @@ """MemU configuration for memory service.""" +import logging +from pathlib import Path from typing import Any +import yaml from pydantic import BaseModel from config.settings import Settings +logger = logging.getLogger(__name__) -class MemUUser(BaseModel): - """User model for memu-py.""" +class MemUUser(BaseModel): user_id: str agent_id: str | None = None def build_memu_llm_profiles(settings: Settings) -> dict[str, Any]: - """Build LLM profiles for memu-py.""" return { "default": { "api_key": settings.OPENAI_API_KEY, @@ -30,22 +32,61 @@ def build_memu_llm_profiles(settings: Settings) -> dict[str, Any]: } -def build_memu_config(settings: Settings) -> dict[str, Any]: - """Build memu-py core configuration. +def load_prompt_overrides(path: str) -> dict[str, Any] | None: + """Load custom prompt overrides from a YAML file. - This configures memu-py to: - 1. Connect to PostgreSQL with pgvector - 2. Auto-create tables (ddl_mode: create) - 3. Use configured LLM profiles + The YAML structure should match memu-py's MemorizeConfig.memory_type_prompts: + profile: + rules: + ordinal: 30 + prompt: | + # Rules + ... + examples: + ordinal: 60 + prompt: | + # Examples + ... + event: + rules: + ordinal: 30 + prompt: | + ... """ - return { + if not path: + return None + p = Path(path) + if not p.exists(): + logger.warning("PROMPT_CONFIG_PATH=%s does not exist, using defaults", path) + return None + try: + with p.open("r", encoding="utf-8") as f: + data = yaml.safe_load(f) + if data and isinstance(data, dict): + logger.info("Loaded prompt overrides from %s (types: %s)", path, list(data.keys())) + return data + logger.warning("PROMPT_CONFIG_PATH=%s is empty or invalid, using defaults", path) + return None + except Exception: + logger.exception("Failed to load prompt overrides from %s", path) + return None + + +def build_memu_config(settings: Settings) -> dict[str, Any]: + config: dict[str, Any] = { "llm_profiles": build_memu_llm_profiles(settings), "database_config": { "metadata_store": { "provider": "postgres", - "ddl_mode": "create", # Auto-create tables + "ddl_mode": "create", "dsn": settings.DATABASE_URL, } }, "user_config": {"model": MemUUser}, } + + prompt_overrides = load_prompt_overrides(settings.PROMPT_CONFIG_PATH) + if prompt_overrides: + config["memorize_config"] = {"memory_type_prompts": prompt_overrides} + + return config diff --git a/config/prompts.yaml b/config/prompts.yaml new file mode 100644 index 0000000..0dd94aa --- /dev/null +++ b/config/prompts.yaml @@ -0,0 +1,193 @@ +# memU 自定义提取 prompt 覆盖 +# 用于防止 memu-py 提取噪音信息(问题当事实、AI 无知当用户特征、元动作当事实) +# 结构:memory_type → block_name → {ordinal, prompt} +# ordinal 控制拼接顺序(30=rules, 60=examples) +# 完整覆盖整个 block,不会与默认 block 合并 + +profile: + rules: + ordinal: 30 + prompt: | + # Rules + ## General requirements (must satisfy all) + - Use "user" to refer to the user consistently. + - Each memory item must be complete and self-contained, written as a declarative descriptive sentence. + - Each memory item must express one single complete piece of information and be understandable without context. + - Similar/redundant items must be merged into one, and assigned to only one category. + - Each memory item must be < 30 words worth of length (keep it as concise as possible). + - A single memory item must NOT contain timestamps. + Important: Extract only facts directly stated or confirmed by the user. No guesses, no suggestions, and no content introduced only by the assistant. + Important: Accurately reflect whether the subject is the user or someone around the user. + Important: Do not record temporary/one-off situational information; focus on meaningful, persistent information. + + ## Special rules for User Information + - Any event-related item is forbidden in User Information. + - Do not extract content that was obtained only through the model's follow-up questions unless the user shows strong proactive intent. + + ## Anti-noise rules (CRITICAL — violate these = invalid extraction) + - NEVER extract a question or inquiry as a memory item. Questions do not convey facts. + BAD: "The user asked about their own name" — this is a question, not a fact. + BAD: "The user wants to know the weather" — this is a question, not a fact. + - NEVER extract the assistant's lack of knowledge or uncertainty as a user trait. + BAD: "The user does not know their own name" — if the ASSISTANT said "I don't know", this is NOT a user trait. + BAD: "The user is unaware of X" — unless the user themselves stated "I don't know X". + - NEVER extract meta-conversation actions as memory items. + BAD: "The user started a new conversation" — conversational commands are not user facts. + BAD: "The user said hello" — greetings are not meaningful user information. + BAD: "The user ended the conversation" — not a user trait. + - NEVER extract the existence of a conversation itself as a memory item. + BAD: "The user had a conversation about programming" — the topic of conversation is not a user fact unless a specific preference or belief was stated. + - NEVER extract summaries of what was discussed. Only extract explicit, declarative facts about the user. + + ## Forbidden content + - Knowledge Q&A without a clear user fact. + - Trivial updates that do not add meaningful value (e.g., "full → too full"). + - Turns where the user did not respond and only the assistant spoke. + - Illegal / harmful sensitive topics (violence, politics, drugs, etc.). + - Private financial accounts, IDs, addresses, military/defense/government job details, precise street addresses—unless explicitly requested by the user (still avoid if not necessary). + - Any content mentioned only by the assistant and not explicitly confirmed by the user. + + ## Review & validation rules + - Merge similar items: keep only one and assign a single category. + - Resolve conflicts: keep the latest / most certain item. + - Final check: every item must comply with all extraction rules, especially the Anti-noise rules above. + + examples: + ordinal: 60 + prompt: | + # Examples (Input / Output / Explanation) + Example 1: User Information Extraction + ## Input + user: Hi, are you busy? I just got off work and I'm going to the supermarket to buy some groceries. + assistant: Not busy. Are you cooking for yourself? + user: Yes. It's healthier. I work as a product manager in an internet company. I'm 30 this year. After work I like experimenting with cooking, I often figure out dishes by myself. + assistant: Being a PM is tough. You're so disciplined to cook at 30! + user: It's fine. Cooking relaxes me. It's better than takeout. Also I'm traveling next weekend. + assistant: You can check the weather ahead. Your sunscreen can finally be used. + user: I haven't started packing yet. It's annoying. + ## Output + + + The user works as a product manager at an internet company + + Basic Information + + + + The user is 30 years old + + Basic Information + + + + The user likes experimenting with cooking after work + + Basic Information + + + + ## Explanation + Only stable user facts explicitly stated by the user are extracted. + The travel plan and packing annoyance are events/temporary states, so they are not extracted as User Information. + + Example 2: ANTI-NOISE — What NOT to extract + ## Input + user: 我叫什么名字? + assistant: 很抱歉,我不确定您的名字。您能告诉我吗? + user: /new + assistant: 好的,我们开始新的对话吧!有什么可以帮您的? + user: 你好啊 + assistant: 你好!很高兴见到你,有什么需要帮忙的吗? + ## Output + (empty — no items to extract) + ## Explanation + - "我叫什么名字?" is a QUESTION, not a fact about the user. Do NOT extract. + - "我不确定您的名字" is the ASSISTANT's lack of knowledge, NOT a user trait. Do NOT extract "The user does not know their own name". + - "/new" is a meta-conversation command. Do NOT extract as a memory item. + - "你好啊" is a greeting. Do NOT extract as meaningful user information. + +event: + rules: + ordinal: 30 + prompt: | + # Rules + ## General requirements (must satisfy all) + - Use "user" to refer to the user consistently. + - Each memory item must be complete and self-contained, written as a declarative descriptive sentence. + - Each memory item must express one single complete piece of information and be understandable without context. + - Similar/redundant items must be merged into one, and assigned to only one category. + - Each memory item must be < 50 words worth of length (keep it concise but include relevant details). + - Focus on specific events that happened at a particular time or period. + - Include relevant details such as time, location, and participants where available. + Important: Extract only events directly stated or confirmed by the user. No guesses, no suggestions, and no content introduced only by the assistant. + Important: Accurately reflect whether the subject is the user or someone around the user. + + ## Special rules for Event Information + - Behavioral patterns, habits, preferences, or factual knowledge are forbidden in Event Information. + - Focus on concrete happenings, activities, and experiences. + - Do not extract content that was obtained only through the model's follow-up questions unless the user shows strong proactive intent. + + ## Anti-noise rules (CRITICAL — violate these = invalid extraction) + - NEVER extract a question or inquiry as an event. Questions are not events. + BAD: "The user asked about their schedule" — asking is not an event that happened. + - NEVER extract the assistant's lack of knowledge as an event about the user. + BAD: "The user was unable to recall their plans" — if the ASSISTANT couldn't answer, this is NOT a user event. + - NEVER extract meta-conversation actions as events. + BAD: "The user started a new conversation" — not a life event. + BAD: "The user initiated a session reset" — not a meaningful event. + - NEVER extract greetings, farewells, or social pleasantries as events. + - NEVER extract the existence of a conversation as an event. + + ## Forbidden content + - Knowledge Q&A without a clear user event. + - Trivial daily activities unless significant (e.g., routine meals, commuting). + - Temporary, ephemeral situations that lack meaningful significance. + - Turns where the user did not respond and only the assistant spoke. + - Illegal / harmful sensitive topics (violence, politics, drugs, etc.). + - Private financial accounts, IDs, addresses, military/defense/government job details, precise street addresses—unless explicitly requested by the user (still avoid if not necessary). + - Any content mentioned only by the assistant and not explicitly confirmed by the user. + + ## Review & validation rules + - Merge similar items: keep only one and assign a single category. + - Resolve conflicts: keep the latest / most certain item. + - Final check: every item must comply with all extraction rules, especially the Anti-noise rules above. + + examples: + ordinal: 60 + prompt: | + # Examples (Input / Output / Explanation) + Example 1: Event Information Extraction + ## Input + user: Hi, are you busy? I just got off work and I'm going to the supermarket to buy some groceries. + assistant: Not busy. Are you cooking for yourself? + user: Yes. It's healthier. I work as a product manager in an internet company. I'm 30 this year. After work I like experimenting with cooking, I often figure out dishes by myself. + assistant: Being a PM is tough. You're so disciplined to cook at 30! + user: It's fine. Cooking relaxes me. It's better than takeout. Also I'm traveling next weekend. + assistant: You can check the weather ahead. Your sunscreen can finally be used. + user: I haven't started packing yet. It's annoying. + ## Output + + + The user is planning a trip next weekend and hasn't started packing yet + + Travel + + + + ## Explanation + Only specific events explicitly stated by the user are extracted. + The travel plan is an event with a specific time reference (next weekend). + User's job, age, and cooking habits are stable user traits, so they are not extracted as Event Information. + + Example 2: ANTI-NOISE — What NOT to extract as events + ## Input + user: 我明天有什么安排? + assistant: 很抱歉,我没有您明天的日程安排信息。 + user: /new + assistant: 好的,新对话开始了。 + ## Output + (empty — no items to extract) + ## Explanation + - "我明天有什么安排?" is a QUESTION, not an event. Do NOT extract. + - "我没有您的日程安排信息" is the ASSISTANT's lack of knowledge, NOT a user event. Do NOT extract "The user has no plans tomorrow". + - "/new" is a meta-conversation command. Do NOT extract as an event. diff --git a/config/settings.py b/config/settings.py index 834c536..9471962 100644 --- a/config/settings.py +++ b/config/settings.py @@ -42,6 +42,11 @@ class Settings(BaseSettings): # ── Storage ── STORAGE_PATH: str = "./data/storage" + # ── Prompt Configuration ── + # Path to a YAML file containing custom prompt overrides for memu-py. + # If empty or the file does not exist, memu-py defaults are used. + PROMPT_CONFIG_PATH: str = "" + @field_validator("DATABASE_URL", mode="after") @classmethod def assemble_db_url(cls, v: str, info: ValidationInfo) -> str: From 37e6a18501b33cb9e632866a3decd50a9cb008f3 Mon Sep 17 00:00:00 2001 From: beatwade Date: Wed, 8 Apr 2026 16:33:20 +0800 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20=E5=8A=A0=E5=BC=BA=E5=8F=8D=E5=99=AA?= =?UTF-8?q?=E9=9F=B3=E6=8F=90=E5=8F=96=20prompt=20=E2=80=94=20workflow=20?= =?UTF-8?q?=E6=AD=A5=E9=AA=A4=E5=89=8D=E7=BD=AE=E8=BF=87=E6=BB=A4=20+=20gr?= =?UTF-8?q?eeting=20=E8=A7=84=E5=88=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/prompts.yaml | 302 +++++++++++++++++++------------------------- 1 file changed, 131 insertions(+), 171 deletions(-) diff --git a/config/prompts.yaml b/config/prompts.yaml index 0dd94aa..f73be7b 100644 --- a/config/prompts.yaml +++ b/config/prompts.yaml @@ -1,193 +1,153 @@ # memU 自定义提取 prompt 覆盖 -# 用于防止 memu-py 提取噪音信息(问题当事实、AI 无知当用户特征、元动作当事实) # 结构:memory_type → block_name → {ordinal, prompt} -# ordinal 控制拼接顺序(30=rules, 60=examples) -# 完整覆盖整个 block,不会与默认 block 合并 +# ordinal 控制拼接顺序(20=workflow, 30=rules, 60=examples) profile: + workflow: + ordinal: 20 + prompt: | + # Workflow + For each potential memory item, follow this strict 4-step process: + + Step 1 — SOURCE CHECK: Identify WHO said it. + - Did the USER state this as a fact about themselves? → Proceed to Step 2. + - Did the ASSISTANT say it? → DISCARD immediately. + - Is it a QUESTION from either side? → DISCARD immediately. + - Is it a command (e.g., /new, /help)? → DISCARD immediately. + - Is it a greeting (你好, hi, hello, 你好啊)? → DISCARD immediately. + + Step 2 — FACT VERIFICATION: Is this a declarative fact about the user? + - "我叫小明" → YES, fact → Proceed to Step 3. + - "我叫什么名字?" → NO, this is a question → DISCARD. + - "我不知道" (said by user) → borderline, only if user explicitly states ignorance as a trait. + - "我不确定" (said by assistant) → NO, this is assistant's state → DISCARD. + + Step 3 — PERSISTENCE CHECK: Is this stable, lasting information? + - Preferences, name, age, job, hobbies → YES. + - Temporary mood, current action, situational comment → NO, DISCARD. + + Step 4 — OUTPUT: Only items that passed ALL three steps are extracted. + If no items pass all checks, output NOTHING. Empty output is valid and preferred over noise. + rules: ordinal: 30 prompt: | # Rules - ## General requirements (must satisfy all) - - Use "user" to refer to the user consistently. - - Each memory item must be complete and self-contained, written as a declarative descriptive sentence. - - Each memory item must express one single complete piece of information and be understandable without context. - - Similar/redundant items must be merged into one, and assigned to only one category. - - Each memory item must be < 30 words worth of length (keep it as concise as possible). - - A single memory item must NOT contain timestamps. - Important: Extract only facts directly stated or confirmed by the user. No guesses, no suggestions, and no content introduced only by the assistant. - Important: Accurately reflect whether the subject is the user or someone around the user. - Important: Do not record temporary/one-off situational information; focus on meaningful, persistent information. - - ## Special rules for User Information - - Any event-related item is forbidden in User Information. - - Do not extract content that was obtained only through the model's follow-up questions unless the user shows strong proactive intent. - - ## Anti-noise rules (CRITICAL — violate these = invalid extraction) - - NEVER extract a question or inquiry as a memory item. Questions do not convey facts. - BAD: "The user asked about their own name" — this is a question, not a fact. - BAD: "The user wants to know the weather" — this is a question, not a fact. - - NEVER extract the assistant's lack of knowledge or uncertainty as a user trait. - BAD: "The user does not know their own name" — if the ASSISTANT said "I don't know", this is NOT a user trait. - BAD: "The user is unaware of X" — unless the user themselves stated "I don't know X". - - NEVER extract meta-conversation actions as memory items. - BAD: "The user started a new conversation" — conversational commands are not user facts. - BAD: "The user said hello" — greetings are not meaningful user information. - BAD: "The user ended the conversation" — not a user trait. - - NEVER extract the existence of a conversation itself as a memory item. - BAD: "The user had a conversation about programming" — the topic of conversation is not a user fact unless a specific preference or belief was stated. - - NEVER extract summaries of what was discussed. Only extract explicit, declarative facts about the user. - - ## Forbidden content - - Knowledge Q&A without a clear user fact. - - Trivial updates that do not add meaningful value (e.g., "full → too full"). - - Turns where the user did not respond and only the assistant spoke. - - Illegal / harmful sensitive topics (violence, politics, drugs, etc.). - - Private financial accounts, IDs, addresses, military/defense/government job details, precise street addresses—unless explicitly requested by the user (still avoid if not necessary). - - Any content mentioned only by the assistant and not explicitly confirmed by the user. - - ## Review & validation rules - - Merge similar items: keep only one and assign a single category. - - Resolve conflicts: keep the latest / most certain item. - - Final check: every item must comply with all extraction rules, especially the Anti-noise rules above. + ## Absolute prohibitions (ANY violation = invalid extraction) + 1. NEVER extract anything said by the ASSISTANT as a user fact. + 2. NEVER extract questions as facts. A question contains no factual information. + 3. NEVER extract greetings, farewells, or pleasantries. + 4. NEVER extract slash commands or meta-conversation actions. + 5. NEVER extract "the user asked/wanted to know/inquired about" — asking is not a fact. + 6. NEVER extract "the user does not know X" unless the user EXPLICITLY stated "我不知道X". + If the ASSISTANT said "I don't know", that is NOT the user's trait. + + ## What to extract + - Only declarative facts the user stated about themselves: name, age, job, preferences, habits, relationships. + - Each item must be < 30 words, self-contained, and written as a declarative sentence. + - Use "The user" as the subject consistently. + - Merge similar items into one. examples: ordinal: 60 prompt: | - # Examples (Input / Output / Explanation) - Example 1: User Information Extraction - ## Input - user: Hi, are you busy? I just got off work and I'm going to the supermarket to buy some groceries. - assistant: Not busy. Are you cooking for yourself? - user: Yes. It's healthier. I work as a product manager in an internet company. I'm 30 this year. After work I like experimenting with cooking, I often figure out dishes by myself. - assistant: Being a PM is tough. You're so disciplined to cook at 30! - user: It's fine. Cooking relaxes me. It's better than takeout. Also I'm traveling next weekend. - assistant: You can check the weather ahead. Your sunscreen can finally be used. - user: I haven't started packing yet. It's annoying. - ## Output - - - The user works as a product manager at an internet company - - Basic Information - - - - The user is 30 years old - - Basic Information - - - - The user likes experimenting with cooking after work - - Basic Information - - - - ## Explanation - Only stable user facts explicitly stated by the user are extracted. - The travel plan and packing annoyance are events/temporary states, so they are not extracted as User Information. - - Example 2: ANTI-NOISE — What NOT to extract - ## Input - user: 我叫什么名字? - assistant: 很抱歉,我不确定您的名字。您能告诉我吗? - user: /new - assistant: 好的,我们开始新的对话吧!有什么可以帮您的? - user: 你好啊 - assistant: 你好!很高兴见到你,有什么需要帮忙的吗? - ## Output - (empty — no items to extract) - ## Explanation - - "我叫什么名字?" is a QUESTION, not a fact about the user. Do NOT extract. - - "我不确定您的名字" is the ASSISTANT's lack of knowledge, NOT a user trait. Do NOT extract "The user does not know their own name". - - "/new" is a meta-conversation command. Do NOT extract as a memory item. - - "你好啊" is a greeting. Do NOT extract as meaningful user information. + # Examples + Example 1 — GOOD extraction + Input: + user: 我叫小明,今年28岁,在一家互联网公司做产品经理。我喜欢编程和打篮球。 + assistant: 很高兴认识你,小明! + Output: + + The user's name is 小明Basic Information + The user is 28 years oldBasic Information + The user works as a product manager at an internet companyWork + The user likes programming and basketballPreferences + + + Example 2 — NOISE (output must be EMPTY) + Input: + user: 我叫什么名字? + assistant: 很抱歉,我不确定您的名字。您能告诉我吗? + user: /new + assistant: 好的,我们开始新的对话吧! + user: 你好啊 + assistant: 你好!有什么可以帮您的? + Output: + (empty) + Reason: "我叫什么名字?"=question. "我不确定"=assistant speech. "/new"=command. "你好啊"=greeting. ZERO facts about the user. + + Example 3 — NOISE (output must be EMPTY) + Input: + user: 你知道明天天气怎么样吗? + assistant: 我无法获取实时天气信息。 + user: 好的谢谢 + assistant: 不客气! + Output: + (empty) + Reason: All utterances are questions, assistant limitations, and pleasantries. ZERO user facts. event: + workflow: + ordinal: 20 + prompt: | + # Workflow + For each potential event, follow this strict 4-step process: + + Step 1 — SOURCE CHECK: Identify WHO said it and WHAT was said. + - Did the USER describe a concrete event that happened? → Proceed to Step 2. + - Did the ASSISTANT say it? → DISCARD immediately. + - Is it a QUESTION about something? → DISCARD immediately. + - Is it a command (/new, /help) or greeting (你好, hi, hello, 你好啊, hey)? → DISCARD immediately. + - Is it a response to a greeting (不客气, glad to meet you)? → DISCARD immediately. + - Is it a social pleasantry (谢谢, 好的, sure)? → DISCARD immediately. + + Step 2 — EVENT VERIFICATION: Is this a specific, significant happening? + - "I traveled to Japan last month" → YES, event → Proceed. + - "I asked about my schedule" → NO, asking is not an event → DISCARD. + - "I don't know my plans" (user said) → borderline, not an event → DISCARD. + + Step 3 — SIGNIFICANCE CHECK: Does this event matter long-term? + - Life milestones, travel, major decisions → YES. + - Routine daily activities → NO, DISCARD. + + Step 4 — OUTPUT: Only events that passed ALL three steps. + Empty output is valid and preferred over noise. + rules: ordinal: 30 prompt: | # Rules - ## General requirements (must satisfy all) - - Use "user" to refer to the user consistently. - - Each memory item must be complete and self-contained, written as a declarative descriptive sentence. - - Each memory item must express one single complete piece of information and be understandable without context. - - Similar/redundant items must be merged into one, and assigned to only one category. - - Each memory item must be < 50 words worth of length (keep it concise but include relevant details). - - Focus on specific events that happened at a particular time or period. - - Include relevant details such as time, location, and participants where available. - Important: Extract only events directly stated or confirmed by the user. No guesses, no suggestions, and no content introduced only by the assistant. - Important: Accurately reflect whether the subject is the user or someone around the user. - - ## Special rules for Event Information - - Behavioral patterns, habits, preferences, or factual knowledge are forbidden in Event Information. - - Focus on concrete happenings, activities, and experiences. - - Do not extract content that was obtained only through the model's follow-up questions unless the user shows strong proactive intent. - - ## Anti-noise rules (CRITICAL — violate these = invalid extraction) - - NEVER extract a question or inquiry as an event. Questions are not events. - BAD: "The user asked about their schedule" — asking is not an event that happened. - - NEVER extract the assistant's lack of knowledge as an event about the user. - BAD: "The user was unable to recall their plans" — if the ASSISTANT couldn't answer, this is NOT a user event. - - NEVER extract meta-conversation actions as events. - BAD: "The user started a new conversation" — not a life event. - BAD: "The user initiated a session reset" — not a meaningful event. - - NEVER extract greetings, farewells, or social pleasantries as events. - - NEVER extract the existence of a conversation as an event. - - ## Forbidden content - - Knowledge Q&A without a clear user event. - - Trivial daily activities unless significant (e.g., routine meals, commuting). - - Temporary, ephemeral situations that lack meaningful significance. - - Turns where the user did not respond and only the assistant spoke. - - Illegal / harmful sensitive topics (violence, politics, drugs, etc.). - - Private financial accounts, IDs, addresses, military/defense/government job details, precise street addresses—unless explicitly requested by the user (still avoid if not necessary). - - Any content mentioned only by the assistant and not explicitly confirmed by the user. - - ## Review & validation rules - - Merge similar items: keep only one and assign a single category. - - Resolve conflicts: keep the latest / most certain item. - - Final check: every item must comply with all extraction rules, especially the Anti-noise rules above. + ## Absolute prohibitions (ANY violation = invalid extraction) + 1. NEVER extract anything said by the ASSISTANT as a user event. + 2. NEVER extract questions as events. + 3. NEVER extract greetings (你好, hi, hello), farewells, commands, or social pleasantries as events. + 4. NEVER extract "the user asked about X" — asking is not an event. + 5. NEVER extract "the user does not know X" — ignorance is not an event. + 6. NEVER extract "the user greeted" or "the user said hello" — greetings are NOT events. + + ## What to extract + - Only concrete events with time, place, or participants that the user described. + - Focus on significant happenings, not routine activities. + - Each item < 50 words, self-contained, declarative sentence. examples: ordinal: 60 prompt: | - # Examples (Input / Output / Explanation) - Example 1: Event Information Extraction - ## Input - user: Hi, are you busy? I just got off work and I'm going to the supermarket to buy some groceries. - assistant: Not busy. Are you cooking for yourself? - user: Yes. It's healthier. I work as a product manager in an internet company. I'm 30 this year. After work I like experimenting with cooking, I often figure out dishes by myself. - assistant: Being a PM is tough. You're so disciplined to cook at 30! - user: It's fine. Cooking relaxes me. It's better than takeout. Also I'm traveling next weekend. - assistant: You can check the weather ahead. Your sunscreen can finally be used. - user: I haven't started packing yet. It's annoying. - ## Output - - - The user is planning a trip next weekend and hasn't started packing yet - - Travel - - - - ## Explanation - Only specific events explicitly stated by the user are extracted. - The travel plan is an event with a specific time reference (next weekend). - User's job, age, and cooking habits are stable user traits, so they are not extracted as Event Information. - - Example 2: ANTI-NOISE — What NOT to extract as events - ## Input - user: 我明天有什么安排? - assistant: 很抱歉,我没有您明天的日程安排信息。 - user: /new - assistant: 好的,新对话开始了。 - ## Output - (empty — no items to extract) - ## Explanation - - "我明天有什么安排?" is a QUESTION, not an event. Do NOT extract. - - "我没有您的日程安排信息" is the ASSISTANT's lack of knowledge, NOT a user event. Do NOT extract "The user has no plans tomorrow". - - "/new" is a meta-conversation command. Do NOT extract as an event. + # Examples + Example 1 — GOOD extraction + Input: + user: 上周我去了东京旅游,玩了5天,吃了好多寿司。 + assistant: 听起来很棒!东京好玩吗? + Output: + + The user traveled to Tokyo for 5 days last week and ate a lot of sushiTravel + + + Example 2 — NOISE (output must be EMPTY) + Input: + user: 我明天有什么安排? + assistant: 很抱歉,我没有您明天的日程安排信息。 + user: /new + Output: + (empty) + Reason: "我明天有什么安排?"=question. "我没有信息"=assistant speech. "/new"=command. ZERO events.