forked from Selig/openclaw-skill
Initial commit: OpenClaw Skill Collection
6 custom skills (assign-task, dispatch-webhook, daily-briefing, task-capture, qmd-brain, tts-voice) with technical documentation. Compatible with Claude Code, OpenClaw, Codex CLI, and OpenCode.
This commit is contained in:
78
skills/qmd-brain/SKILL.md
Normal file
78
skills/qmd-brain/SKILL.md
Normal file
@@ -0,0 +1,78 @@
|
||||
---
name: qmd-brain
description: 第二大腦知識庫搜尋與索引。用 qmd(BM25+向量混合)搜尋本地 markdown 文件,並透過 PostgreSQL pgvector 進行深度語意搜尋。
triggers:
  - "搜尋"
  - "查找"
  - "找資料"
  - "recall"
  - "記憶"
  - "之前說過"
  - "知識庫"
  - "找到"
  - "我之前"
  - "幫我找"
  - "查一下"
  - "有沒有"
  - "搜一下"
  - "brain search"
  - "qmd"
tools:
  - exec
  - memory
---
|
||||
|
||||
# qmd-brain Skill
|
||||
|
||||
## 功能說明
|
||||
|
||||
第二大腦(Second Brain)知識庫搜尋工具,整合兩層搜尋:
|
||||
|
||||
| 層次 | 工具 | 特色 |
|------|------|------|
| Layer 1 | qmd (BM25 全文搜尋) | 快速關鍵字比對,本地 SQLite |
| Layer 2 | embed_to_pg (語意搜尋) | 向量相似度,PostgreSQL pgvector |
|
||||
|
||||
## 觸發範例
|
||||
|
||||
```
使用者:「幫我找關於 nginx 設定的資料」
→ qmd search "nginx 設定" → 返回相關文件段落

使用者:「之前有記過 Telegram bot 的設定嗎?」
→ embed_to_pg search "Telegram bot token 設定" → 語意搜尋

使用者:「更新知識庫索引」
→ qmd embed + embed_to_pg embed

使用者:「查知識庫統計」
→ qmd collection list + embed_to_pg stats
```
|
||||
|
||||
## 搜尋策略
|
||||
|
||||
1. **關鍵字搜尋**(qmd search):適合確定的詞彙、指令、設定名稱
|
||||
2. **語意搜尋**(embed_to_pg search):適合概念性問題、模糊記憶
|
||||
3. **混合搜尋**(預設):同時執行 qmd 全文搜尋與 pgvector 語意搜尋,並將兩組結果並列顯示
|
||||
|
||||
## 輸出格式
|
||||
|
||||
搜尋結果包含:
|
||||
- 文件來源(檔案路徑)
|
||||
- 相關段落(前 200 字)
|
||||
- 相似度分數
|
||||
|
||||
## 重要路徑
|
||||
|
||||
```
qmd 索引:        ~/.cache/qmd/index.sqlite
pgvector DB:     postgresql://qmd_user@localhost/qmd_brain
embed 腳本:      /home/selig/apps/qmd-pg/embed_to_pg.py
qmd collections: selig-home (/home/selig)
```
|
||||
|
||||
## 每日排程
|
||||
|
||||
凌晨 02:00 自動執行:
|
||||
1. `qmd embed`(更新 BM25 + 本地向量索引)
|
||||
2. `embed_to_pg embed`(更新 PostgreSQL 向量庫)
|
||||
196
skills/qmd-brain/handler.ts
Normal file
196
skills/qmd-brain/handler.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* qmd-brain skill
|
||||
* 第二大腦知識庫:BM25 全文搜尋 + PostgreSQL pgvector 語意搜尋
|
||||
*
|
||||
* 依賴:
|
||||
* - qmd CLI (npm install -g @tobilu/qmd)
|
||||
* - embed_to_pg.py (Python venv at /home/selig/apps/qmd-pg/)
|
||||
*/
|
||||
|
||||
import { execSync, exec } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
|
||||
/** Promise-returning wrapper around `child_process.exec`, used for every awaited shell command in this module. */
const execAsync = promisify(exec);

/** Absolute path of the qmd CLI binary (located under an nvm-managed Node installation). */
const QMD_CMD = '/home/selig/.nvm/versions/node/v24.13.1/bin/qmd';

/** Command prefix (venv Python interpreter followed by the script path) used to run `embed_to_pg.py`. */
const EMBED_PY = '/home/selig/apps/qmd-pg/venv/bin/python3 /home/selig/apps/qmd-pg/embed_to_pg.py';

/** Maximum number of characters of search output kept in a reply before it is truncated. */
const MAX_SEARCH_LEN = 1500;
|
||||
|
||||
/** One hit parsed from the JSON output of `embed_to_pg.py search --json`. */
interface SearchResult {
  /** Path of the document the hit came from; only its last path segment is shown in replies. */
  source: string;
  /** Chunk number of the hit (presumably the chunk's index within `source` — confirm against embed_to_pg.py). */
  chunk: number;
  /** Text of the matched chunk; replies show at most its first 200 characters. */
  text: string;
  /** Similarity score; rendered as a percentage, so presumably in the range 0..1 — confirm against embed_to_pg.py. */
  similarity?: number;
}
|
||||
|
||||
/**
 * Shape of a single qmd search hit.
 *
 * NOTE(review): not referenced by any code visible in this file; qmd output is
 * currently consumed as raw markdown text. Presumably kept for a future
 * structured-output mode — confirm before removing.
 */
interface QmdResult {
  /** Path of the matching document. */
  path: string;
  /** Matching text, when provided. */
  text?: string;
  /** Relevance score, when provided. */
  score?: number;
}
|
||||
|
||||
/**
 * Runs a qmd BM25 full-text search and returns its markdown output.
 *
 * The query comes from chat messages, so it is embedded in the shell command
 * using POSIX single-quote escaping. (Double quotes, as produced by
 * `JSON.stringify`, still let the shell expand `$(...)`, backticks and `$VAR`.)
 *
 * @param query - Free-text search terms; treated as untrusted input.
 * @param topK - Maximum number of hits requested from qmd.
 * @returns qmd's trimmed stdout, `(無結果)` when stdout is empty, or a one-line
 *   `qmd search 錯誤: …` message when the command fails. Never rejects.
 */
async function qmdSearch(query: string, topK = 5): Promise<string> {
  // Inside single quotes nothing is expanded; an embedded ' is written as '\''.
  const quotedQuery = `'${query.replace(/'/g, "'\\''")}'`;

  try {
    const { stdout } = await execAsync(
      `${QMD_CMD} search ${quotedQuery} --output markdown --limit ${topK}`,
      { timeout: 15000, env: { ...process.env, HOME: '/home/selig' } },
    );
    return stdout.trim() || '(無結果)';
  } catch (e: unknown) {
    // Only the first line of the failure is surfaced to keep the reply short.
    const detail = e instanceof Error ? e.message.split('\n')[0] : String(e);
    return `qmd search 錯誤: ${detail}`;
  }
}
|
||||
|
||||
/**
 * Runs a semantic (vector) search through `embed_to_pg.py search --json`.
 *
 * The query is embedded in the shell command using POSIX single-quote
 * escaping so that user-supplied text cannot trigger shell expansion.
 *
 * @param query - Free-text search terms; treated as untrusted input.
 * @param topK - Maximum number of hits requested.
 * @returns The parsed hits, or an empty array when the command fails, times
 *   out, or prints something that is not a JSON array. Never rejects; failures
 *   are reported on stderr via `console.error`.
 */
async function pgSearch(query: string, topK = 5): Promise<SearchResult[]> {
  // Inside single quotes nothing is expanded; an embedded ' is written as '\''.
  const quotedQuery = `'${query.replace(/'/g, "'\\''")}'`;

  try {
    const { stdout } = await execAsync(
      `${EMBED_PY} search ${quotedQuery} --top-k ${topK} --json`,
      { timeout: 20000 },
    );

    const parsed: unknown = JSON.parse(stdout);
    if (!Array.isArray(parsed)) {
      console.error('pgSearch: expected a JSON array from embed_to_pg.py search');
      return [];
    }
    // Element shape is trusted to match SearchResult; only the container is validated here.
    return parsed as SearchResult[];
  } catch (e: unknown) {
    // Best-effort: callers treat "no results" and "search unavailable" alike,
    // but the cause is logged so failures are diagnosable.
    const detail = e instanceof Error ? e.message.split('\n')[0] : String(e);
    console.error(`pgSearch failed: ${detail}`);
    return [];
  }
}
|
||||
|
||||
/**
 * Formats vector-search hits as a markdown list for a chat reply.
 *
 * Each hit becomes a bold header (`[n] <file name> <score%>`) followed by a
 * quoted one-line snippet of at most 200 characters. An ellipsis is appended
 * only when the snippet was actually truncated.
 *
 * @param results - Hits to format.
 * @returns The formatted markdown, or `(向量庫無相關結果)` when `results` is empty.
 */
function formatPgResults(results: SearchResult[]): string {
  if (!results.length) return '(向量庫無相關結果)';

  return results
    .map((r, i) => {
      // Hits come from unvalidated JSON, so tolerate missing fields instead of throwing.
      const source = r.source ?? '';
      const text = r.text ?? '';

      const fname = source.split('/').pop() || source;
      const snippet = text.slice(0, 200).replace(/\n/g, ' ');
      const ellipsis = text.length > 200 ? '...' : '';
      // A similarity of exactly 0 is still a score; only an absent value is omitted.
      const score =
        typeof r.similarity === 'number' ? `${(r.similarity * 100).toFixed(1)}%` : '';

      return `**[${i + 1}] ${fname}** ${score}\n> ${snippet}${ellipsis}`;
    })
    .join('\n\n');
}
|
||||
|
||||
/**
 * Starts a re-index of both stores (`qmd embed` and `embed_to_pg.py embed`)
 * in the background and returns immediately without waiting for completion.
 *
 * Output of both commands is appended to `/tmp/qmd-embed.log`.
 *
 * @returns A confirmation message, or a failure message if the command could
 *   not be launched synchronously. Never rejects.
 */
async function triggerEmbed(): Promise<string> {
  // Redirections are processed left to right: stdout must be pointed at the
  // log file first, and only then may stderr be duplicated onto it. The
  // reverse order (`2>&1 >> file`) leaves stderr on the original stdout.
  const logRedirect = '>> /tmp/qmd-embed.log 2>&1';

  try {
    // Fire-and-forget: each command is backgrounded with `&`, so the shell exits right away.
    exec(
      `${QMD_CMD} embed ${logRedirect} & ${EMBED_PY} embed ${logRedirect} &`,
      { env: { ...process.env, HOME: '/home/selig' } },
    );
    return '✅ 索引更新已在背景啟動,約需 1-5 分鐘完成。';
  } catch (e: unknown) {
    // Only synchronous launch failures land here; errors inside the
    // background jobs are visible in the log file only.
    const detail = e instanceof Error ? e.message : String(e);
    return `❌ 索引啟動失敗: ${detail}`;
  }
}
|
||||
|
||||
/**
 * Collects statistics from both stores: `qmd collection list` and
 * `embed_to_pg.py stats`.
 *
 * The two commands are independent, so they run concurrently. The qmd section
 * always comes first in the output. A failing command contributes a one-line
 * error entry instead of aborting the whole report.
 *
 * @returns Markdown with one section per store, separated by a blank line.
 *   Never rejects.
 */
async function getStats(): Promise<string> {
  /** Runs one command and renders either a fenced output section or a one-line error. */
  const section = async (
    title: string,
    errorLabel: string,
    command: string,
    options: { timeout: number; env?: Record<string, string | undefined> },
  ): Promise<string> => {
    try {
      const { stdout } = await execAsync(command, options);
      return `**${title}:**\n\`\`\`\n${String(stdout).trim()}\n\`\`\``;
    } catch (e: unknown) {
      const detail = e instanceof Error ? e.message.split('\n')[0] : String(e);
      return `${errorLabel}: ${detail}`;
    }
  };

  const sections = await Promise.all([
    section('qmd Collections', 'qmd', `${QMD_CMD} collection list`, {
      timeout: 5000,
      env: { ...process.env, HOME: '/home/selig' },
    }),
    section('PostgreSQL pgvector', 'pgvector', `${EMBED_PY} stats`, { timeout: 10000 }),
  ]);

  return sections.join('\n\n');
}
|
||||
|
||||
/**
 * Classifies a user message into one of the skill's four actions by keyword.
 *
 * Matching is case-insensitive and checked in priority order:
 * `embed` (re-index), then `stats`, then `vsearch` (vector-only search).
 * Anything else falls back to `search` (hybrid search).
 *
 * @param message - Raw user message text.
 * @returns The detected intent.
 */
function detectIntent(message: string): 'search' | 'embed' | 'stats' | 'vsearch' {
  const lower = message.toLowerCase();

  if (/更新|重新索引|rebuild|index|embed|掃描/.test(lower)) return 'embed';
  if (/統計|stat|幾個|多少|collection/.test(lower)) return 'stats';
  if (/語意|向量|vsearch|概念|類似/.test(lower)) return 'vsearch';
  return 'search';
}
|
||||
|
||||
/**
 * Extracts the search terms from a user message.
 *
 * Strips one leading trigger phrase (optionally followed by an ASCII or
 * full-width colon and whitespace), then removes filler words and question
 * marks anywhere in the text.
 *
 * Full-width punctuation is written as `\uFF1A` / `\uFF1F` escapes so the
 * patterns stay valid even if the file's non-ASCII punctuation is normalised
 * to ASCII. A bare `?` as a regex alternative is a syntax error.
 *
 * @param message - Raw user message text.
 * @returns The cleaned query, or the trimmed original message when cleaning
 *   leaves nothing (e.g. the message was only a trigger word).
 */
function extractQuery(message: string): string {
  const leadingTrigger =
    /^(搜尋|查找|找資料|幫我找|查一下|搜一下|找到|有沒有|之前說過|我之前|recall|brain search|qmd)[:\uFF1A]?\s*/i;
  const fillerWords = /(請|幫我|的資料|的內容|相關|嗎|\uFF1F|\?)/g;

  const cleaned = message.replace(leadingTrigger, '').replace(fillerWords, '').trim();
  return cleaned || message.trim();
}
|
||||
|
||||
// ─── 主 handler ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Skill entry point: routes a chat message to re-indexing, statistics,
 * vector-only search, or the default hybrid (BM25 + vector) search.
 *
 * @param ctx - Invocation context; the message text is read from
 *   `ctx.message.text`, falling back to `ctx.message.content`.
 * @returns An object with a markdown `reply` and, for searches, a `metadata`
 *   object describing the query and result counts.
 */
export async function handler(ctx: any) {
  // Tolerate a missing ctx/message and non-string payloads rather than throwing.
  const rawText = ctx?.message?.text || ctx?.message?.content || '';
  const message = typeof rawText === 'string' ? rawText : String(rawText);

  const intent = detectIntent(message);
  const query = extractQuery(message);

  // Re-index both stores (runs in the background).
  if (intent === 'embed') {
    const result = await triggerEmbed();
    return { reply: `🧠 **第二大腦索引更新**\n\n${result}` };
  }

  // Statistics for both stores.
  if (intent === 'stats') {
    const stats = await getStats();
    return { reply: `📊 **知識庫統計**\n\n${stats}` };
  }

  // Semantic search (pgvector only).
  if (intent === 'vsearch') {
    if (!query) {
      return { reply: '請提供搜尋關鍵字,例如:「語意搜尋 Telegram 機器人設定」' };
    }
    const results = await pgSearch(query, 5);
    const formatted = formatPgResults(results);
    return {
      reply: `🔍 **語意搜尋**:${query}\n\n${formatted}`,
      metadata: { query, results_count: results.length, engine: 'pgvector' },
    };
  }

  // Hybrid search (BM25 + vector).
  if (!query) {
    return { reply: '請提供搜尋關鍵字,例如:「搜尋 nginx 設定」' };
  }

  // The two engines are independent, so query them in parallel.
  const [bm25Result, vectorResults] = await Promise.all([
    qmdSearch(query, 3),
    pgSearch(query, 3),
  ]);

  let reply = `🧠 **第二大腦搜尋**:${query}\n\n`;
  reply += `### 全文搜尋 (BM25)\n${bm25Result}`;

  // Decide on the hit count alone: inspecting the formatted text would hide
  // the section whenever a real snippet happened to contain the placeholder phrase.
  if (vectorResults.length > 0) {
    reply += `\n\n### 語意搜尋 (向量)\n${formatPgResults(vectorResults)}`;
  }

  // Truncate overly long replies and point the user at the vector-only mode.
  if (reply.length > MAX_SEARCH_LEN) {
    reply =
      reply.slice(0, MAX_SEARCH_LEN) +
      '\n\n...(結果已截斷,輸入「語意搜尋 [關鍵字]」可專注向量搜尋)';
  }

  return {
    reply,
    metadata: {
      query,
      bm25_chars: bm25Result.length,
      vector_results: vectorResults.length,
    },
  };
}
|
||||
Reference in New Issue
Block a user