Files
openclaw-skill/skills/qmd-brain/handler.ts
Selig 4c966a3ad2 Initial commit: OpenClaw Skill Collection
6 custom skills (assign-task, dispatch-webhook, daily-briefing,
task-capture, qmd-brain, tts-voice) with technical documentation.
Compatible with Claude Code, OpenClaw, Codex CLI, and OpenCode.
2026-03-13 10:58:30 +08:00

197 lines
6.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * qmd-brain skill
 * Second-brain knowledge base: BM25 full-text search + PostgreSQL pgvector semantic search
 *
 * Dependencies:
 * - qmd CLI (npm install -g @tobilu/qmd)
 * - embed_to_pg.py (Python venv at /home/selig/apps/qmd-pg/)
 */
import { exec, execFile, execSync } from 'child_process';
import { promisify } from 'util';
/** Promise-returning wrapper around `child_process.exec`. */
const execAsync = promisify(exec);

/** Absolute path of the qmd CLI executable. */
const QMD_CMD = '/home/selig/.nvm/versions/node/v24.13.1/bin/qmd';

/** Command prefix for the embedding script: venv interpreter followed by the script path. */
const EMBED_PY = [
  '/home/selig/apps/qmd-pg/venv/bin/python3',
  '/home/selig/apps/qmd-pg/embed_to_pg.py',
].join(' ');

/** Maximum length of a search reply before it gets truncated. */
const MAX_SEARCH_LEN = 1500;
/**
 * One hit of the pgvector semantic search, as parsed from the JSON printed by
 * the `embed_to_pg.py search ... --json` command.
 */
interface SearchResult {
// Path of the source document; replies show only its last `/`-separated segment.
source: string;
// Chunk number within the source — presumably the index of this text chunk; TODO confirm against embed_to_pg.py.
chunk: number;
// Text of the hit; replies show at most its first 200 characters.
text: string;
// Similarity score, rendered as a percentage (value x 100) — presumably in the 0..1 range; verify.
similarity?: number;
}
/**
 * Shape of a single qmd search hit.
 *
 * NOTE(review): not referenced anywhere in the visible part of this file
 * (qmd output is consumed as markdown text); the field meanings are assumed
 * from their names only — confirm against the qmd CLI or remove if unused.
 */
interface QmdResult {
path: string;
text?: string;
score?: number;
}
/**
 * Run a qmd BM25 full-text search and return its markdown output.
 *
 * The query comes from a chat message, so it is handed to qmd as a single
 * argv entry through `execFile`. No shell parses it, which means `$(...)`,
 * backticks and quotes in the query cannot execute commands or get mangled.
 *
 * @param query - Search text, forwarded verbatim to `qmd search`.
 * @param topK - Maximum number of hits, passed as `--limit`.
 * @returns Trimmed stdout, `(無結果)` when stdout is empty, or a one-line
 *   `qmd search 錯誤: ...` message when the command fails or times out.
 */
async function qmdSearch(query: string, topK = 5): Promise<string> {
  const runFile = promisify(execFile);
  try {
    const { stdout } = await runFile(
      QMD_CMD,
      ['search', query, '--output', 'markdown', '--limit', String(topK)],
      { timeout: 15000, env: { ...process.env, HOME: '/home/selig' } },
    );
    return stdout.trim() || '(無結果)';
  } catch (e: unknown) {
    // Only the first line: the rest of the message is the child's stderr dump.
    const firstLine = e instanceof Error ? e.message.split('\n')[0] : String(e);
    return `qmd search 錯誤: ${firstLine}`;
  }
}
/**
 * Run a PostgreSQL pgvector semantic search through the embedding script.
 *
 * The query is passed as one argv entry via `execFile` (no shell), so shell
 * metacharacters in a chat message cannot execute commands. The script's
 * JSON output is validated before use: anything that is not an array yields
 * `[]`, and rows lacking a string `source` or `text` are dropped.
 *
 * @param query - Search text, forwarded verbatim to the script's `search` command.
 * @param topK - Maximum number of hits, passed as `--top-k`.
 * @returns The parsed hits, or `[]` on any failure (non-zero exit, timeout,
 *   invalid JSON). Failures are deliberately non-fatal so callers can still
 *   show results from other engines.
 */
async function pgSearch(query: string, topK = 5): Promise<SearchResult[]> {
  const runFile = promisify(execFile);
  // EMBED_PY is "<interpreter> <script>"; execFile needs the pieces separately.
  const [interpreter, ...scriptArgs] = EMBED_PY.split(/\s+/).filter(Boolean);
  if (!interpreter) return [];

  const isHit = (row: unknown): row is SearchResult =>
    typeof row === 'object' &&
    row !== null &&
    typeof (row as { source?: unknown }).source === 'string' &&
    typeof (row as { text?: unknown }).text === 'string';

  try {
    const { stdout } = await runFile(
      interpreter,
      [...scriptArgs, 'search', query, '--top-k', String(topK), '--json'],
      { timeout: 20000 },
    );
    const parsed: unknown = JSON.parse(stdout);
    return Array.isArray(parsed) ? parsed.filter(isHit) : [];
  } catch {
    // Best effort: an unavailable vector store must not break the reply.
    return [];
  }
}
/**
 * Render pgvector hits as a markdown list.
 *
 * Each hit becomes `**[n] <file name>** <score>` followed by a quoted snippet
 * of at most 200 characters with newlines flattened to spaces. The ellipsis
 * is appended only when the text was actually cut, and a similarity of
 * exactly 0 is still shown (as `0.0%`) instead of being treated as missing.
 *
 * @param results - Hits to render.
 * @returns The markdown block, or `(向量庫無相關結果)` when there are no hits.
 */
function formatPgResults(results: SearchResult[]): string {
  if (!results.length) return '(向量庫無相關結果)';

  const snippetLength = 200;
  return results
    .map((hit, index) => {
      // Fall back to the full source when it ends with '/' (empty last segment).
      const fileName = hit.source.split('/').pop() || hit.source;
      const snippet = hit.text.slice(0, snippetLength).replace(/\n/g, ' ');
      const ellipsis = hit.text.length > snippetLength ? '...' : '';
      const score = hit.similarity != null ? `${(hit.similarity * 100).toFixed(1)}%` : '';
      return `**[${index + 1}] ${fileName}** ${score}\n> ${snippet}${ellipsis}`;
    })
    .join('\n\n');
}
/**
 * Kick off a re-index of both stores (qmd embeddings and the pgvector table)
 * in the background and return immediately.
 *
 * Both jobs append to /tmp/qmd-embed.log. The redirection is written as
 * `>> file 2>&1`: redirections apply left to right, so the previous
 * `2>&1 >> file` pointed stderr at the original stdout and only stdout
 * reached the log.
 *
 * @returns A user-facing status line. The failure message is produced only
 *   when launching throws synchronously; the outcome of the background jobs
 *   themselves is not observed here.
 */
async function triggerEmbed(): Promise<string> {
  const toLog = '>> /tmp/qmd-embed.log 2>&1';
  try {
    // Fire and forget: each job is backgrounded by the shell with `&`.
    exec(
      `${QMD_CMD} embed ${toLog} & ${EMBED_PY} embed ${toLog} &`,
      { env: { ...process.env, HOME: '/home/selig' } },
    );
    return '✅ 索引更新已在背景啟動,約需 1-5 分鐘完成。';
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return `❌ 索引啟動失敗: ${reason}`;
  }
}
/**
 * Collect statistics from both stores: the qmd collection list and the
 * pgvector stats printed by the embedding script.
 *
 * The two probes run one after the other; a failing probe contributes a
 * one-line error entry instead of aborting the report.
 *
 * @returns The two sections joined by a blank line.
 */
async function getStats(): Promise<string> {
  const firstLineOf = (err: any) => err.message?.split('\n')[0];
  const fenced = (output: string) => `\`\`\`\n${output.trim()}\n\`\`\``;
  const sections: string[] = [];

  // Section 1: qmd collection list
  try {
    const qmdRun = await execAsync(`${QMD_CMD} collection list`, {
      timeout: 5000,
      env: { ...process.env, HOME: '/home/selig' },
    });
    sections.push(`**qmd Collections:**\n${fenced(qmdRun.stdout)}`);
  } catch (err: any) {
    sections.push(`qmd: ${firstLineOf(err)}`);
  }

  // Section 2: pgvector stats
  try {
    const pgRun = await execAsync(`${EMBED_PY} stats`, { timeout: 10000 });
    sections.push(`**PostgreSQL pgvector:**\n${fenced(pgRun.stdout)}`);
  } catch (err: any) {
    sections.push(`pgvector: ${firstLineOf(err)}`);
  }

  return sections.join('\n\n');
}
/**
 * Classify a user message by keyword.
 *
 * The message is lower-cased and checked against the rules in order, so when
 * several keyword groups match, the earlier rule wins (embed, then stats,
 * then vsearch). Anything unmatched is a plain search.
 *
 * @param message - Raw user message.
 * @returns The detected intent.
 */
function detectIntent(message: string): 'search' | 'embed' | 'stats' | 'vsearch' {
  const text = message.toLowerCase();
  const rules: Array<[RegExp, 'embed' | 'stats' | 'vsearch']> = [
    [/更新|重新索引|rebuild|index|embed|掃描/, 'embed'],
    [/統計|stat|幾個|多少|collection/, 'stats'],
    [/語意|向量|vsearch|概念|類似/, 'vsearch'],
  ];
  for (const [pattern, intent] of rules) {
    if (pattern.test(text)) {
      return intent;
    }
  }
  return 'search';
}
/**
 * Strip command words and filler from a message, leaving the search keywords.
 *
 * First a leading trigger phrase (optionally followed by an ASCII or
 * full-width colon) is removed, then polite/filler fragments and question
 * marks anywhere in the text. The filler pattern previously contained an
 * empty alternative ahead of `\?`, which always matched first and kept
 * question marks from ever being removed.
 *
 * @param message - Raw user message.
 * @returns The cleaned query, or the trimmed original message when cleaning
 *   would leave nothing.
 */
function extractQuery(message: string): string {
  // \uFF1A is the full-width colon, \uFF1F the full-width question mark.
  const cleaned = message
    .replace(/^(搜尋|查找|找資料|幫我找|查一下|搜一下|找到|有沒有|之前說過|我之前|recall|brain search|qmd)[:\uFF1A]?\s*/i, '')
    .replace(/(請|幫我|的資料|的內容|相關|嗎|\uFF1F|\?)/g, '')
    .trim();
  return cleaned || message.trim();
}
// ─── Main handler ───────────────────────────────────────────────────────────
/**
 * Skill entry point: route a chat message to index update, statistics,
 * semantic-only search, or the default hybrid (BM25 + vector) search.
 *
 * @param ctx - Invocation context; the message text is read from
 *   `ctx.message.text`, falling back to `ctx.message.content`. A missing or
 *   non-string message is treated as empty.
 * @returns An object with the markdown `reply` and, for searches, a
 *   `metadata` object describing the query and result counts.
 */
export async function handler(ctx: any) {
  const rawText: unknown = ctx?.message?.text || ctx?.message?.content || '';
  // Guard against structured (non-string) content, which would break the string helpers below.
  const message = typeof rawText === 'string' ? rawText : '';
  const intent = detectIntent(message);
  const query = extractQuery(message);

  // Index update
  if (intent === 'embed') {
    const result = await triggerEmbed();
    return { reply: `🧠 **第二大腦索引更新**\n\n${result}` };
  }

  // Statistics
  if (intent === 'stats') {
    const stats = await getStats();
    return { reply: `📊 **知識庫統計**\n\n${stats}` };
  }

  // Semantic search (pgvector only)
  if (intent === 'vsearch') {
    if (!query) {
      return { reply: '請提供搜尋關鍵字,例如:「語意搜尋 Telegram 機器人設定」' };
    }
    const results = await pgSearch(query, 5);
    const formatted = formatPgResults(results);
    return {
      reply: `🔍 **語意搜尋**:${query}\n\n${formatted}`,
      metadata: { query, results_count: results.length, engine: 'pgvector' },
    };
  }

  // Hybrid search (BM25 + vector)
  if (!query) {
    return { reply: '請提供搜尋關鍵字,例如:「搜尋 nginx 設定」' };
  }

  // The two engines are independent, so query them concurrently.
  const [bm25Result, vectorResults] = await Promise.all([
    qmdSearch(query, 3),
    pgSearch(query, 3),
  ]);

  let reply = `🧠 **第二大腦搜尋**:${query}\n\n`;
  reply += `### 全文搜尋 (BM25)\n${bm25Result}`;
  // Decide on the result count alone: inspecting the formatted text would hide
  // the section whenever a snippet happens to contain the "no results" phrase.
  if (vectorResults.length > 0) {
    reply += `\n\n### 語意搜尋 (向量)\n${formatPgResults(vectorResults)}`;
  }

  // Truncate overly long replies
  if (reply.length > MAX_SEARCH_LEN) {
    let cut = MAX_SEARCH_LEN;
    const lastUnit = reply.charCodeAt(cut - 1);
    // Do not leave half of a surrogate pair (e.g. an emoji) at the cut point.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    reply = reply.slice(0, cut) + '\n\n...(結果已截斷,輸入「語意搜尋 [關鍵字]」可專注向量搜尋)';
  }

  return {
    reply,
    metadata: {
      query,
      bm25_chars: bm25Result.length,
      vector_results: vectorResults.length,
    },
  };
}