/**
 * qmd-brain skill
 * Second-brain knowledge base: BM25 full-text search + PostgreSQL pgvector
 * semantic search.
 *
 * Dependencies:
 * - qmd CLI (npm install -g @tobilu/qmd)
 * - embed_to_pg.py (Python venv at /home/selig/apps/qmd-pg/)
 */

import { execSync, exec, execFile } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);
// execFile passes arguments as an argv array (no shell), so user-supplied
// search text can never be interpreted as shell syntax.
const execFileAsync = promisify(execFile);

const QMD_CMD = '/home/selig/.nvm/versions/node/v24.13.1/bin/qmd';
const EMBED_PYTHON = '/home/selig/apps/qmd-pg/venv/bin/python3';
const EMBED_SCRIPT = '/home/selig/apps/qmd-pg/embed_to_pg.py';
/** Shell form of the embed command; only used where no user input is involved. */
const EMBED_PY = `${EMBED_PYTHON} ${EMBED_SCRIPT}`;
/** Maximum number of characters of the reply built by the hybrid search. */
const MAX_SEARCH_LEN = 1500;

interface SearchResult {
  source: string;
  chunk: number;
  text: string;
  similarity?: number;
}

interface QmdResult {
  path: string;
  text?: string;
  score?: number;
}

/** Environment used for qmd invocations: the current env with HOME pinned. */
function qmdEnv(): NodeJS.ProcessEnv {
  return { ...process.env, HOME: '/home/selig' };
}

/** Returns the first line of an error's message (or of its string form). */
function firstLine(err: unknown): string {
  const msg = err instanceof Error ? err.message : String(err);
  return msg.split('\n')[0] ?? msg;
}

/**
 * Runtime guard for one parsed search hit. Only `source` and `text` are
 * checked because those are the fields this module dereferences; `chunk` and
 * `similarity` are not validated here.
 */
function isSearchResult(value: unknown): value is SearchResult {
  if (typeof value !== 'object' || value === null) return false;
  const rec = value as Record<string, unknown>;
  return typeof rec.source === 'string' && typeof rec.text === 'string';
}

/**
 * Runs a qmd BM25 full-text search.
 *
 * @param query - Search text, passed to qmd as a single argument.
 * @param topK - Maximum number of hits requested via `--limit`.
 * @returns The trimmed markdown output, a "no results" placeholder when the
 *   output is empty, or a one-line error description if the command fails.
 */
async function qmdSearch(query: string, topK = 5): Promise<string> {
  try {
    const { stdout } = await execFileAsync(
      QMD_CMD,
      ['search', query, '--output', 'markdown', '--limit', String(topK)],
      { timeout: 15000, env: qmdEnv() }
    );
    return stdout.trim() || '(無結果)';
  } catch (e: unknown) {
    return `qmd search 錯誤: ${firstLine(e)}`;
  }
}

/**
 * Runs a PostgreSQL vector (semantic) search through embed_to_pg.py.
 *
 * @param query - Search text, passed to the script as a single argument.
 * @param topK - Maximum number of hits requested via `--top-k`.
 * @returns The parsed hits; an empty array when the command fails, times out,
 *   or prints something that is not a JSON array (best-effort by design).
 */
async function pgSearch(query: string, topK = 5): Promise<SearchResult[]> {
  try {
    const { stdout } = await execFileAsync(
      EMBED_PYTHON,
      [EMBED_SCRIPT, 'search', query, '--top-k', String(topK), '--json'],
      { timeout: 20000 }
    );
    const parsed: unknown = JSON.parse(stdout);
    // Never trust the shape of external output: keep only well-formed hits.
    return Array.isArray(parsed) ? parsed.filter(isSearchResult) : [];
  } catch {
    return [];
  }
}

/**
 * Formats pgvector search hits as markdown.
 *
 * Each hit shows its 1-based rank, the file name (last path segment of
 * `source`), the similarity as a percentage when present, and the first 200
 * characters of the text with newlines replaced by spaces.
 *
 * @param results - Hits to render.
 * @returns The markdown text, or a placeholder when `results` is empty.
 */
function formatPgResults(results: SearchResult[]): string {
  if (!results.length) return '(向量庫無相關結果)';
  return results
    .map((r, i) => {
      const fname = r.source.split('/').pop() || r.source;
      const snippet = r.text.slice(0, 200).replace(/\n/g, ' ');
      // typeof check (not truthiness) so a similarity of exactly 0 is shown.
      const score =
        typeof r.similarity === 'number'
          ? `${(r.similarity * 100).toFixed(1)}%`
          : '';
      return `**[${i + 1}] ${fname}** ${score}\n> ${snippet}...`;
    })
    .join('\n\n');
}

/**
 * Starts a vector-index refresh (`qmd embed` and `embed_to_pg.py embed`).
 *
 * Both commands are launched in the background by the shell and are not
 * awaited; their output is appended to /tmp/qmd-embed.log.
 *
 * @returns A status message for the user.
 */
async function triggerEmbed(): Promise<string> {
  try {
    // Redirect order matters: `>> file 2>&1` sends stderr to the log as well,
    // whereas `2>&1 >> file` would leave stderr on the original stdout.
    exec(
      `${QMD_CMD} embed >> /tmp/qmd-embed.log 2>&1 & ${EMBED_PY} embed >> /tmp/qmd-embed.log 2>&1 &`,
      { env: qmdEnv() }
    );
    return '✅ 索引更新已在背景啟動,約需 1-5 分鐘完成。';
  } catch (e: unknown) {
    return `❌ 索引啟動失敗: ${e instanceof Error ? e.message : String(e)}`;
  }
}

/**
 * Collects statistics from both backends.
 *
 * @returns Markdown with the output of `qmd collection list` and
 *   `embed_to_pg.py stats`; a failing command contributes a one-line error
 *   instead of aborting the whole report.
 */
async function getStats(): Promise<string> {
  const results: string[] = [];

  // qmd collection list
  try {
    const { stdout } = await execAsync(`${QMD_CMD} collection list`, {
      timeout: 5000,
      env: qmdEnv(),
    });
    results.push(`**qmd Collections:**\n\`\`\`\n${stdout.trim()}\n\`\`\``);
  } catch (e: unknown) {
    results.push(`qmd: ${firstLine(e)}`);
  }

  // pgvector stats
  try {
    const { stdout } = await execAsync(`${EMBED_PY} stats`, { timeout: 10000 });
    results.push(`**PostgreSQL pgvector:**\n\`\`\`\n${stdout.trim()}\n\`\`\``);
  } catch (e: unknown) {
    results.push(`pgvector: ${firstLine(e)}`);
  }

  return results.join('\n\n');
}

/**
 * Classifies the user's message by keyword.
 *
 * Checks run in priority order: index update, statistics, semantic search;
 * anything else is treated as a hybrid search. Matching is plain substring
 * matching on the lower-cased message.
 */
function detectIntent(message: string): 'search' | 'embed' | 'stats' | 'vsearch' {
  const lower = message.toLowerCase();
  if (/更新|重新索引|rebuild|index|embed|掃描/.test(lower)) return 'embed';
  if (/統計|stat|幾個|多少|collection/.test(lower)) return 'stats';
  if (/語意|向量|vsearch|概念|類似/.test(lower)) return 'vsearch';
  return 'search';
}

/**
 * Extracts the search keywords from a message.
 *
 * Strips one leading command word (optionally followed by an ASCII or
 * full-width colon), then removes filler words and question marks anywhere
 * in the text. If nothing is left, the trimmed original message is returned.
 */
function extractQuery(message: string): string {
  // \uFF1A / \uFF1F are the full-width colon and question mark; written as
  // escapes so the pattern survives any re-encoding of this file.
  return (
    message
      .replace(
        /^(搜尋|查找|找資料|幫我找|查一下|搜一下|找到|有沒有|之前說過|我之前|recall|brain search|qmd)[:\uFF1A]?\s*/i,
        ''
      )
      .replace(/(請|幫我|的資料|的內容|相關|嗎|\uFF1F|\?)/g, '')
      .trim() || message.trim()
  );
}

// ─── Main handler ───────────────────────────────────────────────────────────

/**
 * Skill entry point: routes the incoming message to index update, statistics,
 * semantic search, or hybrid (BM25 + vector) search.
 *
 * @param ctx - Invocation context; the message is read from
 *   `ctx.message.text`, falling back to `ctx.message.content`.
 * @returns An object with a markdown `reply` and, for searches, `metadata`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- context shape is defined by the host, not by this file
export async function handler(ctx: any) {
  const raw: unknown = ctx?.message?.text || ctx?.message?.content || '';
  const message = typeof raw === 'string' ? raw : '';
  const intent = detectIntent(message);
  const query = extractQuery(message);

  // Index update
  if (intent === 'embed') {
    const result = await triggerEmbed();
    return { reply: `🧠 **第二大腦索引更新**\n\n${result}` };
  }

  // Statistics
  if (intent === 'stats') {
    const stats = await getStats();
    return { reply: `📊 **知識庫統計**\n\n${stats}` };
  }

  // Semantic search (pgvector only)
  if (intent === 'vsearch') {
    if (!query) {
      return { reply: '請提供搜尋關鍵字,例如:「語意搜尋 Telegram 機器人設定」' };
    }
    const results = await pgSearch(query, 5);
    const formatted = formatPgResults(results);
    return {
      reply: `🔍 **語意搜尋**:${query}\n\n${formatted}`,
      metadata: { query, results_count: results.length, engine: 'pgvector' },
    };
  }

  // Hybrid search (BM25 + vector)
  if (!query) {
    return { reply: '請提供搜尋關鍵字,例如:「搜尋 nginx 設定」' };
  }

  // Run both searches in parallel; each one handles its own failures.
  const [bm25Result, vectorResults] = await Promise.all([
    qmdSearch(query, 3),
    pgSearch(query, 3),
  ]);

  // Decide on the result count alone: inspecting the formatted text for the
  // "no results" placeholder would hide real hits whose snippet contains it.
  const hasVector = vectorResults.length > 0;

  let reply = `🧠 **第二大腦搜尋**:${query}\n\n`;
  reply += `### 全文搜尋 (BM25)\n${bm25Result}`;

  if (hasVector) {
    reply += `\n\n### 語意搜尋 (向量)\n${formatPgResults(vectorResults)}`;
  }

  // Truncate overly long replies
  if (reply.length > MAX_SEARCH_LEN) {
    reply =
      reply.slice(0, MAX_SEARCH_LEN) +
      '\n\n...(結果已截斷,輸入「語意搜尋 [關鍵字]」可專注向量搜尋)';
  }

  return {
    reply,
    metadata: {
      query,
      bm25_chars: bm25Result.length,
      vector_results: vectorResults.length,
    },
  };
}