// 6 custom skills (assign-task, dispatch-webhook, daily-briefing, task-capture, qmd-brain, tts-voice) with technical documentation. Compatible with Claude Code, OpenClaw, Codex CLI, and OpenCode.
// 197 lines
// 6.1 KiB
// TypeScript
/**
 * qmd-brain skill
 * 第二大腦知識庫:BM25 全文搜尋 + PostgreSQL pgvector 語意搜尋
 *
 * 依賴:
 * - qmd CLI (npm install -g @tobilu/qmd)
 * - embed_to_pg.py (Python venv at /home/selig/apps/qmd-pg/)
 */
|
||
|
||
import { execSync, exec } from 'child_process';
|
||
import { promisify } from 'util';
|
||
|
||
/** Promise-returning form of `exec`, so shell commands can be awaited. */
const execAsync = promisify(exec);

/** Absolute path of the qmd CLI executable. */
const QMD_CMD = '/home/selig/.nvm/versions/node/v24.13.1/bin/qmd';

/** Command prefix: the venv's Python interpreter followed by the embed_to_pg.py script. */
const EMBED_PY = '/home/selig/apps/qmd-pg/venv/bin/python3 /home/selig/apps/qmd-pg/embed_to_pg.py';

/** Maximum number of characters of search output kept in a reply. */
const MAX_SEARCH_LEN = 1500;
|
||
|
||
/** One hit parsed from the JSON output of the `embed_to_pg.py search` command. */
interface SearchResult {
  /** Source identifier; the segment after the last `/` is shown as the file name. */
  source: string;
  /** Chunk number of the hit — presumably its index within the source document; confirm against embed_to_pg.py. */
  chunk: number;
  /** Text of the matched chunk; the first 200 characters are shown as a snippet. */
  text: string;
  /** Similarity score, rendered as a percentage (value × 100) — presumably in the range 0–1. */
  similarity?: number;
}
|
||
|
||
/**
 * Shape of a single qmd search hit.
 *
 * NOTE(review): nothing in the visible part of this file reads this type;
 * the field meanings below are inferred from their names — confirm against
 * the qmd CLI output before relying on them.
 */
interface QmdResult {
  /** Path of the matched document (presumably). */
  path: string;
  /** Matched text, when provided. */
  text?: string;
  /** Relevance score, when provided. */
  score?: number;
}
|
||
|
||
/**
 * Run a qmd BM25 full-text search and return its markdown output.
 *
 * The query originates from a chat message, so it is passed to the shell
 * inside single quotes (with embedded single quotes escaped). The previous
 * `JSON.stringify` quoting produced a double-quoted word, inside which the
 * shell still expands `$(...)`, backticks and `$VAR`.
 *
 * @param query - Search text, forwarded verbatim to `qmd search`.
 * @param topK - Maximum number of hits requested via `--limit`; values that
 *   are not positive integers fall back to 5.
 * @returns The trimmed stdout of qmd, `(無結果)` when stdout is empty, or a
 *   one-line error description. This function never rejects.
 */
async function qmdSearch(query: string, topK = 5): Promise<string> {
  // POSIX single quotes disable every expansion; a literal ' is written as '\''.
  const quotedQuery = "'" + query.replace(/'/g, "'\\''") + "'";
  // topK is interpolated into the command line, so only a plain positive integer is allowed.
  const limit = Number.isInteger(topK) && topK > 0 ? topK : 5;

  try {
    const { stdout } = await execAsync(
      `${QMD_CMD} search ${quotedQuery} --output markdown --limit ${limit}`,
      { timeout: 15000, env: { ...process.env, HOME: '/home/selig' } },
    );
    return stdout.trim() || '(無結果)';
  } catch (e: unknown) {
    // Only the first line of the failure is reported, to keep the reply short.
    const detail = e instanceof Error ? e.message : String(e);
    return `qmd search 錯誤: ${detail.split('\n')[0]}`;
  }
}
|
||
|
||
/**
 * Run a PostgreSQL pgvector semantic search through `embed_to_pg.py`.
 *
 * The query originates from a chat message, so it is passed to the shell
 * inside single quotes (with embedded single quotes escaped); the previous
 * `JSON.stringify` quoting still allowed `$(...)`, backtick and `$VAR`
 * expansion inside the double-quoted word.
 *
 * This is a best-effort lookup: any failure (non-zero exit, timeout, output
 * that is not a JSON array) is logged to stderr and reported to the caller
 * as an empty list.
 *
 * @param query - Search text, forwarded verbatim to the script.
 * @param topK - Maximum number of hits requested via `--top-k`; values that
 *   are not positive integers fall back to 5.
 * @returns The parsed hits that carry string `source` and `text` fields, or
 *   `[]` on failure. This function never rejects.
 */
async function pgSearch(query: string, topK = 5): Promise<SearchResult[]> {
  // POSIX single quotes disable every expansion; a literal ' is written as '\''.
  const quotedQuery = "'" + query.replace(/'/g, "'\\''") + "'";
  // topK is interpolated into the command line, so only a plain positive integer is allowed.
  const limit = Number.isInteger(topK) && topK > 0 ? topK : 5;

  try {
    const { stdout } = await execAsync(
      `${EMBED_PY} search ${quotedQuery} --top-k ${limit} --json`,
      { timeout: 20000 },
    );

    const parsed: unknown = JSON.parse(stdout);
    if (!Array.isArray(parsed)) {
      console.error('pgSearch: expected a JSON array from embed_to_pg.py');
      return [];
    }

    // Keep only entries whose `source` and `text` are strings — those are the
    // fields the result formatter dereferences. Other fields are not checked
    // here because the script's exact output schema is not visible from this file.
    const usable = parsed.filter((item: unknown) => {
      if (typeof item !== 'object' || item === null) return false;
      const candidate = item as { source?: unknown; text?: unknown };
      return typeof candidate.source === 'string' && typeof candidate.text === 'string';
    });
    return usable as SearchResult[];
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e);
    console.error(`pgSearch failed: ${detail.split('\n')[0]}`);
    return [];
  }
}
|
||
|
||
/**
 * Render pgvector search hits as a markdown list.
 *
 * Each hit becomes a bold `[n] file-name` heading, followed by the similarity
 * as a percentage when one is present, and a quoted one-line snippet of at
 * most 200 characters. An ellipsis is appended only when the text was
 * actually cut, and a similarity of exactly 0 is shown as `0.0%` rather than
 * being treated as missing.
 *
 * @param results - Hits to render.
 * @returns `(向量庫無相關結果)` for an empty list, otherwise the rendered hits
 *   separated by blank lines.
 */
function formatPgResults(results: SearchResult[]): string {
  if (results.length === 0) return '(向量庫無相關結果)';

  const snippetLimit = 200;

  return results
    .map((hit, index) => {
      // Show only the last path segment; fall back to the full source when it ends in '/'.
      const fname = hit.source.split('/').pop() || hit.source;
      const snippet = hit.text.slice(0, snippetLimit).replace(/\n/g, ' ');
      const ellipsis = hit.text.length > snippetLimit ? '...' : '';
      const score =
        typeof hit.similarity === 'number' && Number.isFinite(hit.similarity)
          ? `${(hit.similarity * 100).toFixed(1)}%`
          : '';
      return `**[${index + 1}] ${fname}** ${score}\n> ${snippet}${ellipsis}`;
    })
    .join('\n\n');
}
|
||
|
||
/**
 * Start a rebuild of both indexes (`qmd embed` and `embed_to_pg.py embed`) in
 * the background and return immediately.
 *
 * Both commands are launched with a trailing `&`, so the result of the
 * indexing itself is not observed here; their output is appended to
 * `/tmp/qmd-embed.log`.
 *
 * @returns A confirmation message, or a failure message if launching the
 *   shell threw synchronously.
 */
async function triggerEmbed(): Promise<string> {
  const logFile = '/tmp/qmd-embed.log';

  try {
    // Redirections are processed left to right: stdout must be pointed at the
    // log file first, then stderr duplicated onto it. The previous order
    // (`2>&1 >> file`) duplicated stderr onto the original stdout, so error
    // output never reached the log.
    exec(
      `${QMD_CMD} embed >> ${logFile} 2>&1 & ${EMBED_PY} embed >> ${logFile} 2>&1 &`,
      { env: { ...process.env, HOME: '/home/selig' } },
    );
    return '✅ 索引更新已在背景啟動,約需 1-5 分鐘完成。';
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e);
    return `❌ 索引啟動失敗: ${detail}`;
  }
}
|
||
|
||
/**
 * Collect statistics from both stores: the qmd collection list and the
 * pgvector stats reported by `embed_to_pg.py stats`.
 *
 * The two commands are independent, so they run concurrently. A failure of
 * either one is reported inline as a one-line message instead of failing the
 * whole call.
 *
 * @returns Two markdown sections (qmd first, pgvector second) separated by a
 *   blank line. This function never rejects.
 */
async function getStats(): Promise<string> {
  // Reduce any thrown value to the first line of its message.
  const firstLine = (e: unknown): string => {
    const detail = e instanceof Error ? e.message : String(e);
    return detail.split('\n')[0] ?? '';
  };

  const qmdSection = async (): Promise<string> => {
    try {
      const { stdout } = await execAsync(`${QMD_CMD} collection list`, {
        timeout: 5000,
        env: { ...process.env, HOME: '/home/selig' },
      });
      return `**qmd Collections:**\n\`\`\`\n${stdout.trim()}\n\`\`\``;
    } catch (e: unknown) {
      return `qmd: ${firstLine(e)}`;
    }
  };

  const pgSection = async (): Promise<string> => {
    try {
      const { stdout } = await execAsync(`${EMBED_PY} stats`, { timeout: 10000 });
      return `**PostgreSQL pgvector:**\n\`\`\`\n${stdout.trim()}\n\`\`\``;
    } catch (e: unknown) {
      return `pgvector: ${firstLine(e)}`;
    }
  };

  // Promise.all preserves argument order, so the qmd section always comes first.
  const sections = await Promise.all([qmdSection(), pgSection()]);
  return sections.join('\n\n');
}
|
||
|
||
/**
 * Classify a chat message by keyword.
 *
 * The message is lower-cased and checked against three keyword patterns in
 * priority order — index rebuild, statistics, semantic search. The first
 * pattern that matches wins; a message matching none is a plain search.
 *
 * @param message - Raw message text.
 * @returns `'embed'`, `'stats'`, `'vsearch'`, or `'search'` as the fallback.
 */
function detectIntent(message: string): 'search' | 'embed' | 'stats' | 'vsearch' {
  const normalized = message.toLowerCase();

  // Order matters: an earlier rule shadows later ones when several keywords appear.
  const rules: Array<[RegExp, 'embed' | 'stats' | 'vsearch']> = [
    [/更新|重新索引|rebuild|index|embed|掃描/, 'embed'],
    [/統計|stat|幾個|多少|collection/, 'stats'],
    [/語意|向量|vsearch|概念|類似/, 'vsearch'],
  ];

  for (const [pattern, intent] of rules) {
    if (pattern.test(normalized)) {
      return intent;
    }
  }
  return 'search';
}
|
||
|
||
/**
 * Strip command words and filler from a chat message, leaving the search
 * keywords.
 *
 * Two passes are applied: a leading trigger phrase (optionally followed by a
 * colon and whitespace) is removed, then filler words and question marks are
 * removed wherever they occur. If nothing is left, the trimmed original
 * message is returned instead.
 *
 * The full-width colon and question mark are written as `\uFF1A` / `\uFF1F`
 * escapes. In the previous text the filler pattern contained a bare `?`
 * alternative, which is a regular-expression syntax error, and the colon
 * class was `[:::]` — presumably full-width characters that had been
 * normalised to ASCII; confirm against the upstream file.
 *
 * NOTE(review): the filler pass also removes these words from the middle of
 * a query, which can alter a phrase that merely contains one of them.
 *
 * @param message - Raw message text.
 * @returns The cleaned query, or the trimmed message when cleaning leaves
 *   nothing.
 */
function extractQuery(message: string): string {
  const cleaned = message
    .replace(
      /^(搜尋|查找|找資料|幫我找|查一下|搜一下|找到|有沒有|之前說過|我之前|recall|brain search|qmd)[:\uFF1A]?\s*/i,
      '',
    )
    .replace(/(請|幫我|的資料|的內容|相關|嗎|\uFF1F|\?)/g, '')
    .trim();

  return cleaned || message.trim();
}
|
||
|
||
// ─── 主 handler ─────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Skill entry point: route a chat message to an index rebuild, a statistics
 * report, a semantic-only search, or the default hybrid search.
 *
 * The message text is read from `ctx.message.text`, falling back to
 * `ctx.message.content`. A missing context or a non-string value is treated
 * as an empty message, which yields the "please provide keywords" prompt
 * instead of throwing.
 *
 * @param ctx - Invocation context supplied by the host; only `message.text`
 *   and `message.content` are read.
 * @returns An object with a markdown `reply`; search replies also carry a
 *   `metadata` object describing the query and result counts.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- host context shape is not declared in this file
export async function handler(ctx: any) {
  const rawMessage: unknown = ctx?.message?.text || ctx?.message?.content || '';
  const message = typeof rawMessage === 'string' ? rawMessage : '';
  const intent = detectIntent(message);
  const query = extractQuery(message);

  // Index rebuild
  if (intent === 'embed') {
    const result = await triggerEmbed();
    return { reply: `🧠 **第二大腦索引更新**\n\n${result}` };
  }

  // Statistics
  if (intent === 'stats') {
    const stats = await getStats();
    return { reply: `📊 **知識庫統計**\n\n${stats}` };
  }

  // Semantic search (pgvector only)
  if (intent === 'vsearch') {
    if (!query) {
      return { reply: '請提供搜尋關鍵字,例如:「語意搜尋 Telegram 機器人設定」' };
    }
    const results = await pgSearch(query, 5);
    const formatted = formatPgResults(results);
    return {
      reply: `🔍 **語意搜尋**:${query}\n\n${formatted}`,
      metadata: { query, results_count: results.length, engine: 'pgvector' },
    };
  }

  // Hybrid search (BM25 + vector)
  if (!query) {
    return { reply: '請提供搜尋關鍵字,例如:「搜尋 nginx 設定」' };
  }

  // The two engines are independent, so query them concurrently.
  const [bm25Result, vectorResults] = await Promise.all([
    qmdSearch(query, 3),
    pgSearch(query, 3),
  ]);

  // Decide from the hit count alone. The previous extra check searched the
  // formatted text for the "no results" phrase, which could hide real hits
  // whose snippet happened to contain that phrase.
  const hasVector = vectorResults.length > 0;

  let reply = `🧠 **第二大腦搜尋**:${query}\n\n`;
  reply += `### 全文搜尋 (BM25)\n${bm25Result}`;

  if (hasVector) {
    reply += `\n\n### 語意搜尋 (向量)\n${formatPgResults(vectorResults)}`;
  }

  // Truncate over-long replies and point the user at the semantic-only search.
  if (reply.length > MAX_SEARCH_LEN) {
    reply = reply.slice(0, MAX_SEARCH_LEN) + '\n\n...(結果已截斷,輸入「語意搜尋 [關鍵字]」可專注向量搜尋)';
  }

  return {
    reply,
    metadata: {
      query,
      bm25_chars: bm25Result.length,
      vector_results: vectorResults.length,
    },
  };
}
|