airefactor #19
@ -60,7 +60,7 @@ FORENSIC_AUDIT_MAX_ENTRIES=50
|
|||||||
|
|
||||||
# === AI SEMANTIC SEARCH ===
|
# === AI SEMANTIC SEARCH ===
|
||||||
# Enable semantic search (highly recommended for better results)
|
# Enable semantic search (highly recommended for better results)
|
||||||
AI_EMBEDDINGS_ENABLED=true
|
REMOVE_AI_EMBEDDINGS_ENABLED=true
|
||||||
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
|
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
|
||||||
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
|
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
|
||||||
AI_EMBEDDINGS_MODEL=mistral-embed
|
AI_EMBEDDINGS_MODEL=mistral-embed
|
||||||
@ -122,8 +122,8 @@ AI_EMBEDDINGS_BATCH_SIZE=10
|
|||||||
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
||||||
|
|
||||||
# === Context Management ===
|
# === Context Management ===
|
||||||
AI_MAX_CONTEXT_TOKENS=4000
|
REMOVE_AI_MAX_CONTEXT_TOKENS=4000
|
||||||
AI_MAX_PROMPT_TOKENS=2500
|
REMOVE_AI_MAX_PROMPT_TOKENS=2500
|
||||||
|
|
||||||
# === Confidence Scoring ===
|
# === Confidence Scoring ===
|
||||||
CONFIDENCE_SEMANTIC_WEIGHT=0.5
|
CONFIDENCE_SEMANTIC_WEIGHT=0.5
|
||||||
|
333
find-duplicates.mjs
Normal file
333
find-duplicates.mjs
Normal file
@ -0,0 +1,333 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
// find-duplicates.mjs
|
||||||
|
// Usage:
|
||||||
|
// node find-duplicates.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
|
||||||
|
// Example:
|
||||||
|
// node find-duplicates.mjs . --mode struct --min-lines 3
|
||||||
|
|
||||||
|
import fs from "fs";
|
||||||
|
import path from "path";
|
||||||
|
import * as url from "url";
|
||||||
|
import ts from "typescript";
|
||||||
|
|
||||||
|
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
|
||||||
|
|
||||||
|
/** -------- CLI OPTIONS -------- */
|
||||||
|
// Command-line arguments after the node binary and the script path.
const args = process.argv.slice(2);

// Defaults; each can be overridden from the command line below.
let rootDir = ".";
let mode = "struct"; // "exact" | "struct"
let minLines = 3;
let outputJson = false;

// Single pass over the arguments. Flags that take a value consume the next
// slot via ++i; unknown flags are ignored.
for (let i = 0; i < args.length; i++) {
  const a = args[i];
  if (!a.startsWith("--") && rootDir === ".") {
    // First positional argument is the directory to scan.
    rootDir = a;
  } else if (a === "--mode") {
    mode = (args[++i] || "struct").toLowerCase();
    if (!["exact", "struct"].includes(mode)) {
      console.error("Invalid --mode. Use 'exact' or 'struct'.");
      process.exit(1);
    }
  } else if (a === "--min-lines") {
    minLines = parseInt(args[++i] || "3", 10);
    // A non-numeric value parses to NaN; every `count < NaN` comparison is
    // false, which would silently disable the min-lines filter.
    if (Number.isNaN(minLines)) {
      console.error("Invalid --min-lines. Use an integer, e.g. '--min-lines 3'.");
      process.exit(1);
    }
  } else if (a === "--json") {
    outputJson = true;
  }
}
|
||||||
|
|
||||||
|
/** -------- FILE DISCOVERY -------- */
|
||||||
|
// Directory names that are never descended into.
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);

// File extensions (lower-case, with leading dot) that are scanned.
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);

/**
 * Recursively collects the paths of all scannable source files below `dir`.
 *
 * Directories whose name is in DEFAULT_IGNORES are skipped. A file is kept
 * when its extension, compared case-insensitively, is in VALID_EXTS. Paths
 * are returned in directory-listing order, with a sub-directory's files
 * placed where that sub-directory appears in its parent's listing.
 *
 * @param {string} dir directory to start from
 * @returns {string[]} file paths, each built by joining onto `dir`
 */
function walk(dir) {
  /** @type {string[]} */
  const out = [];

  // Appends into one shared array instead of spreading child results into
  // push(), which can exceed the engine's argument limit on very large trees.
  const collect = (current) => {
    for (const e of fs.readdirSync(current, { withFileTypes: true })) {
      const p = path.join(current, e.name);
      if (e.isDirectory()) {
        if (!DEFAULT_IGNORES.has(e.name)) collect(p);
      } else if (e.isFile() && VALID_EXTS.has(path.extname(e.name).toLowerCase())) {
        // Lower-cased so "Foo.TSX" is treated like "foo.tsx", matching the
        // lower-casing the scan loop applies to extensions.
        out.push(p);
      }
    }
  };

  collect(dir);
  return out;
}
|
||||||
|
|
||||||
|
/** -------- ASTRO CODE EXTRACTION --------
 * Extracts the TS/JS code embedded in an .astro file:
 *  - the frontmatter between the leading `---` and the next line starting with `---`
 *  - the contents of every <script ...> ... </script> element
 *
 * Each returned block carries `offset`: the 1-based line of the original
 * file on which the block's first character sits. A position on line N of
 * the block therefore maps to file line N + offset - 1.
 *
 * @param {string} source full text of the .astro file
 * @returns {{code:string, offset:number}[]} blocks in order: frontmatter first (if any), then scripts
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];

  // Frontmatter must open the file; only the FIRST --- ... --- pair is used.
  if (source.startsWith("---")) {
    const end = source.indexOf("\n---", 3);
    if (end !== -1) {
      // The slice starts right after the opening '---', i.e. still on file
      // line 1 (its first character is that line's newline), so offset is 1.
      blocks.push({ code: source.slice(3, end + 1), offset: 1 });
    }
  }

  // <script ...> ... </script>
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  let m;
  while ((m = scriptRe.exec(source))) {
    const code = m[1] || "";
    // The opening tag cannot contain '>' (see [^>]* above), so the first '>'
    // of the match ends it. Measuring from there keeps line numbers right
    // even when the opening tag spans several lines.
    const codeStart = m.index + m[0].indexOf(">") + 1;
    blocks.push({ code, offset: indexToLine(source, codeStart) });
  }

  return blocks;
}

/** -------- UTIL: index -> 1-based line --------
 * Returns the 1-based line number of character index `idx` in `text`,
 * counting "\n" characters that occur before `idx`.
 *
 * @param {string} text
 * @param {number} idx character index; values past the end are clamped
 * @returns {number}
 */
function indexToLine(text, idx) {
  let line = 1;
  const limit = Math.min(idx, text.length);
  for (let i = 0; i < limit; i++) {
    if (text.charCodeAt(i) === 10) line++;
  }
  return line;
}
|
||||||
|
|
||||||
|
/** -------- AST HELPERS -------- */
|
||||||
|
/**
 * Parses `code` into a TypeScript source file with parent pointers set.
 *
 * @param {string} virtualPath file name given to the parser; a ".tsx" suffix selects TSX parsing, anything else plain TS
 * @param {string} code source text to parse
 */
function createSourceFile(virtualPath, code) {
  const scriptKind = virtualPath.endsWith(".tsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS;
  const setParentNodes = true; // getFunctionInfo reads node.parent to infer names
  return ts.createSourceFile(virtualPath, code, ts.ScriptTarget.Latest, setParentNodes, scriptKind);
}
|
||||||
|
|
||||||
|
/**
 * Builds a structural signature for an AST subtree: a "|"-joined, pre-order
 * list of node tokens in which identifiers and literal values are replaced by
 * generic placeholders, so two subtrees that differ only in names or literal
 * values yield the same string.
 *
 * @param node root of the subtree to fingerprint
 * @returns {string}
 */
function structuralSignature(node) {
  const K = ts.SyntaxKind;

  // Kinds collapsed to a placeholder; their children are NOT visited.
  const placeholderFor = new Map([
    [K.Identifier, "Id"],
    [K.PrivateIdentifier, "PrivId"],
    [K.StringLiteral, "Str"],
    [K.NoSubstitutionTemplateLiteral, "Str"],
    [K.TemplateHead, "Str"],
    [K.TemplateMiddle, "Str"],
    [K.TemplateTail, "Str"],
    [K.NumericLiteral, "Num"],
    [K.TrueKeyword, "Bool"],
    [K.FalseKeyword, "Bool"],
    [K.NullKeyword, "Nil"],
    [K.UndefinedKeyword, "Nil"],
  ]);

  // Kinds that all emit "Prop" but whose children ARE still visited.
  const propLike = new Set([
    K.PropertyAssignment,
    K.ShorthandPropertyAssignment,
    K.MethodDeclaration,
    K.MethodSignature,
  ]);

  /** @type {string[]} */
  const parts = [];

  // Comments and whitespace are not AST nodes, so they never reach here.
  // The callback must not return a truthy value: forEachChild stops early on one.
  const visit = (n) => {
    const placeholder = placeholderFor.get(n.kind);
    if (placeholder !== undefined) {
      parts.push(placeholder);
      return;
    }
    parts.push(propLike.has(n.kind) ? "Prop" : ts.SyntaxKind[n.kind] || `K${n.kind}`);
    n.forEachChild(visit);
  };

  visit(node);
  return parts.join("|");
}
|
||||||
|
|
||||||
|
/**
 * Collects every function-like body in a parsed source file: function
 * declarations, function expressions, arrow functions and method
 * declarations. Bodies with fewer non-empty lines than the module-level
 * `minLines` are dropped.
 *
 * @param sf source file produced by createSourceFile (parent pointers required)
 * @param {string} filePath path of the originating file (not read by this function)
 * @returns {Array<{
 *   name: string,
 *   bodyText: string,
 *   structKey: string,
 *   start: number,
 *   end: number,
 *   startLine: number,
 *   endLine: number
 * }>} entries in pre-order; startLine/endLine are 1-based within `sf`
 */
function getFunctionInfo(sf, filePath) {
  const found = [];

  // Records one body unless it is missing or below the min-lines threshold.
  const record = (nameNode, bodyNode) => {
    if (!bodyNode) return;

    const bodyText = bodyNode.getText(sf).trim();
    const nonEmptyLines = bodyText.split(/\r?\n/).filter(Boolean).length;
    if (nonEmptyLines < minLines) return;

    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();

    found.push({
      // Only plain identifiers give a name; anything else is reported as anonymous.
      name: nameNode && ts.isIdentifier(nameNode) ? nameNode.escapedText.toString() : "(anonymous)",
      bodyText,
      structKey: structuralSignature(bodyNode),
      start,
      end,
      // getLineAndCharacterOfPosition is 0-based; reports use 1-based lines.
      startLine: sf.getLineAndCharacterOfPosition(start).line + 1,
      endLine: sf.getLineAndCharacterOfPosition(end).line + 1,
    });
  };

  // Name for a function expression / arrow function: the variable or property
  // it is assigned to, else its own name, else null.
  const inferName = (fn) => {
    const owner = fn.parent;
    if (owner && ts.isVariableDeclaration(owner) && owner.name) return owner.name;
    if (owner && ts.isPropertyAssignment(owner) && ts.isIdentifier(owner.name)) return owner.name;
    return fn.name ? fn.name : null;
  };

  // Pre-order walk: a function is recorded before the functions nested in it.
  const visit = (node) => {
    if (ts.isFunctionDeclaration(node)) {
      if (node.body) record(node.name ?? null, node.body);
    } else if (ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
      const nameNode = inferName(node);
      if (node.body) record(nameNode, node.body);
    } else if (ts.isMethodDeclaration(node)) {
      if (node.body) record(node.name, node.body);
    }
    node.forEachChild(visit);
  };

  visit(sf);
  return found;
}
|
||||||
|
|
||||||
|
/** -------- MAIN SCAN -------- */
|
||||||
|
// Every scannable file below the requested root (resolved against the cwd).
const files = walk(path.resolve(process.cwd(), rootDir));

/** Maps from hash -> occurrences */
const groups = new Map();

/** Helper for exact hash: SHA-1 of the text with whitespace runs collapsed to one space */
import crypto from "crypto";
const exactHash = (text) => {
  const normalized = text.replace(/\s+/g, " ").trim();
  return crypto.createHash("sha1").update(normalized).digest("hex");
};

for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");

    // An .astro file yields one unit per embedded code block; any other file
    // is a single unit starting at line 1.
    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits =
      ext === ".astro"
        ? extractCodeFromAstro(raw).map((b, i) => ({
            virtualPath: file + `#astro${i + 1}.ts`,
            code: b.code,
            lineOffset: b.offset || 1,
          }))
        : [{ virtualPath: file, code: raw, lineOffset: 1 }];

    for (const unit of codeUnits) {
      const sf = createSourceFile(unit.virtualPath, unit.code);

      for (const f of getFunctionInfo(sf, file)) {
        // Group key: normalized body text in "exact" mode, AST shape otherwise.
        const key =
          mode === "exact"
            ? exactHash(f.bodyText)
            : crypto.createHash("sha1").update(f.structKey).digest("hex");

        // Only Astro blocks need their block-relative lines shifted to file lines.
        const shift = ext === ".astro" ? unit.lineOffset - 1 : 0;
        const relPath = path.relative(process.cwd(), file);
        const lineCount = f.endLine - f.startLine + 1;
        const previewHead = f.bodyText.split(/\r?\n/).slice(0, 5).join("\n");

        const item = {
          file,
          where: `${relPath}:${f.startLine + shift}-${f.endLine + shift}`,
          name: f.name,
          lines: lineCount,
          preview: lineCount > 5 ? previewHead + "\n..." : previewHead,
        };

        const bucket = groups.get(key);
        if (bucket) {
          bucket.push(item);
        } else {
          groups.set(key, [item]);
        }
      }
    }
  } catch (e) {
    // A file that cannot be read or parsed is reported and skipped, not fatal.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}
|
||||||
|
|
||||||
|
/** -------- REPORT -------- */
|
||||||
|
// Keep only hashes seen more than once, largest groups first.
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// The three outcomes are mutually exclusive branches and the script then ends
// naturally with exit code 0. process.exit() is deliberately not called right
// after console.log: where stdout is asynchronous it can terminate the process
// before a large report has been fully written.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
} else if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
} else {
  console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);
  dupes.forEach((g, i) => {
    console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
    // The first occurrence serves as the preview for the whole group.
    const example = g.items[0];
    console.log(` Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
    console.log(" ---");
    console.log(indent(example.preview, " "));
    console.log(" ---");
    g.items.forEach((it) => {
      console.log(` • ${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
    });
    console.log();
  });
}
|
||||||
|
|
||||||
|
/**
 * Prefixes every line of `s` with `pre`.
 *
 * @param {string} s text whose lines are separated by "\n"
 * @param {string} pre prefix placed before each line, including empty ones
 * @returns {string}
 */
function indent(s, pre) {
  // Prefix the first line, then re-join the rest with the prefix after each break.
  const lines = s.split("\n");
  return pre + lines.join("\n" + pre);
}
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
// src/components/ContributionButton.astro - CLEANED: Removed duplicate auth script
|
// src/components/ContributionButton.astro
|
||||||
export interface Props {
|
export interface Props {
|
||||||
type: 'edit' | 'new' | 'write';
|
type: 'edit' | 'new' | 'write';
|
||||||
toolName?: string;
|
toolName?: string;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
import { createToolSlug } from '../utils/toolHelpers.js';
|
import { createToolSlug } from '../utils/clientUtils.js';
|
||||||
|
|
||||||
export interface Props {
|
export interface Props {
|
||||||
toolName: string;
|
toolName: string;
|
||||||
|
@ -4,7 +4,6 @@ import { getToolsData } from '../utils/dataService.js';
|
|||||||
const data = await getToolsData();
|
const data = await getToolsData();
|
||||||
const scenarios = data.scenarios || [];
|
const scenarios = data.scenarios || [];
|
||||||
|
|
||||||
// Configuration
|
|
||||||
const maxDisplayed = 9;
|
const maxDisplayed = 9;
|
||||||
const displayedScenarios = scenarios.slice(0, maxDisplayed);
|
const displayedScenarios = scenarios.slice(0, maxDisplayed);
|
||||||
---
|
---
|
||||||
|
@ -307,7 +307,6 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
|
|
||||||
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
|
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
|
||||||
window.toolsData = toolsData;
|
window.toolsData = toolsData;
|
||||||
|
|
||||||
document.addEventListener('DOMContentLoaded', () => {
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
const elements = {
|
const elements = {
|
||||||
searchInput: document.getElementById('search-input'),
|
searchInput: document.getElementById('search-input'),
|
||||||
@ -394,6 +393,13 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when the tool has a non-blank projectUrl string.
// undefined, null, "" and whitespace-only values all count as "not hosted";
// a non-string value is also treated as not hosted rather than throwing on .trim().
function isToolHosted(tool) {
  const url = tool.projectUrl;
  return typeof url === "string" && url.trim() !== "";
}
|
||||||
|
|
||||||
function toggleCollapsible(toggleBtn, content, storageKey) {
|
function toggleCollapsible(toggleBtn, content, storageKey) {
|
||||||
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
||||||
const newState = !isCollapsed;
|
const newState = !isCollapsed;
|
||||||
@ -433,13 +439,6 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function isToolHosted(tool) {
|
|
||||||
return tool.projectUrl !== undefined &&
|
|
||||||
tool.projectUrl !== null &&
|
|
||||||
tool.projectUrl !== "" &&
|
|
||||||
tool.projectUrl.trim() !== "";
|
|
||||||
}
|
|
||||||
|
|
||||||
function initTagCloud() {
|
function initTagCloud() {
|
||||||
const visibleCount = 20;
|
const visibleCount = 20;
|
||||||
elements.tagCloudItems.forEach((item, index) => {
|
elements.tagCloudItems.forEach((item, index) => {
|
||||||
|
@ -2,17 +2,15 @@
|
|||||||
|
|
||||||
export const AI_PROMPTS = {
|
export const AI_PROMPTS = {
|
||||||
|
|
||||||
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
|
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
|
||||||
const modeInstruction = mode === 'workflow'
|
const modeInstruction = mode === 'workflow'
|
||||||
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
|
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
|
||||||
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
|
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
|
||||||
|
|
||||||
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
|
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
|
||||||
|
|
||||||
AUSWAHLMETHODE: ${selectionMethod}
|
AUSWAHLMETHODE:
|
||||||
${selectionMethod === 'embeddings_candidates' ?
|
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
|
||||||
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
|
|
||||||
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
|
|
||||||
|
|
||||||
${modeInstruction}
|
${modeInstruction}
|
||||||
|
|
||||||
@ -36,6 +34,11 @@ AUSWAHLSTRATEGIE:
|
|||||||
- Lieber weniger perfekte Items als viele mittelmäßige
|
- Lieber weniger perfekte Items als viele mittelmäßige
|
||||||
- Jedes Item muss begründbar sein
|
- Jedes Item muss begründbar sein
|
||||||
|
|
||||||
|
4. **TASK RELEVANCE REALISM**
|
||||||
|
- Gib realistische Bewertungen (50-85% typisch)
|
||||||
|
- Vermeide übertriebene 90-100% Scores
|
||||||
|
- Nur bei perfekter Übereinstimmung >85%
|
||||||
|
|
||||||
AUSWAHLREGELN:
|
AUSWAHLREGELN:
|
||||||
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
|
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
|
||||||
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
|
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
|
||||||
@ -59,7 +62,13 @@ ${JSON.stringify(toolsToSend, null, 2)}
|
|||||||
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
|
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
|
||||||
${JSON.stringify(conceptsToSend, null, 2)}
|
${JSON.stringify(conceptsToSend, null, 2)}
|
||||||
|
|
||||||
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.`;
|
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.
|
||||||
|
|
||||||
|
TASK RELEVANCE GUIDELINES:
|
||||||
|
- 50-65%: Grundlegend relevant, aber nicht optimal
|
||||||
|
- 66-75%: Gut geeignet für die Aufgabe
|
||||||
|
- 76-85%: Sehr gut geeignet, klare Vorteile
|
||||||
|
- 86-100%: NUR für perfekte Übereinstimmung verwenden`;
|
||||||
},
|
},
|
||||||
|
|
||||||
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
|
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
|
||||||
@ -148,13 +157,19 @@ AUSWAHLREGELN FÜR PHASE "${phase.name}":
|
|||||||
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
|
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
|
||||||
4. Begründe WARUM jedes Item für diese Phase optimal ist
|
4. Begründe WARUM jedes Item für diese Phase optimal ist
|
||||||
|
|
||||||
|
TASK RELEVANCE GUIDELINES:
|
||||||
|
- 60-70%: Grundlegend für diese Phase geeignet
|
||||||
|
- 71-80%: Gut geeignet, klare Phasenrelevanz
|
||||||
|
- 81-90%: Sehr gut geeignet, optimal für Phase
|
||||||
|
- 91-100%: NUR für perfekte Phasenübereinstimmung
|
||||||
|
|
||||||
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
|
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
|
||||||
|
|
||||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
|
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"toolName": "Exakter Name aus der Liste oben",
|
"toolName": "Exakter Name aus der Liste oben",
|
||||||
"taskRelevance": 85,
|
"taskRelevance": 75,
|
||||||
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
|
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
|
||||||
"limitations": ["Mögliche Einschränkung für diese Phase"]
|
"limitations": ["Mögliche Einschränkung für diese Phase"]
|
||||||
}
|
}
|
||||||
@ -266,6 +281,7 @@ AUSWAHLREGELN FÜR NACHERGÄNZUNG:
|
|||||||
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
|
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
|
||||||
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
|
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
|
||||||
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
|
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
|
||||||
|
4. Realistische Task Relevance (70-85% typisch für Nachergänzungen)
|
||||||
|
|
||||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
||||||
{
|
{
|
||||||
@ -291,7 +307,7 @@ Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
|
|||||||
}
|
}
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
|
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
|
||||||
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
|
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
|
||||||
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
|
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
|
||||||
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
|
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
|
||||||
|
@ -16,7 +16,7 @@ const knowledgebaseCollection = defineCollection({
|
|||||||
tags: z.array(z.string()).default([]),
|
tags: z.array(z.string()).default([]),
|
||||||
|
|
||||||
published: z.boolean().default(true),
|
published: z.boolean().default(true),
|
||||||
gated_content: z.boolean().default(false), // NEW: Gated content flag
|
gated_content: z.boolean().default(false),
|
||||||
|
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
// src/pages/api/ai/embeddings-status.ts
|
// src/pages/api/ai/embeddings-status.ts
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
|
import { embeddingsService } from '../../../utils/embeddings.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
export const GET: APIRoute = async () => {
|
export const GET: APIRoute = async () => {
|
||||||
try {
|
try {
|
||||||
const { embeddingsService } = await import('../../../utils/embeddings.js');
|
|
||||||
await embeddingsService.waitForInitialization();
|
await embeddingsService.waitForInitialization();
|
||||||
|
|
||||||
const stats = embeddingsService.getStats();
|
const stats = embeddingsService.getStats();
|
||||||
const status = stats.enabled && stats.initialized ? 'ready' :
|
const status = stats.enabled && stats.initialized ? 'ready' :
|
||||||
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
|
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS-STATUS-API] Service status: ${status}, stats:`, stats);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
embeddings: stats,
|
embeddings: stats,
|
||||||
@ -23,6 +25,8 @@ export const GET: APIRoute = async () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
console.error('[EMBEDDINGS-STATUS-API] Error checking embeddings status:', error);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: false,
|
success: false,
|
||||||
embeddings: { enabled: false, initialized: false, count: 0 },
|
embeddings: { enabled: false, initialized: false, count: 0 },
|
||||||
|
@ -1,23 +1,13 @@
|
|||||||
// src/pages/api/ai/enhance-input.ts - Enhanced AI service compatibility
|
// src/pages/api/ai/enhance-input.ts
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||||
|
import { aiService } from '../../../utils/aiService.js';
|
||||||
|
import { JSONParser } from '../../../utils/jsonUtils.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
function getEnv(key: string): string {
|
|
||||||
const value = process.env[key];
|
|
||||||
if (!value) {
|
|
||||||
throw new Error(`Missing environment variable: ${key}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
|
|
||||||
const AI_ANALYZER_API_KEY = getEnv('AI_ANALYZER_API_KEY');
|
|
||||||
const AI_ANALYZER_MODEL = getEnv('AI_ANALYZER_MODEL');
|
|
||||||
|
|
||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||||
const RATE_LIMIT_MAX = 5;
|
const RATE_LIMIT_MAX = 5;
|
||||||
@ -49,7 +39,7 @@ function checkRateLimit(userId: string): boolean {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanupExpiredRateLimits() {
|
function cleanupExpiredRateLimits(): void {
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
for (const [userId, limit] of rateLimitStore.entries()) {
|
for (const [userId, limit] of rateLimitStore.entries()) {
|
||||||
if (now > limit.resetTime) {
|
if (now > limit.resetTime) {
|
||||||
@ -61,7 +51,7 @@ function cleanupExpiredRateLimits() {
|
|||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||||
|
|
||||||
function createEnhancementPrompt(input: string): string {
|
function createEnhancementPrompt(input: string): string {
|
||||||
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
|
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte Untersuchung.
|
||||||
|
|
||||||
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
||||||
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
||||||
@ -74,12 +64,12 @@ ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
|||||||
|
|
||||||
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
||||||
|
|
||||||
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
|
WENN wichtige Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte Analyse schließen.
|
||||||
|
|
||||||
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
||||||
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
|
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
|
||||||
- Methodisch relevant (NICHT: "Wann passierte das?")
|
- Methodisch relevant (NICHT: "Wann passierte das?")
|
||||||
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
|
- Priorisiert nach Auswirkung auf die Untersuchungsqualität
|
||||||
- Die Frage soll maximal 20 Wörter umfassen
|
- Die Frage soll maximal 20 Wörter umfassen
|
||||||
|
|
||||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||||
@ -94,39 +84,6 @@ ${input}
|
|||||||
`.trim();
|
`.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function callAIService(prompt: string): Promise<Response> {
|
|
||||||
const endpoint = AI_ENDPOINT;
|
|
||||||
const apiKey = AI_ANALYZER_API_KEY;
|
|
||||||
const model = AI_ANALYZER_MODEL;
|
|
||||||
|
|
||||||
let headers: Record<string, string> = {
|
|
||||||
'Content-Type': 'application/json'
|
|
||||||
};
|
|
||||||
|
|
||||||
if (apiKey) {
|
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
|
||||||
console.log('[ENHANCE API] Using API key authentication');
|
|
||||||
} else {
|
|
||||||
console.log('[ENHANCE API] No API key - making request without authentication');
|
|
||||||
}
|
|
||||||
|
|
||||||
const requestBody = {
|
|
||||||
model,
|
|
||||||
messages: [{ role: 'user', content: prompt }],
|
|
||||||
max_tokens: 300,
|
|
||||||
temperature: 0.7,
|
|
||||||
top_p: 0.9,
|
|
||||||
frequency_penalty: 0.2,
|
|
||||||
presence_penalty: 0.1
|
|
||||||
};
|
|
||||||
|
|
||||||
return fetch(`${endpoint}/v1/chat/completions`, {
|
|
||||||
method: 'POST',
|
|
||||||
headers,
|
|
||||||
body: JSON.stringify(requestBody)
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
export const POST: APIRoute = async ({ request }) => {
|
export const POST: APIRoute = async ({ request }) => {
|
||||||
try {
|
try {
|
||||||
const authResult = await withAPIAuth(request, 'ai');
|
const authResult = await withAPIAuth(request, 'ai');
|
||||||
@ -155,28 +112,25 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
||||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||||
|
|
||||||
const aiResponse = await enqueueApiCall(() => callAIService(systemPrompt), taskId);
|
console.log(`[ENHANCE-API] Processing enhancement request for user: ${userId}`);
|
||||||
|
|
||||||
if (!aiResponse.ok) {
|
const aiResponse = await enqueueApiCall(() =>
|
||||||
const errorText = await aiResponse.text();
|
aiService.callAI(systemPrompt, {
|
||||||
console.error('[ENHANCE API] AI enhancement error:', errorText, 'Status:', aiResponse.status);
|
temperature: 0.7
|
||||||
return apiServerError.unavailable('Enhancement service unavailable');
|
}), taskId);
|
||||||
}
|
|
||||||
|
|
||||||
const aiData = await aiResponse.json();
|
if (!aiResponse.content) {
|
||||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
|
||||||
|
|
||||||
if (!aiContent) {
|
|
||||||
return apiServerError.unavailable('No enhancement response');
|
return apiServerError.unavailable('No enhancement response');
|
||||||
}
|
}
|
||||||
|
|
||||||
let questions;
|
let questions;
|
||||||
try {
|
try {
|
||||||
const cleanedContent = aiContent
|
const cleanedContent = aiResponse.content
|
||||||
.replace(/^```json\s*/i, '')
|
.replace(/^```json\s*/i, '')
|
||||||
.replace(/\s*```\s*$/, '')
|
.replace(/\s*```\s*$/, '')
|
||||||
.trim();
|
.trim();
|
||||||
questions = JSON.parse(cleanedContent);
|
|
||||||
|
questions = JSONParser.safeParseJSON(cleanedContent, []);
|
||||||
|
|
||||||
if (!Array.isArray(questions)) {
|
if (!Array.isArray(questions)) {
|
||||||
throw new Error('Response is not an array');
|
throw new Error('Response is not an array');
|
||||||
@ -198,11 +152,11 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to parse enhancement response:', aiContent);
|
console.error('[ENHANCE-API] Failed to parse enhancement response:', aiResponse.content);
|
||||||
questions = [];
|
questions = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[ENHANCE API] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
console.log(`[ENHANCE-API] User: ${userId}, Questions generated: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
@ -215,7 +169,7 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Enhancement error:', error);
|
console.error('[ENHANCE-API] Enhancement error:', error);
|
||||||
return apiServerError.internal('Enhancement processing failed');
|
return apiServerError.internal('Enhancement processing failed');
|
||||||
}
|
}
|
||||||
};
|
};
|
@ -20,15 +20,14 @@ const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '
|
|||||||
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
||||||
|
|
||||||
function sanitizeInput(input: string): string {
|
function sanitizeInput(input: string): string {
|
||||||
let sanitized = input
|
return input
|
||||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||||
.replace(/\<\/?[^>]+(>|$)/g, '')
|
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||||
.trim();
|
.trim()
|
||||||
|
.slice(0, 2000)
|
||||||
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
.replace(/\s+/g, ' ');
|
||||||
return sanitized;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
||||||
@ -77,7 +76,7 @@ function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanupExpiredRateLimits() {
|
function cleanupExpiredRateLimits(): void {
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const maxStoreSize = 1000;
|
const maxStoreSize = 1000;
|
||||||
|
|
||||||
@ -117,51 +116,52 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||||
|
|
||||||
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
console.log(`[AI-API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||||
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
console.log(`[AI-API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||||
|
|
||||||
if (!query || typeof query !== 'string') {
|
if (!query || typeof query !== 'string') {
|
||||||
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
console.log(`[AI-API] Invalid query for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Query required');
|
return apiError.badRequest('Query required');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!['workflow', 'tool'].includes(mode)) {
|
if (!['workflow', 'tool'].includes(mode)) {
|
||||||
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
console.log(`[AI-API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||||
}
|
}
|
||||||
|
|
||||||
const sanitizedQuery = sanitizeInput(query);
|
const sanitizedQuery = sanitizeInput(query);
|
||||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||||
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
console.log(`[AI-API] Filtered input detected for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Invalid input detected');
|
return apiError.badRequest('Invalid input detected');
|
||||||
}
|
}
|
||||||
|
|
||||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||||
|
|
||||||
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
console.log(`[AI-API] Enqueueing pipeline task ${taskId}`);
|
||||||
|
|
||||||
const result = await enqueueApiCall(() =>
|
const result = await enqueueApiCall(() =>
|
||||||
aiPipeline.processQuery(sanitizedQuery, mode)
|
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||||
, taskId);
|
, taskId);
|
||||||
|
|
||||||
if (!result || !result.recommendation) {
|
if (!result || !result.recommendation) {
|
||||||
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
return apiServerError.unavailable('No response from AI pipeline');
|
||||||
}
|
}
|
||||||
|
|
||||||
const stats = result.processingStats;
|
const stats = result.processingStats;
|
||||||
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
||||||
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
||||||
|
|
||||||
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
console.log(`[AI-API] Pipeline completed for ${taskId}:`, {
|
||||||
console.log(` - Mode: ${mode}`);
|
mode,
|
||||||
console.log(` - User: ${userId}`);
|
user: userId,
|
||||||
console.log(` - Query length: ${sanitizedQuery.length}`);
|
queryLength: sanitizedQuery.length,
|
||||||
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
processingTime: stats.processingTimeMs,
|
||||||
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
microTasksCompleted: stats.microTasksCompleted,
|
||||||
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
microTasksFailed: stats.microTasksFailed,
|
||||||
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
|
estimatedAICalls: estimatedAICallsMade,
|
||||||
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
embeddingsUsed: stats.embeddingsUsed,
|
||||||
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
finalItems: stats.finalSelectedItems
|
||||||
|
});
|
||||||
|
|
||||||
const currentLimit = rateLimitStore.get(userId);
|
const currentLimit = rateLimitStore.get(userId);
|
||||||
const remainingMicroTasks = currentLimit ?
|
const remainingMicroTasks = currentLimit ?
|
||||||
@ -175,7 +175,7 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
query: sanitizedQuery,
|
query: sanitizedQuery,
|
||||||
processingStats: {
|
processingStats: {
|
||||||
...result.processingStats,
|
...result.processingStats,
|
||||||
pipelineType: 'micro-task',
|
pipelineType: 'refactored',
|
||||||
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||||
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||||
estimatedAICallsMade
|
estimatedAICallsMade
|
||||||
@ -191,18 +191,16 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[MICRO-TASK API] Pipeline error:', error);
|
console.error('[AI-API] Pipeline error:', error);
|
||||||
|
|
||||||
if (error.message.includes('embeddings')) {
|
if (error.message.includes('embeddings')) {
|
||||||
return apiServerError.unavailable('Embeddings service error - using AI fallback');
|
return apiServerError.unavailable('Embeddings service error');
|
||||||
} else if (error.message.includes('micro-task')) {
|
} else if (error.message.includes('AI')) {
|
||||||
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
|
return apiServerError.unavailable('AI service error');
|
||||||
} else if (error.message.includes('selector')) {
|
|
||||||
return apiServerError.unavailable('AI selector service error');
|
|
||||||
} else if (error.message.includes('rate limit')) {
|
} else if (error.message.includes('rate limit')) {
|
||||||
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
|
return apiError.rateLimit('AI service rate limits exceeded');
|
||||||
} else {
|
} else {
|
||||||
return apiServerError.internal('Micro-task AI pipeline error');
|
return apiServerError.internal('AI pipeline error');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
// src/pages/contribute/index.astro - Consolidated Auth
|
// src/pages/contribute/index.astro
|
||||||
import BaseLayout from '../../layouts/BaseLayout.astro';
|
import BaseLayout from '../../layouts/BaseLayout.astro';
|
||||||
import { withAuth } from '../../utils/auth.js';
|
import { withAuth } from '../../utils/auth.js';
|
||||||
|
|
||||||
|
@ -511,8 +511,6 @@ if (aiAuthRequired) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
function handleSharedURL() {
|
function handleSharedURL() {
|
||||||
console.log('[SHARE] Handling shared URL:', window.location.search);
|
|
||||||
|
|
||||||
const urlParams = new URLSearchParams(window.location.search);
|
const urlParams = new URLSearchParams(window.location.search);
|
||||||
const toolParam = urlParams.get('tool');
|
const toolParam = urlParams.get('tool');
|
||||||
const viewParam = urlParams.get('view');
|
const viewParam = urlParams.get('view');
|
||||||
|
@ -675,6 +675,7 @@ input[type="checkbox"] {
|
|||||||
border-radius: 0.25rem;
|
border-radius: 0.25rem;
|
||||||
font-size: 0.75rem;
|
font-size: 0.75rem;
|
||||||
margin: 0.125rem;
|
margin: 0.125rem;
|
||||||
|
max-height: 1.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ===================================================================
|
/* ===================================================================
|
||||||
@ -1806,11 +1807,44 @@ input[type="checkbox"] {
|
|||||||
.ai-textarea-section {
|
.ai-textarea-section {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
min-width: 0;
|
min-width: 0;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ai-textarea-section textarea {
|
||||||
|
width: 100%;
|
||||||
|
height: 180px;
|
||||||
|
min-height: 180px;
|
||||||
|
max-height: 300px;
|
||||||
|
resize: vertical;
|
||||||
|
font-size: 0.9375rem;
|
||||||
|
line-height: 1.5;
|
||||||
|
padding: 0.75rem;
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.375rem;
|
||||||
|
background-color: var(--color-bg);
|
||||||
|
color: var(--color-text);
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
flex: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.confidence-tooltip {
|
||||||
|
background: var(--color-bg) !important;
|
||||||
|
border: 2px solid var(--color-border) !important;
|
||||||
|
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
|
||||||
|
z-index: 2000 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.ai-textarea-section textarea:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--color-primary);
|
||||||
|
box-shadow: 0 0 0 3px rgb(37 99 235 / 10%);
|
||||||
}
|
}
|
||||||
|
|
||||||
.ai-suggestions-section {
|
.ai-suggestions-section {
|
||||||
flex: 0 0 320px;
|
flex: 0 0 320px;
|
||||||
min-height: 120px;
|
min-height: 180px;
|
||||||
|
height: auto;
|
||||||
}
|
}
|
||||||
|
|
||||||
.ai-input-container textarea {
|
.ai-input-container textarea {
|
||||||
@ -2186,12 +2220,20 @@ input[type="checkbox"] {
|
|||||||
border-radius: 1rem;
|
border-radius: 1rem;
|
||||||
font-weight: 500;
|
font-weight: 500;
|
||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
|
position: relative;
|
||||||
|
z-index: 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
|
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
|
||||||
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
|
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
|
||||||
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
|
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
|
||||||
|
|
||||||
|
[data-theme="dark"] .confidence-tooltip {
|
||||||
|
background: var(--color-bg-secondary) !important;
|
||||||
|
border-color: var(--color-border) !important;
|
||||||
|
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
|
||||||
|
}
|
||||||
|
|
||||||
.tool-rec-justification {
|
.tool-rec-justification {
|
||||||
font-size: 0.875rem;
|
font-size: 0.875rem;
|
||||||
line-height: 1.5;
|
line-height: 1.5;
|
||||||
@ -2610,7 +2652,8 @@ footer {
|
|||||||
================================================================= */
|
================================================================= */
|
||||||
|
|
||||||
.smart-prompting-container {
|
.smart-prompting-container {
|
||||||
height: 100%;
|
height: auto;
|
||||||
|
min-height: 180px;
|
||||||
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
|
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2619,8 +2662,10 @@ footer {
|
|||||||
border: 1px solid var(--color-border);
|
border: 1px solid var(--color-border);
|
||||||
border-radius: 0.5rem;
|
border-radius: 0.5rem;
|
||||||
padding: 1rem;
|
padding: 1rem;
|
||||||
height: 100%;
|
height: auto;
|
||||||
min-height: 120px;
|
min-height: 180px;
|
||||||
|
max-height: 400px;
|
||||||
|
overflow-y: auto;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
opacity: 0.85;
|
opacity: 0.85;
|
||||||
@ -2660,8 +2705,8 @@ footer {
|
|||||||
|
|
||||||
/* Smart Prompting Hint */
|
/* Smart Prompting Hint */
|
||||||
.smart-prompting-hint {
|
.smart-prompting-hint {
|
||||||
height: 100%;
|
height: 180px;
|
||||||
min-height: 120px;
|
min-height: 180px;
|
||||||
display: flex;
|
display: flex;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
animation: hintFadeIn 0.3s ease-in-out;
|
animation: hintFadeIn 0.3s ease-in-out;
|
||||||
@ -3375,8 +3420,8 @@ footer {
|
|||||||
|
|
||||||
.ai-suggestions-section {
|
.ai-suggestions-section {
|
||||||
flex: 0 0 auto;
|
flex: 0 0 auto;
|
||||||
width: 100%;
|
height: auto;
|
||||||
max-width: none;
|
min-height: 120px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.ai-textarea-section {
|
.ai-textarea-section {
|
||||||
@ -3386,6 +3431,11 @@ footer {
|
|||||||
min-height: 100px;
|
min-height: 100px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.ai-textarea-section textarea {
|
||||||
|
height: 150px;
|
||||||
|
min-height: 150px;
|
||||||
|
}
|
||||||
|
|
||||||
.ai-spotlight-content {
|
.ai-spotlight-content {
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
gap: 0.75rem;
|
gap: 0.75rem;
|
||||||
|
File diff suppressed because it is too large
Load Diff
137
src/utils/aiService.ts
Normal file
137
src/utils/aiService.ts
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
// src/utils/aiService.ts
|
||||||
|
import 'dotenv/config';
|
||||||
|
|
||||||
|
export interface AIServiceConfig {
|
||||||
|
endpoint: string;
|
||||||
|
apiKey: string;
|
||||||
|
model: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AICallOptions {
|
||||||
|
temperature?: number;
|
||||||
|
timeout?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface AIResponse {
|
||||||
|
content: string;
|
||||||
|
usage?: {
|
||||||
|
promptTokens: number;
|
||||||
|
completionTokens: number;
|
||||||
|
totalTokens: number;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
class AIService {
|
||||||
|
private config: AIServiceConfig;
|
||||||
|
private defaultOptions: AICallOptions;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.config = {
|
||||||
|
endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
|
||||||
|
apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
|
||||||
|
model: this.getRequiredEnv('AI_ANALYZER_MODEL')
|
||||||
|
};
|
||||||
|
|
||||||
|
this.defaultOptions = {
|
||||||
|
temperature: 0.3,
|
||||||
|
timeout: 60000
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log('[AI-SERVICE] Initialized with model:', this.config.model);
|
||||||
|
}
|
||||||
|
|
||||||
|
private getRequiredEnv(key: string): string {
|
||||||
|
const value = process.env[key];
|
||||||
|
if (!value) {
|
||||||
|
throw new Error(`Missing required environment variable: ${key}`);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
async callAI(prompt: string, options: AICallOptions = {}): Promise<AIResponse> {
|
||||||
|
const mergedOptions = { ...this.defaultOptions, ...options };
|
||||||
|
|
||||||
|
console.log('[AI-SERVICE] Making API call:', {
|
||||||
|
promptLength: prompt.length,
|
||||||
|
temperature: mergedOptions.temperature
|
||||||
|
});
|
||||||
|
|
||||||
|
const headers: Record<string, string> = {
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
};
|
||||||
|
|
||||||
|
if (this.config.apiKey) {
|
||||||
|
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
const requestBody = {
|
||||||
|
model: this.config.model,
|
||||||
|
messages: [{ role: 'user', content: prompt }],
|
||||||
|
temperature: mergedOptions.temperature
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), mergedOptions.timeout);
|
||||||
|
|
||||||
|
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers,
|
||||||
|
body: JSON.stringify(requestBody),
|
||||||
|
signal: controller.signal
|
||||||
|
});
|
||||||
|
|
||||||
|
clearTimeout(timeoutId);
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
console.error('[AI-SERVICE] API Error:', response.status, errorText);
|
||||||
|
throw new Error(`AI API error: ${response.status} - ${errorText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
const content = data.choices?.[0]?.message?.content;
|
||||||
|
|
||||||
|
if (!content) {
|
||||||
|
console.error('[AI-SERVICE] No response content from AI model');
|
||||||
|
throw new Error('No response from AI model');
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log('[AI-SERVICE] API call successful:', {
|
||||||
|
responseLength: content.length,
|
||||||
|
usage: data.usage
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: content.trim(),
|
||||||
|
usage: data.usage
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
if (error.name === 'AbortError') {
|
||||||
|
console.error('[AI-SERVICE] Request timeout');
|
||||||
|
throw new Error('AI request timeout');
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error('[AI-SERVICE] API call failed:', error.message);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async callMicroTaskAI(prompt: string): Promise<AIResponse> {
|
||||||
|
return this.callAI(prompt, {
|
||||||
|
temperature: 0.3,
|
||||||
|
timeout: 30000
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
estimateTokens(text: string): number {
|
||||||
|
return Math.ceil(text.length / 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
getConfig(): AIServiceConfig {
|
||||||
|
return { ...this.config };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export const aiService = new AIService();
|
@ -83,26 +83,21 @@ export const apiServerError = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
export const apiSpecial = {
|
export const apiSpecial = {
|
||||||
// JSON parsing error
|
|
||||||
invalidJSON: (): Response =>
|
invalidJSON: (): Response =>
|
||||||
apiError.badRequest('Invalid JSON in request body'),
|
apiError.badRequest('Invalid JSON in request body'),
|
||||||
|
|
||||||
// Missing required fields
|
|
||||||
missingRequired: (fields: string[]): Response =>
|
missingRequired: (fields: string[]): Response =>
|
||||||
apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),
|
apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),
|
||||||
|
|
||||||
// Empty request body
|
|
||||||
emptyBody: (): Response =>
|
emptyBody: (): Response =>
|
||||||
apiError.badRequest('Request body cannot be empty'),
|
apiError.badRequest('Request body cannot be empty'),
|
||||||
|
|
||||||
// File upload responses
|
|
||||||
uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
|
uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
|
||||||
apiResponse.created(data),
|
apiResponse.created(data),
|
||||||
|
|
||||||
uploadFailed: (error: string): Response =>
|
uploadFailed: (error: string): Response =>
|
||||||
apiServerError.internal(`Upload failed: ${error}`),
|
apiServerError.internal(`Upload failed: ${error}`),
|
||||||
|
|
||||||
// Contribution responses
|
|
||||||
contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
|
contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
|
||||||
apiResponse.created({ success: true, ...data }),
|
apiResponse.created({ success: true, ...data }),
|
||||||
|
|
||||||
@ -114,7 +109,6 @@ export const apiWithHeaders = {
|
|||||||
successWithHeaders: (data: any, headers: Record<string, string>): Response =>
|
successWithHeaders: (data: any, headers: Record<string, string>): Response =>
|
||||||
createAPIResponse(data, 200, headers),
|
createAPIResponse(data, 200, headers),
|
||||||
|
|
||||||
// Redirect response
|
|
||||||
redirect: (location: string, temporary: boolean = true): Response =>
|
redirect: (location: string, temporary: boolean = true): Response =>
|
||||||
new Response(null, {
|
new Response(null, {
|
||||||
status: temporary ? 302 : 301,
|
status: temporary ? 302 : 301,
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,9 +1,8 @@
|
|||||||
// src/utils/clientUtils.ts
|
// src/utils/clientUtils.ts
|
||||||
|
|
||||||
|
|
||||||
export function createToolSlug(toolName: string): string {
|
export function createToolSlug(toolName: string): string {
|
||||||
if (!toolName || typeof toolName !== 'string') {
|
if (!toolName || typeof toolName !== 'string') {
|
||||||
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
|
console.warn('[CLIENT-UTILS] Invalid toolName provided to createToolSlug:', toolName);
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,6 +29,81 @@ export function isToolHosted(tool: any): boolean {
|
|||||||
tool.projectUrl.trim() !== "";
|
tool.projectUrl.trim() !== "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function sanitizeText(text: string): string {
|
||||||
|
if (typeof text !== 'string') return '';
|
||||||
|
|
||||||
|
return text
|
||||||
|
.replace(/^#{1,6}\s+/gm, '')
|
||||||
|
.replace(/^\s*[-*+]\s+/gm, '')
|
||||||
|
.replace(/^\s*\d+\.\s+/gm, '')
|
||||||
|
.replace(/\*\*(.+?)\*\*/g, '$1')
|
||||||
|
.replace(/\*(.+?)\*/g, '$1')
|
||||||
|
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
|
||||||
|
.replace(/```[\s\S]*?```/g, '[CODE BLOCK]')
|
||||||
|
.replace(/`([^`]+)`/g, '$1')
|
||||||
|
.replace(/<[^>]+>/g, '')
|
||||||
|
.replace(/\n\s*\n\s*\n/g, '\n\n')
|
||||||
|
.trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
export function escapeHtml(text: string): string {
|
||||||
|
if (typeof text !== 'string') return String(text);
|
||||||
|
const div = document.createElement('div');
|
||||||
|
div.textContent = text;
|
||||||
|
return div.innerHTML;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function truncateText(text: string, maxLength: number): string {
|
||||||
|
if (!text || text.length <= maxLength) return text;
|
||||||
|
return text.slice(0, maxLength) + '...';
|
||||||
|
}
|
||||||
|
|
||||||
|
export function summarizeData(data: any): string {
|
||||||
|
if (data === null || data === undefined) return 'null';
|
||||||
|
if (typeof data === 'string') {
|
||||||
|
return data.length > 100 ? data.slice(0, 100) + '...' : data;
|
||||||
|
}
|
||||||
|
if (typeof data === 'number' || typeof data === 'boolean') {
|
||||||
|
return data.toString();
|
||||||
|
}
|
||||||
|
if (Array.isArray(data)) {
|
||||||
|
if (data.length === 0) return '[]';
|
||||||
|
if (data.length <= 3) return JSON.stringify(data);
|
||||||
|
return `[${data.slice(0, 3).map(i => typeof i === 'string' ? i : JSON.stringify(i)).join(', ')}, ...+${data.length - 3}]`;
|
||||||
|
}
|
||||||
|
if (typeof data === 'object') {
|
||||||
|
const keys = Object.keys(data);
|
||||||
|
if (keys.length === 0) return '{}';
|
||||||
|
if (keys.length <= 3) {
|
||||||
|
return '{' + keys.map(k => `${k}: ${typeof data[k] === 'string' ? data[k].slice(0, 20) + (data[k].length > 20 ? '...' : '') : JSON.stringify(data[k])}`).join(', ') + '}';
|
||||||
|
}
|
||||||
|
return `{${keys.slice(0, 3).join(', ')}, ...+${keys.length - 3} keys}`;
|
||||||
|
}
|
||||||
|
return String(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function formatDuration(ms: number): string {
|
||||||
|
if (ms < 1000) return '< 1s';
|
||||||
|
if (ms < 60000) return `${Math.ceil(ms / 1000)}s`;
|
||||||
|
const minutes = Math.floor(ms / 60000);
|
||||||
|
const seconds = Math.ceil((ms % 60000) / 1000);
|
||||||
|
return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function showElement(element: HTMLElement | null): void {
|
||||||
|
if (element) {
|
||||||
|
element.style.display = 'block';
|
||||||
|
element.classList.remove('hidden');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function hideElement(element: HTMLElement | null): void {
|
||||||
|
if (element) {
|
||||||
|
element.style.display = 'none';
|
||||||
|
element.classList.add('hidden');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
interface AutocompleteOptions {
|
interface AutocompleteOptions {
|
||||||
minLength?: number;
|
minLength?: number;
|
||||||
maxResults?: number;
|
maxResults?: number;
|
||||||
@ -202,7 +276,7 @@ export class AutocompleteManager {
|
|||||||
|
|
||||||
defaultRender(item: any): string {
|
defaultRender(item: any): string {
|
||||||
const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
|
const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
|
||||||
return `<div class="autocomplete-item">${this.escapeHtml(text)}</div>`;
|
return `<div class="autocomplete-item">${escapeHtml(text)}</div>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
renderDropdown(): void {
|
renderDropdown(): void {
|
||||||
@ -284,8 +358,8 @@ export class AutocompleteManager {
|
|||||||
align-items: center;
|
align-items: center;
|
||||||
gap: 0.25rem;
|
gap: 0.25rem;
|
||||||
">
|
">
|
||||||
${this.escapeHtml(item)}
|
${escapeHtml(item)}
|
||||||
<button type="button" class="autocomplete-remove" data-item="${this.escapeHtml(item)}" style="
|
<button type="button" class="autocomplete-remove" data-item="${escapeHtml(item)}" style="
|
||||||
background: none;
|
background: none;
|
||||||
border: none;
|
border: none;
|
||||||
color: white;
|
color: white;
|
||||||
@ -327,12 +401,6 @@ export class AutocompleteManager {
|
|||||||
this.selectedIndex = -1;
|
this.selectedIndex = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
escapeHtml(text: string): string {
|
|
||||||
const div = document.createElement('div');
|
|
||||||
div.textContent = text;
|
|
||||||
return div.innerHTML;
|
|
||||||
}
|
|
||||||
|
|
||||||
setDataSource(newDataSource: any[]): void {
|
setDataSource(newDataSource: any[]): void {
|
||||||
this.dataSource = newDataSource;
|
this.dataSource = newDataSource;
|
||||||
}
|
}
|
||||||
|
225
src/utils/confidenceScoring.ts
Normal file
225
src/utils/confidenceScoring.ts
Normal file
@ -0,0 +1,225 @@
|
|||||||
|
// src/utils/confidenceScoring.ts
|
||||||
|
import { isToolHosted } from './clientUtils.js';
|
||||||
|
import 'dotenv/config';
|
||||||
|
|
||||||
|
export interface ConfidenceMetrics {
|
||||||
|
overall: number;
|
||||||
|
semanticRelevance: number;
|
||||||
|
taskSuitability: number;
|
||||||
|
uncertaintyFactors: string[];
|
||||||
|
strengthIndicators: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tunable parameters of the confidence scoring, loaded from CONFIDENCE_* environment variables.
 */
export interface ConfidenceConfig {
  /** Multiplier applied to the semantic relevance score when computing the overall score. */
  semanticWeight: number;
  /** Multiplier applied to the task suitability score when computing the overall score. */
  suitabilityWeight: number;
  /** Lowest acceptable confidence; loaded from the environment but not consulted by the scoring class in this file. */
  minimumThreshold: number;
  /** Scores at or above this value (and below `highThreshold`) are classified as 'moderate'. */
  mediumThreshold: number;
  /** Scores at or above this value are classified as 'strong'. */
  highThreshold: number;
}
|
||||||
|
|
||||||
|
/**
 * Inputs describing the analysis run a recommendation is being scored for.
 */
export interface AnalysisContext {
  /** The user's original query text; scanned for urgency/complexity/method keywords. */
  userQuery: string;
  /** Analysis mode; the value 'workflow' enables the phase-alignment bonus. */
  mode: string;
  /**
   * Similarity per tool, looked up by tool name. Values are multiplied by 100
   * when scored, so they are presumably in the 0–1 range — confirm against the producer.
   */
  embeddingsSimilarities: Map<string, number>;
  /** Tools already selected for the run; consulted in 'workflow' mode to check phase alignment. */
  selectedTools?: {
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }[];
}
|
||||||
|
|
||||||
|
/**
 * Computes confidence figures for AI tool recommendations and tool selections.
 *
 * Weights and thresholds are read once, at construction time, from the
 * CONFIDENCE_* environment variables; missing or malformed values fall back
 * to built-in defaults.
 */
class ConfidenceScoring {
  private config: ConfidenceConfig;

  constructor() {
    this.config = {
      semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
      suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
      minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
      mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
      highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
    };

    console.log('[CONFIDENCE-SCORING] Initialized with restored config:', this.config);
  }

  /**
   * Reads a floating-point setting from the environment.
   *
   * @param key - Environment variable name.
   * @param defaultValue - Value used when the variable is unset, empty, or not a finite number.
   * @returns The parsed value or `defaultValue`.
   */
  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) return defaultValue;

    // A malformed value would otherwise yield NaN and poison every score computed from it.
    const parsed = parseFloat(value);
    return Number.isFinite(parsed) ? parsed : defaultValue;
  }

  /**
   * Reads a base-10 integer setting from the environment.
   *
   * @param key - Environment variable name.
   * @param defaultValue - Value used when the variable is unset, empty, or not a number.
   * @returns The parsed value or `defaultValue`.
   */
  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) return defaultValue;

    // NaN thresholds would make every comparison false, so fall back instead.
    const parsed = parseInt(value, 10);
    return Number.isFinite(parsed) ? parsed : defaultValue;
  }

  /**
   * Scores how confident a recommendation of `tool` is for the given context.
   *
   * @param tool - Tool record; `name`, `phases`, `skillLevel`, `type`, `license`,
   *   `accessType` and `knowledgebase` are read.
   * @param context - Query, mode, per-tool similarities and (optionally) selected tools.
   * @param taskRelevance - Task relevance score for the tool (defaults to 70).
   * @param limitations - Known limitations; the first two are reported as uncertainty factors.
   * @returns Rounded scores plus the uncertainty and strength messages.
   */
  calculateRecommendationConfidence(
    tool: any,
    context: AnalysisContext,
    taskRelevance: number = 70,
    limitations: string[] = []
  ): ConfidenceMetrics {
    console.log('[CONFIDENCE-SCORING] Calculating confidence for tool:', tool.name);

    // No similarity on record for this tool: use a neutral 50.
    const knownSimilarity = context.embeddingsSimilarities.get(tool.name);
    const rawSemanticRelevance = knownSimilarity !== undefined ? knownSimilarity * 100 : 50;

    let enhancedTaskSuitability = taskRelevance;

    if (context.mode === 'workflow') {
      const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
      if (toolSelection && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
        // Reward tools used in a phase they declare support for, without exceeding 100.
        const phaseBonus = Math.min(15, 100 - taskRelevance);
        enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
        console.log('[CONFIDENCE-SCORING] Phase alignment bonus applied:', phaseBonus);
      }
    }

    const overall = (
      rawSemanticRelevance * this.config.semanticWeight +
      enhancedTaskSuitability * this.config.suitabilityWeight
    );

    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

    const result: ConfidenceMetrics = {
      overall: Math.round(overall),
      semanticRelevance: Math.round(rawSemanticRelevance),
      taskSuitability: Math.round(enhancedTaskSuitability),
      uncertaintyFactors,
      strengthIndicators
    };

    console.log('[CONFIDENCE-SCORING] Confidence calculated:', {
      tool: tool.name,
      overall: result.overall,
      semantic: result.semanticRelevance,
      task: result.taskSuitability,
      uncertaintyCount: uncertaintyFactors.length,
      strengthCount: strengthIndicators.length
    });

    return result;
  }

  /**
   * Collects user-facing caveats for a recommendation.
   *
   * @param tool - Tool record being assessed.
   * @param context - Analysis context (query text and similarities are read).
   * @param limitations - Known limitations; at most the first two are included.
   * @param confidence - Unrounded overall score.
   * @returns At most four messages, in the order the checks run.
   */
  private identifyUncertaintyFactors(
    tool: any,
    context: AnalysisContext,
    limitations: string[],
    confidence: number
  ): string[] {
    const factors: string[] = [];

    if (limitations?.length > 0) {
      factors.push(...limitations.slice(0, 2));
    }

    // Unknown similarity is treated as a middling 0.5.
    const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
    if (similarity < 0.7) {
      factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
    }

    if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
      factors.push('Experten-Tool für zeitkritisches Szenario');
    }

    if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
      factors.push('Einsteiger-Tool für komplexe Analyse');
    }

    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
      factors.push('Installation und Setup erforderlich');
    }

    if (tool.license === 'Proprietary') {
      factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
    }

    if (confidence < 60) {
      factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
    }

    return factors.slice(0, 4);
  }

  /**
   * Collects user-facing reasons that support a recommendation.
   *
   * @param tool - Tool record being assessed.
   * @param context - Analysis context (query text and similarities are read).
   * @param confidence - Unrounded overall score; accepted for symmetry, not read here.
   * @returns At most four messages, in the order the checks run.
   */
  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
    const indicators: string[] = [];

    // Unknown similarity is treated as a middling 0.5.
    const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
    if (similarity >= 0.7) {
      indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
    }

    if (tool.knowledgebase === true) {
      indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
    }

    if (isToolHosted(tool)) {
      indicators.push('Sofort verfügbar über gehostete Lösung');
    }

    if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
      indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
    }

    if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
      indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
    }

    return indicators.slice(0, 4);
  }

  /**
   * Heuristic confidence for a tool-selection result.
   *
   * Starts at 60, adjusts for the share of candidates selected, the presence of
   * reasoning longer than 50 characters and of selected concepts, then clamps to 25–95.
   *
   * @param result - Selection result; `selectedTools`, `reasoning` and `selectedConcepts` are read.
   * @param candidateCount - Number of candidates the selection was made from.
   * @returns 30 when `result` has no `selectedTools`, otherwise the clamped score.
   */
  calculateSelectionConfidence(result: any, candidateCount: number): number {
    if (!result?.selectedTools) {
      console.log('[CONFIDENCE-SCORING] No selected tools for confidence calculation');
      return 30;
    }

    // With candidateCount 0 the ratio is NaN/Infinity and lands in the final (-15) branch below.
    const selectionRatio = result.selectedTools.length / candidateCount;
    const hasReasoning = result.reasoning && result.reasoning.length > 50;

    let confidence = 60;

    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
    else if (selectionRatio <= 0.05) confidence -= 10;
    else confidence -= 15;

    if (hasReasoning) confidence += 15;
    if (result.selectedConcepts?.length > 0) confidence += 5;

    const finalConfidence = Math.min(95, Math.max(25, confidence));

    console.log('[CONFIDENCE-SCORING] Selection confidence calculated:', {
      candidateCount,
      selectedCount: result.selectedTools.length,
      selectionRatio: selectionRatio.toFixed(3),
      hasReasoning,
      confidence: finalConfidence
    });

    return finalConfidence;
  }

  /**
   * Maps a score to a qualitative level using the configured thresholds.
   *
   * @param confidence - Score to classify.
   * @returns 'strong' at or above the high threshold, 'moderate' at or above the medium threshold, else 'weak'.
   */
  getConfidenceLevel(confidence: number): 'weak' | 'moderate' | 'strong' {
    if (confidence >= this.config.highThreshold) return 'strong';
    if (confidence >= this.config.mediumThreshold) return 'moderate';
    return 'weak';
  }

  /**
   * Maps a score to a CSS custom-property reference using the same thresholds as `getConfidenceLevel`.
   *
   * @param confidence - Score to classify.
   * @returns A `var(--color-…)` string.
   */
  getConfidenceColor(confidence: number): string {
    if (confidence >= this.config.highThreshold) return 'var(--color-accent)';
    if (confidence >= this.config.mediumThreshold) return 'var(--color-warning)';
    return 'var(--color-error)';
  }

  /**
   * @returns A shallow copy of the active configuration, so callers cannot mutate internal state.
   */
  getConfig(): ConfidenceConfig {
    return { ...this.config };
  }
}
|
||||||
|
|
||||||
|
// Shared module-level instance; constructing it reads the CONFIDENCE_* environment settings once.
export const confidenceScoring = new ConfidenceScoring();
|
@ -85,7 +85,7 @@ let cachedData: ToolsData | null = null;
|
|||||||
let cachedRandomizedData: ToolsData | null = null;
|
let cachedRandomizedData: ToolsData | null = null;
|
||||||
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
||||||
let lastRandomizationDate: string | null = null;
|
let lastRandomizationDate: string | null = null;
|
||||||
let dataVersion: string | null = null;
|
let cachedToolsHash: string | null = null;
|
||||||
|
|
||||||
function seededRandom(seed: number): () => number {
|
function seededRandom(seed: number): () => number {
|
||||||
let x = Math.sin(seed) * 10000;
|
let x = Math.sin(seed) * 10000;
|
||||||
@ -110,17 +110,6 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
|
|||||||
return shuffled;
|
return shuffled;
|
||||||
}
|
}
|
||||||
|
|
||||||
function generateDataVersion(data: any): string {
|
|
||||||
const str = JSON.stringify(data, Object.keys(data).sort());
|
|
||||||
let hash = 0;
|
|
||||||
for (let i = 0; i < str.length; i++) {
|
|
||||||
const char = str.charCodeAt(i);
|
|
||||||
hash = ((hash << 5) - hash) + char;
|
|
||||||
hash = hash & hash;
|
|
||||||
}
|
|
||||||
return Math.abs(hash).toString(36);
|
|
||||||
}
|
|
||||||
|
|
||||||
async function loadRawData(): Promise<ToolsData> {
|
async function loadRawData(): Promise<ToolsData> {
|
||||||
if (!cachedData) {
|
if (!cachedData) {
|
||||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||||
@ -142,8 +131,9 @@ async function loadRawData(): Promise<ToolsData> {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
dataVersion = generateDataVersion(cachedData);
|
const { getToolsFileHash } = await import('./hashUtils.js');
|
||||||
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
|
cachedToolsHash = await getToolsFileHash();
|
||||||
|
console.log(`[DATA SERVICE] Loaded data with hash: ${cachedToolsHash.slice(0, 12)}...`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
if (error instanceof z.ZodError) {
|
if (error instanceof z.ZodError) {
|
||||||
@ -234,7 +224,7 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
|||||||
}
|
}
|
||||||
|
|
||||||
export function getDataVersion(): string | null {
|
export function getDataVersion(): string | null {
|
||||||
return dataVersion;
|
return cachedToolsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function clearCache(): void {
|
export function clearCache(): void {
|
||||||
@ -242,7 +232,7 @@ export function clearCache(): void {
|
|||||||
cachedRandomizedData = null;
|
cachedRandomizedData = null;
|
||||||
cachedCompressedData = null;
|
cachedCompressedData = null;
|
||||||
lastRandomizationDate = null;
|
lastRandomizationDate = null;
|
||||||
dataVersion = null;
|
cachedToolsHash = null;
|
||||||
|
|
||||||
console.log('[DATA SERVICE] Enhanced cache cleared');
|
console.log('[DATA SERVICE] Enhanced cache cleared');
|
||||||
}
|
}
|
@ -1,11 +1,11 @@
|
|||||||
// src/utils/embeddings.ts
|
// src/utils/embeddings.ts - Refactored
|
||||||
import { promises as fs } from 'fs';
|
import { promises as fs } from 'fs';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||||
import 'dotenv/config';
|
import 'dotenv/config';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
|
|
||||||
interface EmbeddingData {
|
export interface EmbeddingData {
|
||||||
id: string;
|
id: string;
|
||||||
type: 'tool' | 'concept';
|
type: 'tool' | 'concept';
|
||||||
name: string;
|
name: string;
|
||||||
@ -20,14 +20,22 @@ interface EmbeddingData {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface SimilarityResult extends EmbeddingData {
|
||||||
|
similarity: number;
|
||||||
|
}
|
||||||
|
|
||||||
interface EmbeddingsDatabase {
|
interface EmbeddingsDatabase {
|
||||||
version: string;
|
version: string;
|
||||||
lastUpdated: number;
|
lastUpdated: number;
|
||||||
embeddings: EmbeddingData[];
|
embeddings: EmbeddingData[];
|
||||||
}
|
}
|
||||||
|
|
||||||
interface SimilarityResult extends EmbeddingData {
|
interface EmbeddingsConfig {
|
||||||
similarity: number;
|
endpoint?: string;
|
||||||
|
apiKey?: string;
|
||||||
|
model?: string;
|
||||||
|
batchSize: number;
|
||||||
|
batchDelay: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
class EmbeddingsService {
|
class EmbeddingsService {
|
||||||
@ -35,48 +43,30 @@ class EmbeddingsService {
|
|||||||
private isInitialized = false;
|
private isInitialized = false;
|
||||||
private initializationPromise: Promise<void> | null = null;
|
private initializationPromise: Promise<void> | null = null;
|
||||||
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||||
private readonly batchSize: number;
|
private config: EmbeddingsConfig;
|
||||||
private readonly batchDelay: number;
|
|
||||||
private enabled: boolean = false;
|
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
this.config = this.loadConfig();
|
||||||
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
|
||||||
|
hasEndpoint: !!this.config.endpoint,
|
||||||
this.enabled = true;
|
hasModel: !!this.config.model
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private async checkEnabledStatus(): Promise<void> {
|
private loadConfig(): EmbeddingsConfig {
|
||||||
try {
|
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||||
const envEnabled = process.env.AI_EMBEDDINGS_ENABLED;
|
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||||
|
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||||
|
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||||
|
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||||
|
|
||||||
if (envEnabled === 'true') {
|
return {
|
||||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
endpoint,
|
||||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
apiKey,
|
||||||
|
model,
|
||||||
if (!endpoint || !model) {
|
batchSize,
|
||||||
console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling');
|
batchDelay
|
||||||
this.enabled = false;
|
};
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('[EMBEDDINGS] All requirements met - enabling embeddings');
|
|
||||||
this.enabled = true;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
await fs.stat(this.embeddingsPath);
|
|
||||||
console.log('[EMBEDDINGS] Existing embeddings file found - enabling');
|
|
||||||
this.enabled = true;
|
|
||||||
} catch {
|
|
||||||
console.log('[EMBEDDINGS] Embeddings not explicitly enabled - disabling');
|
|
||||||
this.enabled = false;
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('[EMBEDDINGS] Error checking enabled status:', error);
|
|
||||||
this.enabled = false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async initialize(): Promise<void> {
|
async initialize(): Promise<void> {
|
||||||
@ -93,63 +83,55 @@ class EmbeddingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private async performInitialization(): Promise<void> {
|
private async performInitialization(): Promise<void> {
|
||||||
await this.checkEnabledStatus();
|
|
||||||
if (!this.enabled) {
|
|
||||||
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const initStart = Date.now();
|
const initStart = Date.now();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log('[EMBEDDINGS] Initializing embeddings system…');
|
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
|
||||||
|
|
||||||
|
/*if (!this.config.enabled) {
|
||||||
|
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
|
||||||
|
return;
|
||||||
|
}*/
|
||||||
|
|
||||||
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
|
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
|
||||||
|
|
||||||
const toolsData = await getCompressedToolsDataForAI();
|
const toolsData = await getCompressedToolsDataForAI();
|
||||||
const currentDataHash = await this.hashToolsFile();
|
const { getToolsFileHash } = await import('./hashUtils.js');
|
||||||
|
const currentDataHash = await getToolsFileHash();
|
||||||
|
|
||||||
const existing = await this.loadEmbeddings();
|
const existing = await this.loadEmbeddings();
|
||||||
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
|
|
||||||
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
|
|
||||||
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
|
|
||||||
|
|
||||||
const cacheIsUsable =
|
const cacheIsUsable = existing &&
|
||||||
existing &&
|
|
||||||
existing.version === currentDataHash &&
|
existing.version === currentDataHash &&
|
||||||
Array.isArray(existing.embeddings) &&
|
Array.isArray(existing.embeddings) &&
|
||||||
existing.embeddings.length > 0;
|
existing.embeddings.length > 0;
|
||||||
|
|
||||||
if (cacheIsUsable) {
|
if (cacheIsUsable) {
|
||||||
console.log('[EMBEDDINGS] Using cached embeddings');
|
console.log('[EMBEDDINGS-SERVICE] Using cached embeddings');
|
||||||
this.embeddings = existing.embeddings;
|
this.embeddings = existing.embeddings;
|
||||||
} else {
|
} else {
|
||||||
console.log('[EMBEDDINGS] Generating new embeddings…');
|
console.log('[EMBEDDINGS-SERVICE] Generating new embeddings');
|
||||||
await this.generateEmbeddings(toolsData, currentDataHash);
|
await this.generateEmbeddings(toolsData, currentDataHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
this.isInitialized = true;
|
this.isInitialized = true;
|
||||||
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`);
|
console.log(`[EMBEDDINGS-SERVICE] Initialized successfully with ${this.embeddings.length} embeddings in ${Date.now() - initStart}ms`);
|
||||||
} catch (err) {
|
|
||||||
console.error('[EMBEDDINGS] Failed to initialize:', err);
|
} catch (error) {
|
||||||
|
console.error('[EMBEDDINGS-SERVICE] Initialization failed:', error);
|
||||||
this.isInitialized = false;
|
this.isInitialized = false;
|
||||||
throw err;
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
this.initializationPromise = null;
|
this.initializationPromise = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async hashToolsFile(): Promise<string> {
|
|
||||||
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
|
||||||
const raw = await fs.readFile(file, 'utf8');
|
|
||||||
return crypto.createHash('sha256').update(raw).digest('hex');
|
|
||||||
}
|
|
||||||
|
|
||||||
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||||
try {
|
try {
|
||||||
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
||||||
return JSON.parse(data);
|
return JSON.parse(data);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log('[EMBEDDINGS] No existing embeddings found');
|
console.log('[EMBEDDINGS-SERVICE] No existing embeddings file found');
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -162,7 +144,7 @@ class EmbeddingsService {
|
|||||||
};
|
};
|
||||||
|
|
||||||
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
||||||
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
|
console.log(`[EMBEDDINGS-SERVICE] Saved ${this.embeddings.length} embeddings to disk`);
|
||||||
}
|
}
|
||||||
|
|
||||||
private createContentString(item: any): string {
|
private createContentString(item: any): string {
|
||||||
@ -178,30 +160,23 @@ class EmbeddingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
||||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
if (!this.config.endpoint || !this.config.model) {
|
||||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
throw new Error('Missing embeddings API configuration');
|
||||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
|
||||||
|
|
||||||
if (!endpoint || !model) {
|
|
||||||
const missing: string[] = [];
|
|
||||||
if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT');
|
|
||||||
if (!model) missing.push('AI_EMBEDDINGS_MODEL');
|
|
||||||
throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const headers: Record<string, string> = {
|
const headers: Record<string, string> = {
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
};
|
};
|
||||||
|
|
||||||
if (apiKey) {
|
if (this.config.apiKey) {
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
const response = await fetch(endpoint, {
|
const response = await fetch(this.config.endpoint, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers,
|
headers,
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model,
|
model: this.config.model,
|
||||||
input: contents
|
input: contents
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
@ -233,11 +208,16 @@ class EmbeddingsService {
|
|||||||
const contents = allItems.map(item => this.createContentString(item));
|
const contents = allItems.map(item => this.createContentString(item));
|
||||||
this.embeddings = [];
|
this.embeddings = [];
|
||||||
|
|
||||||
for (let i = 0; i < contents.length; i += this.batchSize) {
|
console.log(`[EMBEDDINGS-SERVICE] Generating embeddings for ${contents.length} items`);
|
||||||
const batch = contents.slice(i, i + this.batchSize);
|
|
||||||
const batchItems = allItems.slice(i, i + this.batchSize);
|
|
||||||
|
|
||||||
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
|
for (let i = 0; i < contents.length; i += this.config.batchSize) {
|
||||||
|
const batch = contents.slice(i, i + this.config.batchSize);
|
||||||
|
const batchItems = allItems.slice(i, i + this.config.batchSize);
|
||||||
|
|
||||||
|
const batchNumber = Math.ceil((i + 1) / this.config.batchSize);
|
||||||
|
const totalBatches = Math.ceil(contents.length / this.config.batchSize);
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS-SERVICE] Processing batch ${batchNumber}/${totalBatches}`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const embeddings = await this.generateEmbeddingsBatch(batch);
|
const embeddings = await this.generateEmbeddingsBatch(batch);
|
||||||
@ -260,12 +240,12 @@ class EmbeddingsService {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
if (i + this.batchSize < contents.length) {
|
if (i + this.config.batchSize < contents.length) {
|
||||||
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
await new Promise(resolve => setTimeout(resolve, this.config.batchDelay));
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
|
console.error(`[EMBEDDINGS-SERVICE] Batch ${batchNumber} failed:`, error);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -273,18 +253,21 @@ class EmbeddingsService {
|
|||||||
await this.saveEmbeddings(version);
|
await this.saveEmbeddings(version);
|
||||||
}
|
}
|
||||||
|
|
||||||
public async embedText(text: string): Promise<number[]> {
|
async embedText(text: string): Promise<number[]> {
|
||||||
if (!this.enabled || !this.isInitialized) {
|
if (!this.isInitialized) {
|
||||||
throw new Error('Embeddings service not available');
|
throw new Error('Embeddings service not available');
|
||||||
}
|
}
|
||||||
|
|
||||||
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||||
return embedding;
|
return embedding;
|
||||||
}
|
}
|
||||||
|
|
||||||
async waitForInitialization(): Promise<void> {
|
async waitForInitialization(): Promise<void> {
|
||||||
await this.checkEnabledStatus();
|
/*if (!this.config.enabled) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}*/
|
||||||
|
|
||||||
if (!this.enabled || this.isInitialized) {
|
if (this.isInitialized) {
|
||||||
return Promise.resolve();
|
return Promise.resolve();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -296,13 +279,6 @@ class EmbeddingsService {
|
|||||||
return this.initialize();
|
return this.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
async forceRecheckEnvironment(): Promise<void> {
|
|
||||||
this.enabled = false;
|
|
||||||
this.isInitialized = false;
|
|
||||||
await this.checkEnabledStatus();
|
|
||||||
console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled);
|
|
||||||
}
|
|
||||||
|
|
||||||
private cosineSimilarity(a: number[], b: number[]): number {
|
private cosineSimilarity(a: number[], b: number[]): number {
|
||||||
let dotProduct = 0;
|
let dotProduct = 0;
|
||||||
let normA = 0;
|
let normA = 0;
|
||||||
@ -318,145 +294,62 @@ class EmbeddingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
|
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
|
||||||
if (!this.enabled) {
|
/*if (!this.config.enabled) {
|
||||||
console.log('[EMBEDDINGS] Service disabled for similarity search');
|
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
|
||||||
|
return [];
|
||||||
|
}*/
|
||||||
|
|
||||||
|
if (!this.isInitialized || this.embeddings.length === 0) {
|
||||||
|
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (this.isInitialized && this.embeddings.length > 0) {
|
console.log(`[EMBEDDINGS-SERVICE] Finding similar items for query: "${query}"`);
|
||||||
console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`);
|
|
||||||
|
|
||||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||||
const queryEmbedding = queryEmbeddings[0];
|
const queryEmbedding = queryEmbeddings[0];
|
||||||
|
|
||||||
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
|
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||||
|
...item,
|
||||||
|
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||||
|
}));
|
||||||
|
|
||||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||||
...item,
|
const dynamicThreshold = Math.max(threshold, topScore * 0.85);
|
||||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
|
||||||
}));
|
|
||||||
|
|
||||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
const results = similarities
|
||||||
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
|
.filter(item => item.similarity >= dynamicThreshold)
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, maxResults);
|
||||||
|
|
||||||
const results = similarities
|
console.log(`[EMBEDDINGS-SERVICE] Found ${results.length} similar items (threshold: ${dynamicThreshold.toFixed(3)})`);
|
||||||
.filter(item => item.similarity >= dynamicCutOff)
|
|
||||||
.sort((a, b) => b.similarity - a.similarity)
|
|
||||||
.slice(0, maxResults);
|
|
||||||
|
|
||||||
|
if (results.length > 0) {
|
||||||
const orderingValid = results.every((item, index) => {
|
console.log('[EMBEDDINGS-SERVICE] Top 5 matches:');
|
||||||
if (index === 0) return true;
|
results.slice(0, 5).forEach((item, idx) => {
|
||||||
return item.similarity <= results[index - 1].similarity;
|
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!orderingValid) {
|
|
||||||
console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!');
|
|
||||||
results.forEach((item, idx) => {
|
|
||||||
console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
|
|
||||||
if (results.length > 0) {
|
|
||||||
console.log('[EMBEDDINGS] Top 10 similarity matches:');
|
|
||||||
results.slice(0, 10).forEach((item, idx) => {
|
|
||||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
|
||||||
});
|
|
||||||
|
|
||||||
const topSimilarity = results[0].similarity;
|
|
||||||
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
|
|
||||||
if (hasHigherSimilarity) {
|
|
||||||
console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return results;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`);
|
|
||||||
|
|
||||||
const { getToolsData } = await import('./dataService.js');
|
|
||||||
const toolsData = await getToolsData();
|
|
||||||
|
|
||||||
const queryLower = query.toLowerCase();
|
|
||||||
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2);
|
|
||||||
|
|
||||||
const similarities: SimilarityResult[] = toolsData.tools
|
|
||||||
.map((tool: any) => {
|
|
||||||
let similarity = 0;
|
|
||||||
|
|
||||||
if (tool.name.toLowerCase().includes(queryLower)) {
|
|
||||||
similarity += 0.8;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tool.description && tool.description.toLowerCase().includes(queryLower)) {
|
|
||||||
similarity += 0.6;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tool.tags && Array.isArray(tool.tags)) {
|
|
||||||
const matchingTags = tool.tags.filter((tag: string) =>
|
|
||||||
tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase())
|
|
||||||
);
|
|
||||||
if (tool.tags.length > 0) {
|
|
||||||
similarity += (matchingTags.length / tool.tags.length) * 0.4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase();
|
|
||||||
const matchingWords = queryWords.filter(word => toolText.includes(word));
|
|
||||||
if (queryWords.length > 0) {
|
|
||||||
similarity += (matchingWords.length / queryWords.length) * 0.3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`,
|
|
||||||
type: 'tool' as const,
|
|
||||||
name: tool.name,
|
|
||||||
content: toolText,
|
|
||||||
embedding: [],
|
|
||||||
metadata: {
|
|
||||||
domains: tool.domains || [],
|
|
||||||
phases: tool.phases || [],
|
|
||||||
tags: tool.tags || [],
|
|
||||||
skillLevel: tool.skillLevel,
|
|
||||||
type: tool.type
|
|
||||||
},
|
|
||||||
similarity: Math.min(similarity, 1.0)
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.filter(item => item.similarity >= threshold)
|
|
||||||
.sort((a, b) => b.similarity - a.similarity)
|
|
||||||
.slice(0, maxResults);
|
|
||||||
|
|
||||||
console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`);
|
|
||||||
return similarities;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[EMBEDDINGS] Failed to find similar items:', error);
|
console.error('[EMBEDDINGS-SERVICE] Similarity search failed:', error);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
isEnabled(): boolean {
|
getStats(): {initialized: boolean; count: number } {
|
||||||
if (!this.enabled && !this.isInitialized) {
|
|
||||||
this.checkEnabledStatus().catch(console.error);
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
getStats(): { enabled: boolean; initialized: boolean; count: number } {
|
|
||||||
return {
|
return {
|
||||||
enabled: this.enabled,
|
|
||||||
initialized: this.isInitialized,
|
initialized: this.isInitialized,
|
||||||
count: this.embeddings.length
|
count: this.embeddings.length
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
getConfig(): EmbeddingsConfig {
|
||||||
|
return { ...this.config };
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const embeddingsService = new EmbeddingsService();
|
export const embeddingsService = new EmbeddingsService();
|
||||||
|
|
||||||
export { embeddingsService, type EmbeddingData, type SimilarityResult };
|
|
20
src/utils/hashUtils.ts
Normal file
20
src/utils/hashUtils.ts
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
// src/utils/hashUtils.ts
|
||||||
|
import { promises as fs } from 'fs';
import { readFileSync } from 'fs';
import path from 'path';
import crypto from 'crypto';
|
||||||
|
|
||||||
|
/**
 * Computes the SHA-256 digest of `src/data/tools.yaml`, resolved against the
 * current working directory.
 *
 * @returns The digest as a lowercase hex string.
 * @throws Rejects with the underlying filesystem error when the file cannot be read.
 */
export async function getToolsFileHash(): Promise<string> {
  const toolsPath = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const contents = await fs.readFile(toolsPath, 'utf8');

  const hasher = crypto.createHash('sha256');
  hasher.update(contents);
  return hasher.digest('hex');
}
|
||||||
|
|
||||||
|
/**
 * Synchronous variant of the tools-file hash: SHA-256 of `src/data/tools.yaml`
 * resolved against the current working directory.
 *
 * Uses the statically imported `readFileSync` rather than `require('fs')`:
 * in an ES module `require` is not defined, so the previous call threw a
 * ReferenceError that the catch below swallowed, making the function return
 * `null` unconditionally.
 *
 * @returns The digest as a lowercase hex string, or `null` when the file
 *          cannot be read (best-effort: callers treat `null` as "unknown").
 */
export function getToolsFileHashSync(): string | null {
  try {
    const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
    const raw = readFileSync(file, 'utf8');
    return crypto.createHash('sha256').update(raw).digest('hex');
  } catch {
    return null;
  }
}
|
356
src/utils/jsonUtils.ts
Normal file
356
src/utils/jsonUtils.ts
Normal file
@ -0,0 +1,356 @@
|
|||||||
|
// src/utils/jsonUtils.ts
|
||||||
|
/**
 * Static helpers for parsing, repairing, validating and sanitising JSON that
 * originates from AI responses or from uploaded audit exports.
 */
export class JSONParser {
  /**
   * Leniently parses JSON embedded in free text (typically a model reply).
   *
   * Steps: strip a Markdown code fence if present; otherwise accept a bare
   * top-level array as-is, or cut out the outermost `{...}` span; attempt a
   * repair when the text looks truncated; parse. Non-array objects are
   * guaranteed to carry array-valued `selectedTools` and `selectedConcepts`.
   *
   * @param jsonString Raw text that should contain JSON.
   * @param fallback   Value returned when nothing parseable is found.
   * @returns The parsed value, or `fallback` on any failure (never throws).
   */
  static safeParseJSON(jsonString: string, fallback: any = null): any {
    try {
      let cleaned = jsonString.trim();

      const fenced =
        cleaned.match(/```json\s*([\s\S]*?)\s*```/i) || cleaned.match(/```\s*([\s\S]*?)\s*```/i);

      if (fenced) {
        cleaned = fenced[1] || fenced[0];
      } else {
        // A reply that is a bare array must not go through the object regex
        // below: it would strip the enclosing brackets and break the parse.
        if (cleaned.startsWith('[')) {
          const direct = JSONParser.tryParseArray(cleaned);
          if (direct !== null) return direct;
        }
        const objectSpan = cleaned.match(/\{[\s\S]*\}/);
        if (objectSpan) cleaned = objectSpan[0];
      }

      if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
        console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
        cleaned = JSONParser.repairTruncatedJSON(cleaned);
      }

      const parsed = JSON.parse(cleaned);

      // Selection defaults only make sense on plain objects, not on arrays.
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
        if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
      }

      return parsed;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn('[JSON-PARSER] JSON parsing failed:', message);
      return fallback;
    }
  }

  /** Parses `text` and returns it only when it is a JSON array; otherwise `null`. */
  private static tryParseArray(text: string): any[] | null {
    try {
      const value = JSON.parse(text);
      return Array.isArray(value) ? value : null;
    } catch {
      return null;
    }
  }

  /**
   * Best-effort repair of JSON that was cut off mid-stream.
   *
   * If a balanced top-level structure exists, the text up to the end of the
   * last such structure is returned. Otherwise an unterminated string is
   * closed, a dangling trailing comma is dropped, and every still-open
   * `{` / `[` is closed in reverse order of opening.
   *
   * @param cleaned Candidate JSON text.
   * @returns Text that is more likely to parse; not guaranteed to be valid.
   */
  private static repairTruncatedJSON(cleaned: string): string {
    const closers: string[] = [];
    let inString = false;
    let escaped = false;
    let lastCompleteStructure = '';

    for (let i = 0; i < cleaned.length; i++) {
      const char = cleaned[i];

      if (escaped) {
        escaped = false;
        continue;
      }
      if (char === '\\') {
        escaped = true;
        continue;
      }
      if (char === '"') {
        inString = !inString;
        continue;
      }
      if (inString) continue;

      if (char === '{') {
        closers.push('}');
      } else if (char === '[') {
        closers.push(']');
      } else if ((char === '}' || char === ']') && closers[closers.length - 1] === char) {
        closers.pop();
        if (closers.length === 0) {
          lastCompleteStructure = cleaned.substring(0, i + 1);
        }
      }
    }

    if (lastCompleteStructure) {
      return lastCompleteStructure;
    }

    let repaired = cleaned;
    // A lone trailing backslash would escape the quote we are about to add.
    if (escaped) repaired = repaired.slice(0, -1);
    if (inString) {
      repaired += '"';
    } else {
      repaired = repaired.replace(/,\s*$/, '');
    }
    return repaired + closers.reverse().join('');
  }

  /** Returns the contents of every double-quoted, non-empty run in `text`. */
  private static quotedStrings(text: string): string[] {
    const hits = text.match(/"([^"]+)"/g);
    return hits ? hits.map(hit => hit.replace(/"/g, '')) : [];
  }

  /**
   * Regex-based salvage for replies that cannot be parsed at all.
   *
   * Reads the quoted entries of the `selectedTools` / `selectedConcepts`
   * arrays when those keys are present. When neither yields anything, falls
   * back to the first 15 quoted strings that are longer than two characters,
   * are not one of the known keys, contain no colon and are not purely
   * numeric, and reports them as tools.
   *
   * @param jsonString Malformed JSON-like text.
   */
  static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
    const selectedTools: string[] = [];
    const selectedConcepts: string[] = [];

    const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
    if (toolsMatch) {
      selectedTools.push(...JSONParser.quotedStrings(toolsMatch[1]));
    }

    const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
    if (conceptsMatch) {
      selectedConcepts.push(...JSONParser.quotedStrings(conceptsMatch[1]));
    }

    if (selectedTools.length === 0 && selectedConcepts.length === 0) {
      const reservedKeys = ['selectedTools', 'selectedConcepts', 'reasoning'];
      const possibleNames = JSONParser.quotedStrings(jsonString)
        .filter(name =>
          name.length > 2 &&
          !reservedKeys.includes(name) &&
          !name.includes(':') &&
          !/^\d+$/.test(name)
        )
        .slice(0, 15);

      selectedTools.push(...possibleNames);
    }

    return { selectedTools, selectedConcepts };
  }

  /**
   * Strict parser for untrusted input.
   *
   * Rejects non-string input, input longer than `maxSize` (measured with
   * `String.length`), and any text matching one of the suspicious patterns
   * below before parsing. Note the pattern check runs on the raw text, so a
   * legitimate value containing e.g. the word "constructor" is rejected too.
   *
   * @param jsonString Text to parse.
   * @param maxSize    Maximum accepted length; defaults to 10 * 1024 * 1024.
   * @returns The parsed object or array.
   * @throws Error on size/type/pattern violations, on invalid syntax, or when
   *         the parsed value is not an object.
   */
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    const suspiciousPatterns = [
      /<script/i,
      /javascript:/i,
      /eval\(/i,
      /function\s*\(/i,
      /__proto__/i,
      /constructor/i
    ];

    if (suspiciousPatterns.some(pattern => pattern.test(jsonString))) {
      throw new Error('Potentially malicious content detected in JSON');
    }

    try {
      const parsed = JSON.parse(jsonString);

      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('JSON must be an object');
      }

      return parsed;
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }
  }

  /**
   * Produces a bounded, cleaned deep copy of `obj` for audit output.
   *
   * - Recursion stops at `maxDepth` with the marker `'[Max depth reached]'`.
   * - Strings have `<script>...</script>` spans replaced first and are then
   *   capped at 500 characters (so long strings cannot bypass the stripping).
   * - Arrays keep their first 20 items plus a summary string.
   * - Objects drop the keys `__proto__`, `constructor` and `prototype`, keep
   *   the first 50 remaining keys, and record the overflow under `'[truncated]'`.
   * - Any other value is converted with `String()`.
   *
   * @param obj          Value to sanitise.
   * @param maxDepth     Maximum nesting depth to descend into.
   * @param currentDepth Internal recursion counter; callers normally omit it.
   */
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      const stripped = obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
      return stripped.length > 500 ? stripped.slice(0, 500) + '...[truncated]' : stripped;
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    const recurse = (value: any): any => JSONParser.sanitizeForAudit(value, maxDepth, currentDepth + 1);

    if (Array.isArray(obj)) {
      if (obj.length > 20) {
        return [...obj.slice(0, 20).map(recurse), `...[${obj.length - 20} more items]`];
      }
      return obj.map(recurse);
    }

    if (typeof obj === 'object') {
      const unsafeKeys = ['__proto__', 'constructor', 'prototype'];
      // Filter before truncating so the key guard applies to large objects too.
      const keys = Object.keys(obj).filter(key => !unsafeKeys.includes(key));

      const sanitized: any = {};
      for (const key of keys.slice(0, 50)) {
        sanitized[key] = recurse(obj[key]);
      }
      if (keys.length > 50) {
        sanitized['[truncated]'] = `${keys.length - 50} more properties`;
      }
      return sanitized;
    }

    return String(obj);
  }

  /**
   * Checks that `data` has the shape of an audit export: top-level
   * `metadata`, `recommendation` and `auditTrail`; the required metadata
   * fields; and the required fields on every audit-trail entry.
   *
   * @returns `isValid` plus the list of human-readable problems found.
   */
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    for (const prop of ['metadata', 'recommendation', 'auditTrail']) {
      if (!(prop in data)) {
        errors.push(`Missing required property: ${prop}`);
      }
    }

    if (data.metadata && typeof data.metadata === 'object') {
      for (const prop of ['timestamp', 'version', 'userQuery', 'mode']) {
        if (!(prop in data.metadata)) {
          errors.push(`Missing required metadata property: ${prop}`);
        }
      }
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }
        for (const prop of requiredEntryProps) {
          if (!(prop in entry)) {
            errors.push(`Audit entry ${index} missing required property: ${prop}`);
          }
        }
      });
    }

    return {
      isValid: errors.length === 0,
      errors
    };
  }

  /**
   * Assembles the audit-export document.
   *
   * `recommendation` and each audit-trail entry are passed through
   * `sanitizeForAudit` (depth 6 and 4 respectively); `userQuery` is capped at
   * 1000 characters; missing `additionalMetadata` fields fall back to
   * `'unknown'`, `{}` or `[]`.
   */
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: additionalMetadata.toolsDataHash || 'unknown',
        aiModel: additionalMetadata.aiModel || 'unknown',
        aiParameters: additionalMetadata.aiParameters || {},
        processingStats: additionalMetadata.processingStats || {}
      },
      recommendation: JSONParser.sanitizeForAudit(recommendation, 6),
      auditTrail: auditTrail.map(entry => JSONParser.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: additionalMetadata.selectedTools || [],
        backgroundKnowledge: additionalMetadata.backgroundKnowledge || [],
        contextHistory: additionalMetadata.contextHistory || [],
        embeddingsSimilarities: additionalMetadata.embeddingsSimilarities || {}
      }
    };
  }

  /**
   * Validates an uploaded analysis export.
   *
   * Structural problems (see `validateAuditExportStructure`) are reported as
   * `issues` and make the upload invalid. Everything else — unparseable or
   * older-than-30-days timestamp, unknown mode, missing AI/selection entries,
   * fewer than 80% of entries carrying a numeric confidence — is a warning.
   */
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = JSONParser.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const dayMs = 24 * 60 * 60 * 1000;
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * dayMs; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / dayMs)} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const trail: any[] = data.auditTrail;
      const aiDecisions = trail.filter((e: any) => e.action === 'ai-decision').length;
      const toolSelections = trail.filter((e: any) => e.action === 'selection-decision').length;

      if (aiDecisions === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (toolSelections === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      const entriesWithConfidence = trail.filter((e: any) => typeof e.confidence === 'number').length;
      // For an empty trail this is NaN, which compares false below (no warning).
      const confidenceRatio = entriesWithConfidence / trail.length;

      if (confidenceRatio < 0.8) {
        warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings
    };
  }
}
|
@ -1,22 +0,0 @@
|
|||||||
// src/utils/toolHelpers.ts
|
|
||||||
|
|
||||||
/**
 * Shape of a single catalogue entry. Only `name` is required; every other
 * field is optional.
 */
export interface Tool {
  name: string;
  type?: 'software' | 'method' | 'concept';
  description?: string;

  // Classification
  domains?: string[];
  phases?: string[];
  platforms?: string[];
  tags?: string[];
  skillLevel?: string;
  related_concepts?: string[];

  // Availability
  projectUrl?: string | null;
  license?: string;
  knowledgebase?: boolean;
}
|
|
||||||
|
|
||||||
export {
|
|
||||||
createToolSlug,
|
|
||||||
findToolByIdentifier,
|
|
||||||
isToolHosted
|
|
||||||
} from './clientUtils.js';
|
|
346
src/utils/toolSelector.ts
Normal file
346
src/utils/toolSelector.ts
Normal file
@ -0,0 +1,346 @@
|
|||||||
|
// src/utils/toolSelector.ts
|
||||||
|
import { aiService } from './aiService.js';
|
||||||
|
import { embeddingsService, type SimilarityResult } from './embeddings.js';
|
||||||
|
import { confidenceScoring } from './confidenceScoring.js';
|
||||||
|
import { JSONParser } from './jsonUtils.js';
|
||||||
|
import { getPrompt } from '../config/prompts.js';
|
||||||
|
import 'dotenv/config';
|
||||||
|
|
||||||
|
/**
 * Tunables for tool selection. Each field is populated by the `ToolSelector`
 * constructor from the environment variable named in its comment.
 */
export interface ToolSelectionConfig {
  /** `AI_MAX_SELECTED_ITEMS` — handed to the `toolSelection` prompt. */
  maxSelectedItems: number;

  // --- Embeddings search ---
  /** `AI_EMBEDDING_CANDIDATES` — result limit passed to the similarity search. */
  embeddingCandidates: number;
  /** `AI_SIMILARITY_THRESHOLD` — threshold passed to the similarity search. */
  similarityThreshold: number;
  /** `AI_EMBEDDINGS_MIN_TOOLS` — minimum hits required to use the filtered set. */
  embeddingsMinTools: number;
  /** `AI_EMBEDDINGS_MAX_REDUCTION_RATIO` — maximum hits/total ratio to use the filtered set. */
  embeddingsMaxReductionRatio: number;

  // --- Prompt size limits ---
  /** `AI_EMBEDDING_SELECTION_LIMIT` */
  embeddingSelectionLimit: number;
  /** `AI_EMBEDDING_CONCEPTS_LIMIT` */
  embeddingConceptsLimit: number;
  /** `AI_NO_EMBEDDINGS_TOOL_LIMIT` */
  noEmbeddingsToolLimit: number;
  /** `AI_NO_EMBEDDINGS_CONCEPT_LIMIT` */
  noEmbeddingsConceptLimit: number;

  // --- Method/software split of the tool budget ---
  /** `AI_METHOD_SELECTION_RATIO` */
  methodSelectionRatio: number;
  /** `AI_SOFTWARE_SELECTION_RATIO` */
  softwareSelectionRatio: number;
}
|
||||||
|
|
||||||
|
/**
 * Mutable per-request state threaded through the selection calls.
 */
export interface SelectionContext {
  userQuery: string;
  mode: string;

  /** Item name → similarity score; cleared and refilled by `getIntelligentCandidates`. */
  embeddingsSimilarities: Map<string, number>;
  seenToolNames: Set<string>;

  /** Tools chosen so far, with their phase assignment and optional rationale. */
  selectedTools?: {
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }[];
}
|
||||||
|
|
||||||
|
/**
 * Outcome of an AI selection pass: the resolved tool and concept records plus
 * the confidence value computed for the selection.
 */
export interface ToolSelectionResult {
  confidence: number;
  selectedConcepts: any[];
  selectedTools: any[];
}
|
||||||
|
|
||||||
|
/**
 * Picks tools and concepts for a user query: an embeddings similarity search
 * narrows the catalogue, then an AI call makes the final choice.
 */
class ToolSelector {
  private config: ToolSelectionConfig;

  /** Reads every tunable from the environment, falling back to built-in defaults. */
  constructor() {
    this.config = {
      maxSelectedItems: this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25),
      embeddingCandidates: this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50),
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
      noEmbeddingsToolLimit: this.getEnvInt('AI_NO_EMBEDDINGS_TOOL_LIMIT', 25),
      noEmbeddingsConceptLimit: this.getEnvInt('AI_NO_EMBEDDINGS_CONCEPT_LIMIT', 10),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5)
    };

    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }

  /**
   * Integer environment lookup. Returns `defaultValue` when the variable is
   * unset, empty, or not parseable as a base-10 integer (so a typo in the
   * environment cannot inject `NaN` into the limits).
   */
  private getEnvInt(key: string, defaultValue: number): number {
    const raw = process.env[key];
    if (!raw) return defaultValue;
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Float environment lookup. Returns `defaultValue` when the variable is
   * unset, empty, or not parseable as a number.
   */
  private getEnvFloat(key: string, defaultValue: number): number {
    const raw = process.env[key];
    if (!raw) return defaultValue;
    const parsed = parseFloat(raw);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Builds the candidate set for a query and runs the AI selection on it.
   *
   * The embeddings hits are recorded in `context.embeddingsSimilarities`
   * (which is cleared first) and logged via the audit service. The hits are
   * used as candidates only when there are at least `embeddingsMinTools`
   * tools and they make up no more than `embeddingsMaxReductionRatio` of the
   * catalogue; otherwise the full dataset is used.
   *
   * @param userQuery Free-text query.
   * @param toolsData Catalogue; `tools`, `concepts`, `domains`, `phases` and
   *                  `'domain-agnostic-software'` are read from it.
   * @param mode      Selection mode, forwarded to the prompt and audit entry.
   * @param context   Per-request state; mutated as described above.
   * @returns Selected tools/concepts plus the catalogue's domains, phases and
   *          domain-agnostic software, passed through unchanged.
   * @throws Propagates failures of the AI selection step.
   */
  async getIntelligentCandidates(
    userQuery: string,
    toolsData: any,
    mode: string,
    context: SelectionContext
  ): Promise<{
    tools: any[];
    concepts: any[];
    domains: any[];
    phases: any[];
    'domain-agnostic-software': any[];
  }> {
    console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');

    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];

    context.embeddingsSimilarities.clear();

    // Initialisation failure is logged only; the search below is still attempted.
    try {
      await embeddingsService.waitForInitialization();
    } catch (error) {
      console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
    }

    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');

    const embeddingsSearchStart = Date.now();

    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.config.embeddingCandidates,
      this.config.similarityThreshold
    ) as SimilarityResult[];

    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');

    const { auditService } = await import('./auditService.js');
    const { getDataVersion } = await import('./dataService.js');

    const toolsDataHash = getDataVersion() || 'unknown';

    auditService.addEmbeddingsSearch(
      userQuery,
      similarItems,
      this.config.similarityThreshold,
      embeddingsSearchStart,
      {
        toolsDataHash: toolsDataHash,
        selectionPhase: 'initial-candidate-selection',
        candidateLimit: this.config.embeddingCandidates,
        mode: mode,
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
      }
    );

    similarItems.forEach(item => {
      context.embeddingsSimilarities.set(item.name, item.similarity);
    });

    const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
    const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));

    // Resolve hits back to catalogue records, dropping names that no longer exist.
    const similarTools = similarItems
      .filter((item: any) => item.type === 'tool')
      .map((item: any) => toolsMap.get(item.name))
      .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

    const similarConcepts = similarItems
      .filter((item: any) => item.type === 'concept')
      .map((item: any) => conceptsMap.get(item.name))
      .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

    const totalAvailableTools = toolsData.tools.length;
    const reductionRatio = similarTools.length / totalAvailableTools;

    if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
      candidateTools = similarTools;
      candidateConcepts = similarConcepts;

      console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
    } else {
      console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
    }

    const selection = await this.performAISelection(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
      context
    );

    return {
      tools: selection.selectedTools,
      concepts: selection.selectedConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

  /**
   * Sends a bounded slice of the candidates to the AI and maps the names it
   * returns back to the candidate records.
   *
   * The tool budget is `min(embeddingSelectionLimit, noEmbeddingsToolLimit)`,
   * split between methods and software by the configured ratios; unused
   * capacity is topped up first with further methods, then with software.
   *
   * @throws Error when the reply lacks array-valued `selectedTools` /
   *         `selectedConcepts` or selects nothing; AI call errors are logged
   *         and rethrown.
   */
  private async performAISelection(
    userQuery: string,
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
    context: SelectionContext
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');

    const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
    const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');

    console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

    const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
    const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
    const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);

    const toolsToSend: any[] = [
      ...methodsWithFullData.slice(0, methodLimit),
      ...softwareWithFullData.slice(0, softwareLimit),
    ];

    const remainingCapacity = maxTools - toolsToSend.length;
    if (remainingCapacity > 0) {
      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
      const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
      toolsToSend.push(...extraMethods, ...extraSoftware);
    }

    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);

    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );

    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);

      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
        throw new Error('AI selection failed to return valid tool and concept selection');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected === 0) {
        throw new Error('AI selection returned empty selection');
      }

      const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));

      // Names the AI invented (not among the candidates) are silently dropped.
      const selectedTools = result.selectedTools
        .map((name: string) => toolsMap.get(name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

      const selectedConcepts = result.selectedConcepts
        .map((name: string) => conceptsMap.get(name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');

      console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');

      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,
        candidateTools.length + candidateConcepts.length
      );

      return { selectedTools, selectedConcepts, confidence };
    } catch (error) {
      console.error('[TOOL-SELECTOR] AI selection failed:', error);
      throw error;
    }
  }

  /**
   * Asks the micro-task AI which of `availableTools` fit a phase.
   *
   * Entries of the reply whose `toolName` is not among `availableTools` —
   * including `null`/non-object entries — are dropped with a warning instead
   * of aborting the whole phase.
   *
   * @returns The validated selections; `[]` when no tools are available, the
   *          reply is not an array, or the AI call fails (best-effort).
   */
  async selectToolsForPhase(
    userQuery: string,
    phase: any,
    availableTools: any[],
    context: SelectionContext
  ): Promise<Array<{
    toolName: string;
    taskRelevance: number;
    justification: string;
    limitations: string[];
  }>> {
    console.log('[TOOL-SELECTOR] Selecting tools for phase:', phase.id);

    if (availableTools.length === 0) {
      console.log('[TOOL-SELECTOR] No tools available for phase:', phase.id);
      return [];
    }

    const prompt = getPrompt('phaseToolSelection', userQuery, phase, availableTools);

    try {
      const response = await aiService.callMicroTaskAI(prompt);
      const selections = JSONParser.safeParseJSON(response.content, []);

      if (Array.isArray(selections)) {
        const availableNames = new Set(
          availableTools.filter((tool: any) => tool).map((tool: any) => tool.name)
        );

        const validSelections = selections.filter((sel: any) => {
          const isKnown = !!sel && typeof sel === 'object' && availableNames.has(sel.toolName);
          if (!isKnown) {
            console.warn('[TOOL-SELECTOR] Invalid tool selection for phase:', phase.id, sel ? sel.toolName : sel);
          }
          return isKnown;
        });

        console.log('[TOOL-SELECTOR] Valid selections for phase:', phase.id, validSelections.length);
        return validSelections;
      }

      return [];
    } catch (error) {
      console.error('[TOOL-SELECTOR] Phase tool selection failed:', error);
      return [];
    }
  }

  /** Projects a tool record onto the fields sent to the AI; list fields default to `[]`. */
  private createToolData = (tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  });

  /** Projects a concept record onto the fields sent to the AI; `type` is fixed to `'concept'`. */
  private createConceptData = (concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  });

  /** Returns a shallow copy of the active configuration. */
  getConfig(): ToolSelectionConfig {
    return { ...this.config };
  }
}
|
||||||
|
|
||||||
|
export const toolSelector = new ToolSelector();
|
Loading…
x
Reference in New Issue
Block a user