Compare commits

..

No commits in common. "05d957324a1df1428b19567460e6c49646e5c543" and "1d98dd32573c66af8416176ca088ec8a22e867bb" have entirely different histories.

27 changed files with 2605 additions and 4634 deletions

View File

@ -60,7 +60,7 @@ FORENSIC_AUDIT_MAX_ENTRIES=50
# === AI SEMANTIC SEARCH ===
# Enable semantic search (highly recommended for better results)
REMOVE_AI_EMBEDDINGS_ENABLED=true
AI_EMBEDDINGS_ENABLED=true
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
AI_EMBEDDINGS_MODEL=mistral-embed
@ -122,8 +122,8 @@ AI_EMBEDDINGS_BATCH_SIZE=10
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
# === Context Management ===
REMOVE_AI_MAX_CONTEXT_TOKENS=4000
REMOVE_AI_MAX_PROMPT_TOKENS=2500
AI_MAX_CONTEXT_TOKENS=4000
AI_MAX_PROMPT_TOKENS=2500
# === Confidence Scoring ===
CONFIDENCE_SEMANTIC_WEIGHT=0.5

View File

@ -1,333 +0,0 @@
#!/usr/bin/env node
// find-duplicate-functions.mjs
// Usage:
// node find-duplicate-functions.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
// Example:
// node find-duplicate-functions.mjs . --mode struct --min-lines 3
import fs from "fs";
import path from "path";
import * as url from "url";
import ts from "typescript";
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
/** -------- CLI OPTIONS -------- */
// Parse command-line options.
//   [rootDir]          first bare (non "--") argument; later bare arguments are ignored
//   --mode <m>         "exact" or "struct" (case-insensitive)
//   --min-lines <N>    minimum number of non-empty body lines for a function to be considered
//   --json             emit machine-readable JSON instead of the text report
const args = process.argv.slice(2);
let rootDir = ".";
let mode = "struct"; // "exact" | "struct"
let minLines = 3;
let outputJson = false;
for (let i = 0; i < args.length; i++) {
  const a = args[i];
  if (!a.startsWith("--") && rootDir === ".") {
    rootDir = a;
  } else if (a === "--mode") {
    mode = (args[++i] || "struct").toLowerCase();
    if (!["exact", "struct"].includes(mode)) {
      console.error("Invalid --mode. Use 'exact' or 'struct'.");
      process.exit(1);
    }
  } else if (a === "--min-lines") {
    minLines = parseInt(args[++i] || "3", 10);
    // A non-numeric value parses to NaN; every `count < NaN` comparison is false,
    // which would silently disable the min-lines filter. Fail loudly instead.
    if (Number.isNaN(minLines)) {
      console.error("Invalid --min-lines. Use an integer.");
      process.exit(1);
    }
  } else if (a === "--json") {
    outputJson = true;
  }
}
/** -------- FILE DISCOVERY -------- */
// Directory names that are never descended into.
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);
// File extensions that are scanned for functions.
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);

/**
 * Recursively collect the paths of all scannable source files below `dir`.
 *
 * Directories named in DEFAULT_IGNORES are skipped; only regular files whose
 * extension is in VALID_EXTS are returned. Results keep the order in which
 * `fs.readdirSync` lists the entries, with a directory's files appearing at
 * the position of that directory.
 *
 * @param {string} dir directory to scan
 * @returns {string[]} paths built with `path.join(dir, ...)`
 */
function walk(dir) {
  /** @type {string[]} */
  const out = [];
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  for (const e of entries) {
    const p = path.join(dir, e.name);
    if (e.isDirectory()) {
      if (DEFAULT_IGNORES.has(e.name)) continue;
      // Append one by one: spreading a whole subtree into push() turns every
      // path into a call argument and can throw RangeError on very large trees.
      for (const nested of walk(p)) out.push(nested);
    } else if (e.isFile() && VALID_EXTS.has(path.extname(e.name))) {
      out.push(p);
    }
  }
  return out;
}
/** -------- ASTRO CODE EXTRACTION --------
 * Extract TS/JS code from:
 * - frontmatter: --- ... ---
 * - <script ...> ... </script>
 *
 * Every returned block carries `offset`: the 1-based line of the .astro file
 * on which the block's first line sits. A 1-based line number inside the block
 * therefore maps to `line + offset - 1` in the file.
 *
 * @param {string} source full text of the .astro file
 * @returns {{code:string, offset:number}[]} frontmatter first (if any), then scripts in document order
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];
  // Frontmatter (must be at top in Astro): the FIRST pair of --- ... ---
  if (source.startsWith("---")) {
    const end = source.indexOf("\n---", 3);
    if (end !== -1) {
      // The slice starts right after the opening fence, i.e. still on file
      // line 1, so the line offset is 1 (not the character offset 4).
      blocks.push({ code: source.slice(3, end + 1), offset: 1 }); // include trailing \n
    }
  }
  // <script ...> ... </script>
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  let m;
  while ((m = scriptRe.exec(source))) {
    const code = m[1] || "";
    // The opening tag cannot contain '>', so the first '>' of the match closes
    // it. Measure the line where the code starts rather than where the tag
    // starts, so opening tags spread over several lines are handled.
    const codeStart = m.index + m[0].indexOf(">") + 1;
    blocks.push({ code, offset: indexToLine(source, codeStart) });
  }
  return blocks;
}
/** -------- UTIL: index -> 1-based line --------
 * Count the line feeds before `idx` to get the 1-based line containing it.
 * An `idx` beyond the end of `text` yields the last line.
 *
 * @param {string} text
 * @param {number} idx character index into `text`
 * @returns {number} 1-based line number
 */
function indexToLine(text, idx) {
  let line = 1;
  for (let i = 0; i < idx && i < text.length; i++) {
    if (text.charCodeAt(i) === 10) line++;
  }
  return line;
}
/** -------- AST HELPERS -------- */
/**
 * Parse `code` into a TypeScript AST with parent pointers set.
 *
 * @param {string} virtualPath file name given to the parser; a ".tsx" suffix selects TSX parsing
 * @param {string} code source text to parse
 * @returns the SourceFile produced by `ts.createSourceFile`
 */
function createSourceFile(virtualPath, code) {
  const scriptKind = virtualPath.endsWith(".tsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS;
  const setParentNodes = true; // getFunctionInfo reads node.parent to name functions
  return ts.createSourceFile(virtualPath, code, ts.ScriptTarget.Latest, setParentNodes, scriptKind);
}
/**
 * Normalize an AST subtree to a structural signature string.
 *
 * The tree is walked in `forEachChild` order and one token is emitted per
 * node, joined with "|". Identifiers and literals collapse to fixed
 * placeholders so that functions differing only in names or constant values
 * produce the same signature.
 *
 * @param node root AST node (here: a function body)
 * @returns {string} the "|"-joined token sequence
 */
function structuralSignature(node) {
  const K = ts.SyntaxKind;
  // Kinds replaced by a placeholder; traversal stops at these nodes.
  const leafTokens = new Map([
    [K.Identifier, "Id"],
    [K.PrivateIdentifier, "PrivId"],
    [K.StringLiteral, "Str"],
    [K.NoSubstitutionTemplateLiteral, "Str"],
    [K.TemplateHead, "Str"],
    [K.TemplateMiddle, "Str"],
    [K.TemplateTail, "Str"],
    [K.NumericLiteral, "Num"],
    [K.TrueKeyword, "Bool"],
    [K.FalseKeyword, "Bool"],
    [K.NullKeyword, "Nil"],
    [K.UndefinedKeyword, "Nil"],
  ]);
  // Kinds all reported as "Prop"; their children are still visited.
  const propLikeKinds = new Set([
    K.PropertyAssignment,
    K.ShorthandPropertyAssignment,
    K.MethodDeclaration,
    K.MethodSignature,
  ]);
  /** @type {string[]} */
  const tokens = [];
  const collect = (n) => {
    // Trivia (comments/whitespace) never appears as AST nodes, so nothing to skip.
    const placeholder = leafTokens.get(n.kind);
    if (placeholder !== undefined) {
      tokens.push(placeholder);
      return;
    }
    if (propLikeKinds.has(n.kind)) {
      tokens.push("Prop");
    } else {
      tokens.push(ts.SyntaxKind[n.kind] || `K${n.kind}`);
    }
    n.forEachChild(collect);
  };
  collect(node);
  return tokens.join("|");
}
/**
 * Collect every function-like body in a parsed source file.
 *
 * Visits function declarations, function expressions, arrow functions and
 * method declarations (including nested ones). Bodies with fewer than
 * `minLines` non-empty lines are dropped.
 *
 * @param sf SourceFile created with parent pointers set
 * @param {string} filePath path of the originating file (currently unused)
 * @returns {Array<{
 *   name: string,
 *   bodyText: string,
 *   structKey: string,
 *   start: number,
 *   end: number,
 *   startLine: number,
 *   endLine: number
 * }>} one record per kept function; `startLine`/`endLine` are 1-based within `sf`
 */
function getFunctionInfo(sf, filePath) {
  /** @type {Array<{
  name: string,
  bodyText: string,
  structKey: string,
  start: number,
  end: number,
  startLine: number,
  endLine: number
  }>} */
  const out = [];
  const addFunc = (nameNode, bodyNode) => {
    if (!bodyNode) return;
    const bodyText = bodyNode.getText(sf).trim();
    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();
    const { line: startLine } = sf.getLineAndCharacterOfPosition(start);
    const { line: endLine } = sf.getLineAndCharacterOfPosition(end);
    // ts.idText returns the name as written. `escapedText` is the internal
    // form, which gains an extra leading underscore for names starting with "__".
    const name =
      nameNode && ts.isIdentifier(nameNode) ? ts.idText(nameNode) : "(anonymous)";
    // min-lines filter (blank lines do not count)
    const lines = bodyText.split(/\r?\n/).filter(Boolean);
    if (lines.length < minLines) return;
    // structural signature from the body
    const structKey = structuralSignature(bodyNode);
    out.push({
      name,
      bodyText,
      structKey,
      start,
      end,
      // getLineAndCharacterOfPosition is 0-based; report 1-based lines
      startLine: startLine + 1,
      endLine: endLine + 1,
    });
  };
  const visit = (node) => {
    if (ts.isFunctionDeclaration(node) && node.body) {
      addFunc(node.name ?? null, node.body);
    } else if (ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
      // find a name if it's assigned: const foo = () => {}  or  { foo: () => {} }
      let name = null;
      if (node.parent && ts.isVariableDeclaration(node.parent) && node.parent.name) {
        name = node.parent.name;
      } else if (
        node.parent &&
        ts.isPropertyAssignment(node.parent) &&
        ts.isIdentifier(node.parent.name)
      ) {
        name = node.parent.name;
      } else if (node.name) {
        name = node.name;
      }
      if (node.body) addFunc(name, node.body);
    } else if (ts.isMethodDeclaration(node) && node.body) {
      addFunc(node.name, node.body);
    }
    // always descend so nested functions are collected too
    node.forEachChild(visit);
  };
  visit(sf);
  return out;
}
/** -------- MAIN SCAN -------- */
import crypto from "crypto";

// Every scannable file below the requested root (resolved against the cwd).
const files = walk(path.resolve(process.cwd(), rootDir));

/** Maps from hash -> occurrences */
const groups = new Map();

/** Helper for exact hash: SHA-1 of the text with whitespace runs collapsed to one space. */
const exactHash = (text) => {
  const collapsed = text.replace(/\s+/g, " ").trim();
  return crypto.createHash("sha1").update(collapsed).digest("hex");
};

for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");
    const isAstro = ext === ".astro";
    const relPath = path.relative(process.cwd(), file);

    // An .astro file yields one unit per extracted block; any other file is one unit.
    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits = isAstro
      ? extractCodeFromAstro(raw).map((b, i) => ({
          virtualPath: file + `#astro${i + 1}.ts`,
          code: b.code,
          lineOffset: b.offset || 1,
        }))
      : [{ virtualPath: file, code: raw, lineOffset: 1 }];

    for (const unit of codeUnits) {
      const sf = createSourceFile(unit.virtualPath, unit.code);
      for (const f of getFunctionInfo(sf, file)) {
        // Group key: normalized body text in "exact" mode, AST shape otherwise.
        const key =
          mode === "exact"
            ? exactHash(f.bodyText)
            : crypto.createHash("sha1").update(f.structKey).digest("hex");
        // Block-relative lines are shifted into file coordinates for .astro files only.
        const shift = isAstro ? unit.lineOffset - 1 : 0;
        const lineCount = f.endLine - f.startLine + 1;
        const previewHead = f.bodyText.split(/\r?\n/).slice(0, 5).join("\n");
        const item = {
          file,
          where: `${relPath}:${f.startLine + shift}-${f.endLine + shift}`,
          name: f.name,
          lines: lineCount,
          preview: lineCount > 5 ? previewHead + "\n..." : previewHead,
        };
        const bucket = groups.get(key);
        if (bucket) {
          bucket.push(item);
        } else {
          groups.set(key, [item]);
        }
      }
    }
  } catch (e) {
    // Best effort: an unreadable or unparsable file must not abort the whole scan.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}
/** -------- REPORT -------- */
// Keep only hashes seen more than once, largest groups first.
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// The branches below replace the former process.exit(0) calls: exiting right
// after console.log can cut off output that has not been flushed yet when
// stdout is a pipe (large --json results). Nothing else runs after this
// chain, so the process still ends with exit code 0.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
} else if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
} else {
  console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);
  dupes.forEach((g, i) => {
    console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
    // Show the first occurrence as the representative sample of the group.
    const example = g.items[0];
    console.log(` Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
    console.log(" ---");
    console.log(indent(example.preview, " "));
    console.log(" ---");
    g.items.forEach((it) => {
      console.log(`${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
    });
    console.log();
  });
}
/**
 * Prefix every "\n"-separated line of `s` with `pre`.
 *
 * @param {string} s text to indent (an empty string counts as one empty line)
 * @param {string} pre prefix put in front of each line
 * @returns {string} the prefixed text, joined with "\n" again
 */
function indent(s, pre) {
  const prefixed = [];
  for (const line of s.split("\n")) {
    prefixed.push(pre + line);
  }
  return prefixed.join("\n");
}

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
---
// src/components/ContributionButton.astro
// src/components/ContributionButton.astro - CLEANED: Removed duplicate auth script
export interface Props {
type: 'edit' | 'new' | 'write';
toolName?: string;

View File

@ -1,5 +1,5 @@
---
import { createToolSlug } from '../utils/clientUtils.js';
import { createToolSlug } from '../utils/toolHelpers.js';
export interface Props {
toolName: string;

View File

@ -4,6 +4,7 @@ import { getToolsData } from '../utils/dataService.js';
const data = await getToolsData();
const scenarios = data.scenarios || [];
// Configuration
const maxDisplayed = 9;
const displayedScenarios = scenarios.slice(0, maxDisplayed);
---

View File

@ -306,7 +306,8 @@ const sortedTags = Object.entries(tagFrequency)
</div>
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
window.toolsData = toolsData;
window.toolsData = toolsData;
document.addEventListener('DOMContentLoaded', () => {
const elements = {
searchInput: document.getElementById('search-input'),
@ -392,13 +393,6 @@ const sortedTags = Object.entries(tagFrequency)
return null;
}
}
// True when the tool has a projectUrl that is set and not blank.
function isToolHosted(tool) {
  const url = tool.projectUrl;
  if (url === undefined || url === null || url === "") {
    return false;
  }
  // whitespace-only values count as "not hosted" as well
  return url.trim() !== "";
}
function toggleCollapsible(toggleBtn, content, storageKey) {
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
@ -439,6 +433,13 @@ const sortedTags = Object.entries(tagFrequency)
}
}
// True when the tool has a projectUrl that is set and not blank.
function isToolHosted(tool) {
  const url = tool.projectUrl;
  if (url === undefined || url === null || url === "") {
    return false;
  }
  // whitespace-only values count as "not hosted" as well
  return url.trim() !== "";
}
function initTagCloud() {
const visibleCount = 20;
elements.tagCloudItems.forEach((item, index) => {

View File

@ -2,15 +2,17 @@
export const AI_PROMPTS = {
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
const modeInstruction = mode === 'workflow'
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
AUSWAHLMETHODE:
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
AUSWAHLMETHODE: ${selectionMethod}
${selectionMethod === 'embeddings_candidates' ?
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
${modeInstruction}
@ -34,11 +36,6 @@ AUSWAHLSTRATEGIE:
- Lieber weniger perfekte Items als viele mittelmäßige
- Jedes Item muss begründbar sein
4. **TASK RELEVANCE REALISM**
- Gib realistische Bewertungen (50-85% typisch)
- Vermeide übertriebene 90-100% Scores
- Nur bei perfekter Übereinstimmung >85%
AUSWAHLREGELN:
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
@ -62,13 +59,7 @@ ${JSON.stringify(toolsToSend, null, 2)}
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
${JSON.stringify(conceptsToSend, null, 2)}
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.
TASK RELEVANCE GUIDELINES:
- 50-65%: Grundlegend relevant, aber nicht optimal
- 66-75%: Gut geeignet für die Aufgabe
- 76-85%: Sehr gut geeignet, klare Vorteile
- 86-100%: NUR für perfekte Übereinstimmung verwenden`;
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.`;
},
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
@ -157,19 +148,13 @@ AUSWAHLREGELN FÜR PHASE "${phase.name}":
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
4. Begründe WARUM jedes Item für diese Phase optimal ist
TASK RELEVANCE GUIDELINES:
- 60-70%: Grundlegend für diese Phase geeignet
- 71-80%: Gut geeignet, klare Phasenrelevanz
- 81-90%: Sehr gut geeignet, optimal für Phase
- 91-100%: NUR für perfekte Phasenübereinstimmung
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
[
{
"toolName": "Exakter Name aus der Liste oben",
"taskRelevance": 75,
"taskRelevance": 85,
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
"limitations": ["Mögliche Einschränkung für diese Phase"]
}
@ -281,7 +266,6 @@ AUSWAHLREGELN FÜR NACHERGÄNZUNG:
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
4. Realistische Task Relevance (70-85% typisch für Nachergänzungen)
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
{
@ -307,7 +291,7 @@ Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
}
} as const;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;

View File

@ -16,7 +16,7 @@ const knowledgebaseCollection = defineCollection({
tags: z.array(z.string()).default([]),
published: z.boolean().default(true),
gated_content: z.boolean().default(false),
gated_content: z.boolean().default(false), // NEW: Gated content flag
})
});

View File

@ -1,19 +1,17 @@
// src/pages/api/ai/embeddings-status.ts
import type { APIRoute } from 'astro';
import { embeddingsService } from '../../../utils/embeddings.js';
export const prerender = false;
export const GET: APIRoute = async () => {
try {
const { embeddingsService } = await import('../../../utils/embeddings.js');
await embeddingsService.waitForInitialization();
const stats = embeddingsService.getStats();
const status = stats.enabled && stats.initialized ? 'ready' :
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
console.log(`[EMBEDDINGS-STATUS-API] Service status: ${status}, stats:`, stats);
return new Response(JSON.stringify({
success: true,
embeddings: stats,
@ -25,8 +23,6 @@ export const GET: APIRoute = async () => {
});
} catch (error) {
console.error('[EMBEDDINGS-STATUS-API] Error checking embeddings status:', error);
return new Response(JSON.stringify({
success: false,
embeddings: { enabled: false, initialized: false, count: 0 },

View File

@ -1,13 +1,23 @@
// src/pages/api/ai/enhance-input.ts
// src/pages/api/ai/enhance-input.ts - Enhanced AI service compatibility
import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
import { aiService } from '../../../utils/aiService.js';
import { JSONParser } from '../../../utils/jsonUtils.js';
export const prerender = false;
/**
 * Read a required environment variable.
 *
 * @param key name of the variable in `process.env`
 * @returns the variable's value
 * @throws Error when the variable is unset or empty
 */
function getEnv(key: string): string {
  const found = process.env[key];
  if (found) {
    return found;
  }
  throw new Error(`Missing environment variable: ${key}`);
}
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
const AI_ANALYZER_API_KEY = getEnv('AI_ANALYZER_API_KEY');
const AI_ANALYZER_MODEL = getEnv('AI_ANALYZER_MODEL');
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
const RATE_LIMIT_WINDOW = 60 * 1000;
const RATE_LIMIT_MAX = 5;
@ -39,7 +49,7 @@ function checkRateLimit(userId: string): boolean {
return true;
}
function cleanupExpiredRateLimits(): void {
function cleanupExpiredRateLimits() {
const now = Date.now();
for (const [userId, limit] of rateLimitStore.entries()) {
if (now > limit.resetTime) {
@ -51,7 +61,7 @@ function cleanupExpiredRateLimits(): void {
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
function createEnhancementPrompt(input: string): string {
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte Untersuchung.
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
@ -64,12 +74,12 @@ ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
WENN wichtige Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte Analyse schließen.
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
QUALITÄTSKRITERIEN FÜR FRAGEN:
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
- Methodisch relevant (NICHT: "Wann passierte das?")
- Priorisiert nach Auswirkung auf die Untersuchungsqualität
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
- Die Frage soll maximal 20 Wörter umfassen
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
@ -84,6 +94,39 @@ ${input}
`.trim();
}
/**
 * Send `prompt` as a single user message to the configured chat-completions
 * endpoint and return the raw fetch Response (status is not checked here).
 *
 * @param prompt full prompt text for the model
 * @returns the unprocessed HTTP response
 */
async function callAIService(prompt: string): Promise<Response> {
  const headers: Record<string, string> = { 'Content-Type': 'application/json' };

  // Attach the bearer token only when a key is configured.
  if (AI_ANALYZER_API_KEY) {
    headers['Authorization'] = `Bearer ${AI_ANALYZER_API_KEY}`;
    console.log('[ENHANCE API] Using API key authentication');
  } else {
    console.log('[ENHANCE API] No API key - making request without authentication');
  }

  const payload = JSON.stringify({
    model: AI_ANALYZER_MODEL,
    messages: [{ role: 'user', content: prompt }],
    max_tokens: 300,
    temperature: 0.7,
    top_p: 0.9,
    frequency_penalty: 0.2,
    presence_penalty: 0.1
  });

  return fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
    method: 'POST',
    headers,
    body: payload
  });
}
export const POST: APIRoute = async ({ request }) => {
try {
const authResult = await withAPIAuth(request, 'ai');
@ -112,25 +155,28 @@ export const POST: APIRoute = async ({ request }) => {
const systemPrompt = createEnhancementPrompt(sanitizedInput);
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
console.log(`[ENHANCE-API] Processing enhancement request for user: ${userId}`);
const aiResponse = await enqueueApiCall(() =>
aiService.callAI(systemPrompt, {
temperature: 0.7
}), taskId);
const aiResponse = await enqueueApiCall(() => callAIService(systemPrompt), taskId);
if (!aiResponse.content) {
if (!aiResponse.ok) {
const errorText = await aiResponse.text();
console.error('[ENHANCE API] AI enhancement error:', errorText, 'Status:', aiResponse.status);
return apiServerError.unavailable('Enhancement service unavailable');
}
const aiData = await aiResponse.json();
const aiContent = aiData.choices?.[0]?.message?.content;
if (!aiContent) {
return apiServerError.unavailable('No enhancement response');
}
let questions;
try {
const cleanedContent = aiResponse.content
const cleanedContent = aiContent
.replace(/^```json\s*/i, '')
.replace(/\s*```\s*$/, '')
.trim();
questions = JSONParser.safeParseJSON(cleanedContent, []);
questions = JSON.parse(cleanedContent);
if (!Array.isArray(questions)) {
throw new Error('Response is not an array');
@ -152,11 +198,11 @@ export const POST: APIRoute = async ({ request }) => {
}
} catch (error) {
console.error('[ENHANCE-API] Failed to parse enhancement response:', aiResponse.content);
console.error('Failed to parse enhancement response:', aiContent);
questions = [];
}
console.log(`[ENHANCE-API] User: ${userId}, Questions generated: ${questions.length}, Input length: ${sanitizedInput.length}`);
console.log(`[ENHANCE API] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
return new Response(JSON.stringify({
success: true,
@ -169,7 +215,7 @@ export const POST: APIRoute = async ({ request }) => {
});
} catch (error) {
console.error('[ENHANCE-API] Enhancement error:', error);
console.error('Enhancement error:', error);
return apiServerError.internal('Enhancement processing failed');
}
};

View File

@ -20,14 +20,15 @@ const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
function sanitizeInput(input: string): string {
return input
let sanitized = input
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
.replace(/\<\/?[^>]+(>|$)/g, '')
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
.trim()
.slice(0, 2000)
.replace(/\s+/g, ' ');
.trim();
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
return sanitized;
}
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
@ -76,7 +77,7 @@ function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
}
}
function cleanupExpiredRateLimits(): void {
function cleanupExpiredRateLimits() {
const now = Date.now();
const maxStoreSize = 1000;
@ -116,52 +117,51 @@ export const POST: APIRoute = async ({ request }) => {
const body = await request.json();
const { query, mode = 'workflow', taskId: clientTaskId } = body;
console.log(`[AI-API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[AI-API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
if (!query || typeof query !== 'string') {
console.log(`[AI-API] Invalid query for task ${clientTaskId}`);
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
return apiError.badRequest('Query required');
}
if (!['workflow', 'tool'].includes(mode)) {
console.log(`[AI-API] Invalid mode for task ${clientTaskId}: ${mode}`);
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
}
const sanitizedQuery = sanitizeInput(query);
if (sanitizedQuery.includes('[FILTERED]')) {
console.log(`[AI-API] Filtered input detected for task ${clientTaskId}`);
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
return apiError.badRequest('Invalid input detected');
}
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
console.log(`[AI-API] Enqueueing pipeline task ${taskId}`);
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
const result = await enqueueApiCall(() =>
aiPipeline.processQuery(sanitizedQuery, mode)
, taskId);
if (!result || !result.recommendation) {
return apiServerError.unavailable('No response from AI pipeline');
return apiServerError.unavailable('No response from micro-task AI pipeline');
}
const stats = result.processingStats;
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
incrementMicroTaskCount(userId, estimatedAICallsMade);
console.log(`[AI-API] Pipeline completed for ${taskId}:`, {
mode,
user: userId,
queryLength: sanitizedQuery.length,
processingTime: stats.processingTimeMs,
microTasksCompleted: stats.microTasksCompleted,
microTasksFailed: stats.microTasksFailed,
estimatedAICalls: estimatedAICallsMade,
embeddingsUsed: stats.embeddingsUsed,
finalItems: stats.finalSelectedItems
});
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
console.log(` - Mode: ${mode}`);
console.log(` - User: ${userId}`);
console.log(` - Query length: ${sanitizedQuery.length}`);
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
console.log(` - Final items: ${stats.finalSelectedItems}`);
const currentLimit = rateLimitStore.get(userId);
const remainingMicroTasks = currentLimit ?
@ -175,7 +175,7 @@ export const POST: APIRoute = async ({ request }) => {
query: sanitizedQuery,
processingStats: {
...result.processingStats,
pipelineType: 'refactored',
pipelineType: 'micro-task',
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
estimatedAICallsMade
@ -191,16 +191,18 @@ export const POST: APIRoute = async ({ request }) => {
});
} catch (error) {
console.error('[AI-API] Pipeline error:', error);
console.error('[MICRO-TASK API] Pipeline error:', error);
if (error.message.includes('embeddings')) {
return apiServerError.unavailable('Embeddings service error');
} else if (error.message.includes('AI')) {
return apiServerError.unavailable('AI service error');
return apiServerError.unavailable('Embeddings service error - using AI fallback');
} else if (error.message.includes('micro-task')) {
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
} else if (error.message.includes('selector')) {
return apiServerError.unavailable('AI selector service error');
} else if (error.message.includes('rate limit')) {
return apiError.rateLimit('AI service rate limits exceeded');
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
} else {
return apiServerError.internal('AI pipeline error');
return apiServerError.internal('Micro-task AI pipeline error');
}
}
};

View File

@ -1,5 +1,5 @@
---
// src/pages/contribute/index.astro
// src/pages/contribute/index.astro - Consolidated Auth
import BaseLayout from '../../layouts/BaseLayout.astro';
import { withAuth } from '../../utils/auth.js';

View File

@ -510,7 +510,9 @@ if (aiAuthRequired) {
}, 500);
};
function handleSharedURL() {
function handleSharedURL() {
console.log('[SHARE] Handling shared URL:', window.location.search);
const urlParams = new URLSearchParams(window.location.search);
const toolParam = urlParams.get('tool');
const viewParam = urlParams.get('view');

View File

@ -675,7 +675,6 @@ input[type="checkbox"] {
border-radius: 0.25rem;
font-size: 0.75rem;
margin: 0.125rem;
max-height: 1.5rem;
}
/* ===================================================================
@ -1807,44 +1806,11 @@ input[type="checkbox"] {
.ai-textarea-section {
flex: 1;
min-width: 0;
display: flex;
flex-direction: column;
}
.ai-textarea-section textarea {
width: 100%;
height: 180px;
min-height: 180px;
max-height: 300px;
resize: vertical;
font-size: 0.9375rem;
line-height: 1.5;
padding: 0.75rem;
border: 1px solid var(--color-border);
border-radius: 0.375rem;
background-color: var(--color-bg);
color: var(--color-text);
transition: var(--transition-fast);
flex: 1;
}
.confidence-tooltip {
background: var(--color-bg) !important;
border: 2px solid var(--color-border) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
z-index: 2000 !important;
}
.ai-textarea-section textarea:focus {
outline: none;
border-color: var(--color-primary);
box-shadow: 0 0 0 3px rgb(37 99 235 / 10%);
}
.ai-suggestions-section {
flex: 0 0 320px;
min-height: 180px;
height: auto;
min-height: 120px;
}
.ai-input-container textarea {
@ -2220,20 +2186,12 @@ input[type="checkbox"] {
border-radius: 1rem;
font-weight: 500;
text-transform: uppercase;
position: relative;
z-index: 1;
}
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
[data-theme="dark"] .confidence-tooltip {
background: var(--color-bg-secondary) !important;
border-color: var(--color-border) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
}
.tool-rec-justification {
font-size: 0.875rem;
line-height: 1.5;
@ -2652,8 +2610,7 @@ footer {
================================================================= */
.smart-prompting-container {
height: auto;
min-height: 180px;
height: 100%;
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
@ -2662,10 +2619,8 @@ footer {
border: 1px solid var(--color-border);
border-radius: 0.5rem;
padding: 1rem;
height: auto;
min-height: 180px;
max-height: 400px;
overflow-y: auto;
height: 100%;
min-height: 120px;
display: flex;
flex-direction: column;
opacity: 0.85;
@ -2705,8 +2660,8 @@ footer {
/* Smart Prompting Hint */
.smart-prompting-hint {
height: 180px;
min-height: 180px;
height: 100%;
min-height: 120px;
display: flex;
align-items: center;
animation: hintFadeIn 0.3s ease-in-out;
@ -3420,8 +3375,8 @@ footer {
.ai-suggestions-section {
flex: 0 0 auto;
height: auto;
min-height: 120px;
width: 100%;
max-width: none;
}
.ai-textarea-section {
@ -3431,11 +3386,6 @@ footer {
min-height: 100px;
}
.ai-textarea-section textarea {
height: 150px;
min-height: 150px;
}
.ai-spotlight-content {
flex-direction: column;
gap: 0.75rem;

File diff suppressed because it is too large Load Diff

View File

@ -1,137 +0,0 @@
// src/utils/aiService.ts
import 'dotenv/config';
export interface AIServiceConfig {
endpoint: string;
apiKey: string;
model: string;
}
export interface AICallOptions {
temperature?: number;
timeout?: number;
}
/** Result of a successful AIService call. */
export interface AIResponse {
// Trimmed text of the first choice's message.
content: string;
// AIService copies the API's `usage` object through without mapping it.
// NOTE(review): the provider may report snake_case fields rather than the camelCase names declared here — confirm.
usage?: {
promptTokens: number;
completionTokens: number;
totalTokens: number;
};
}
/**
 * Minimal client for a chat-completions style HTTP endpoint.
 *
 * Configuration comes from the AI_ANALYZER_ENDPOINT, AI_ANALYZER_API_KEY and
 * AI_ANALYZER_MODEL environment variables; the constructor throws when any of
 * them is missing or empty.
 */
class AIService {
  private config: AIServiceConfig;
  private defaultOptions: AICallOptions;

  constructor() {
    this.config = {
      endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
      model: this.getRequiredEnv('AI_ANALYZER_MODEL')
    };

    this.defaultOptions = {
      temperature: 0.3,
      timeout: 60000
    };

    console.log('[AI-SERVICE] Initialized with model:', this.config.model);
  }

  /**
   * Reads an environment variable that must be set.
   * @throws Error when the variable is unset or empty.
   */
  private getRequiredEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing required environment variable: ${key}`);
    }
    return value;
  }

  /**
   * Sends `prompt` as a single user message and returns the first choice.
   *
   * @param prompt  Text of the user message.
   * @param options Optional temperature/timeout overrides; `undefined` fields
   *                fall back to the service defaults.
   * @returns The trimmed message content plus the API's `usage` object as-is.
   * @throws Error 'AI request timeout' when the timeout elapses,
   *         'AI API error: …' on a non-2xx status,
   *         'No response from AI model' when no content is returned;
   *         any other fetch failure is rethrown unchanged.
   */
  async callAI(prompt: string, options: AICallOptions = {}): Promise<AIResponse> {
    // Resolve field by field: a plain spread would let an explicit
    // `timeout: undefined` replace the default and abort almost immediately.
    const mergedOptions = {
      temperature: options.temperature ?? this.defaultOptions.temperature,
      timeout: options.timeout ?? this.defaultOptions.timeout
    };

    console.log('[AI-SERVICE] Making API call:', {
      promptLength: prompt.length,
      temperature: mergedOptions.temperature
    });

    const headers: Record<string, string> = {
      'Content-Type': 'application/json'
    };

    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }

    const requestBody = {
      model: this.config.model,
      messages: [{ role: 'user', content: prompt }],
      temperature: mergedOptions.temperature
    };

    // Tolerate a configured endpoint with a trailing slash.
    const url = `${this.config.endpoint.replace(/\/+$/, '')}/v1/chat/completions`;

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), mergedOptions.timeout);

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers,
        body: JSON.stringify(requestBody),
        signal: controller.signal
      });

      if (!response.ok) {
        const errorText = await response.text();
        console.error('[AI-SERVICE] API Error:', response.status, errorText);
        throw new Error(`AI API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json();
      const content = data.choices?.[0]?.message?.content;

      if (!content) {
        console.error('[AI-SERVICE] No response content from AI model');
        throw new Error('No response from AI model');
      }

      console.log('[AI-SERVICE] API call successful:', {
        responseLength: content.length,
        usage: data.usage
      });

      return {
        content: content.trim(),
        usage: data.usage
      };
    } catch (error) {
      // The catch variable may be anything; inspect it without assuming Error.
      const errorName =
        typeof error === 'object' && error !== null ? (error as { name?: unknown }).name : undefined;

      if (errorName === 'AbortError') {
        console.error('[AI-SERVICE] Request timeout');
        throw new Error('AI request timeout');
      }

      const message = error instanceof Error ? error.message : String(error);
      console.error('[AI-SERVICE] API call failed:', message);
      throw error;
    } finally {
      // Always release the timer: otherwise a failed fetch leaves it pending
      // for the full timeout, and the timeout would not cover the body read.
      clearTimeout(timeoutId);
    }
  }

  /** Same as callAI with temperature 0.3 and a 30 second timeout. */
  async callMicroTaskAI(prompt: string): Promise<AIResponse> {
    return this.callAI(prompt, {
      temperature: 0.3,
      timeout: 30000
    });
  }

  /** Rough token estimate: one token per four characters, rounded up. */
  estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /** Returns a shallow copy of the active configuration. */
  getConfig(): AIServiceConfig {
    return { ...this.config };
  }
}
export const aiService = new AIService();

View File

@ -83,21 +83,26 @@ export const apiServerError = {
};
export const apiSpecial = {
// JSON parsing error
invalidJSON: (): Response =>
apiError.badRequest('Invalid JSON in request body'),
// Missing required fields
missingRequired: (fields: string[]): Response =>
apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),
// Empty request body
emptyBody: (): Response =>
apiError.badRequest('Request body cannot be empty'),
// File upload responses
uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
apiResponse.created(data),
uploadFailed: (error: string): Response =>
apiServerError.internal(`Upload failed: ${error}`),
// Contribution responses
contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
apiResponse.created({ success: true, ...data }),
@ -109,6 +114,7 @@ export const apiWithHeaders = {
successWithHeaders: (data: any, headers: Record<string, string>): Response =>
createAPIResponse(data, 200, headers),
// Redirect response
redirect: (location: string, temporary: boolean = true): Response =>
new Response(null, {
status: temporary ? 302 : 301,

File diff suppressed because it is too large Load Diff

View File

@ -1,8 +1,9 @@
// src/utils/clientUtils.ts
export function createToolSlug(toolName: string): string {
if (!toolName || typeof toolName !== 'string') {
console.warn('[CLIENT-UTILS] Invalid toolName provided to createToolSlug:', toolName);
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
return '';
}
@ -29,81 +30,6 @@ export function isToolHosted(tool: any): boolean {
tool.projectUrl.trim() !== "";
}
/**
 * Strips common Markdown and HTML markup from `text`, leaving plain text.
 * Fenced code blocks are replaced by the placeholder `[CODE BLOCK]`, and runs
 * of blank lines are collapsed. Non-string input yields an empty string.
 */
export function sanitizeText(text: string): string {
  if (typeof text !== 'string') {
    return '';
  }

  // Applied strictly in this order; later rules see the output of earlier ones.
  const rules: Array<[RegExp, string]> = [
    [/^#{1,6}\s+/gm, ''],                 // heading markers
    [/^\s*[-*+]\s+/gm, ''],               // bullet list markers
    [/^\s*\d+\.\s+/gm, ''],               // numbered list markers
    [/\*\*(.+?)\*\*/g, '$1'],             // bold
    [/\*(.+?)\*/g, '$1'],                 // italic
    [/\[([^\]]+)\]\([^)]+\)/g, '$1'],     // links -> link text
    [/```[\s\S]*?```/g, '[CODE BLOCK]'],  // fenced code
    [/`([^`]+)`/g, '$1'],                 // inline code
    [/<[^>]+>/g, ''],                     // HTML tags
    [/\n\s*\n\s*\n/g, '\n\n']             // excess blank lines
  ];

  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
/**
 * Escapes `text` for safe insertion into HTML element content and quoted
 * attribute values.
 *
 * Unlike a `textContent`/`innerHTML` round-trip this also escapes `"` and `'`
 * (so the result cannot break out of an attribute), works without a DOM, and
 * escapes the string form of non-string input instead of returning it raw.
 *
 * @param text Value to escape; non-strings are converted with `String()`.
 * @returns The escaped string.
 */
export function escapeHtml(text: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  const raw = typeof text === 'string' ? text : String(text);
  return raw.replace(/[&<>"']/g, ch => entities[ch]);
}
/**
 * Shortens `text` to at most `maxLength` characters and appends `...` when
 * anything was cut. Empty or already short input is returned unchanged.
 */
export function truncateText(text: string, maxLength: number): string {
  const fits = !text || text.length <= maxLength;
  return fits ? text : `${text.slice(0, maxLength)}...`;
}
/**
 * Produces a short, human-readable preview of an arbitrary value.
 *
 * - null/undefined -> 'null'
 * - strings are cut to 100 characters, numbers/booleans are stringified
 * - arrays with up to 3 items are JSON-encoded, longer ones show the first 3
 *   followed by a `...+N` counter
 * - objects with up to 3 keys list `key: value` pairs (string values cut to
 *   20 characters), larger ones list the first 3 keys and a counter
 */
export function summarizeData(data: any): string {
  if (data == null) return 'null';

  switch (typeof data) {
    case 'string':
      return data.length > 100 ? `${data.slice(0, 100)}...` : data;
    case 'number':
    case 'boolean':
      return data.toString();
    case 'object':
      break;
    default:
      return String(data);
  }

  if (Array.isArray(data)) {
    if (data.length === 0) return '[]';
    if (data.length <= 3) return JSON.stringify(data);
    const preview = data
      .slice(0, 3)
      .map(entry => (typeof entry === 'string' ? entry : JSON.stringify(entry)));
    return '[' + preview.join(', ') + ', ...+' + (data.length - 3) + ']';
  }

  const keys = Object.keys(data);
  if (keys.length === 0) return '{}';
  if (keys.length > 3) {
    return `{${keys.slice(0, 3).join(', ')}, ...+${keys.length - 3} keys}`;
  }

  const pairs = keys.map(key => {
    const value = data[key];
    const shown =
      typeof value === 'string'
        ? value.slice(0, 20) + (value.length > 20 ? '...' : '')
        : JSON.stringify(value);
    return `${key}: ${shown}`;
  });
  return `{${pairs.join(', ')}}`;
}
/**
 * Formats a millisecond duration as `< 1s`, `Ns`, `Nm` or `Nm Ns`.
 *
 * Seconds are rounded up once on the total, so the rounding carries into the
 * minutes (59500 ms -> `1m`, 119500 ms -> `2m`) instead of producing `60s`
 * or `1m 60s`.
 *
 * @param ms Duration in milliseconds; values below 1000 (and NaN) give `< 1s`.
 */
export function formatDuration(ms: number): string {
  if (Number.isNaN(ms) || ms < 1000) return '< 1s';

  const totalSeconds = Math.ceil(ms / 1000);
  if (totalSeconds < 60) return `${totalSeconds}s`;

  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
}
/**
 * Makes `element` visible: sets `display: block` and removes the `hidden`
 * class. A null element is ignored.
 */
export function showElement(element: HTMLElement | null): void {
  if (!element) return;

  element.style.display = 'block';
  element.classList.remove('hidden');
}
/**
 * Hides `element`: sets `display: none` and adds the `hidden` class.
 * A null element is ignored.
 */
export function hideElement(element: HTMLElement | null): void {
  if (!element) return;

  element.style.display = 'none';
  element.classList.add('hidden');
}
interface AutocompleteOptions {
minLength?: number;
maxResults?: number;
@ -276,7 +202,7 @@ export class AutocompleteManager {
defaultRender(item: any): string {
const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
return `<div class="autocomplete-item">${escapeHtml(text)}</div>`;
return `<div class="autocomplete-item">${this.escapeHtml(text)}</div>`;
}
renderDropdown(): void {
@ -358,8 +284,8 @@ export class AutocompleteManager {
align-items: center;
gap: 0.25rem;
">
${escapeHtml(item)}
<button type="button" class="autocomplete-remove" data-item="${escapeHtml(item)}" style="
${this.escapeHtml(item)}
<button type="button" class="autocomplete-remove" data-item="${this.escapeHtml(item)}" style="
background: none;
border: none;
color: white;
@ -401,6 +327,12 @@ export class AutocompleteManager {
this.selectedIndex = -1;
}
/**
 * Escapes `text` for use in HTML content and quoted attribute values.
 *
 * The result is interpolated into a `data-item="…"` attribute by this class,
 * so quotes must be escaped too; a `textContent`/`innerHTML` round-trip leaves
 * them untouched. null/undefined become an empty string.
 */
escapeHtml(text: string): string {
  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  const raw = text == null ? '' : String(text);
  return raw.replace(/[&<>"']/g, ch => entities[ch]);
}
setDataSource(newDataSource: any[]): void {
this.dataSource = newDataSource;
}

View File

@ -1,225 +0,0 @@
// src/utils/confidenceScoring.ts
import { isToolHosted } from './clientUtils.js';
import 'dotenv/config';
/** Result of ConfidenceScoring.calculateRecommendationConfidence for one tool. */
export interface ConfidenceMetrics {
// Rounded weighted sum of semanticRelevance and taskSuitability.
overall: number;
// Rounded embedding similarity times 100; 50 when the tool has no similarity entry.
semanticRelevance: number;
// Rounded task relevance, including the workflow phase bonus when one applies.
taskSuitability: number;
// Up to four caveats (user-facing German strings).
uncertaintyFactors: string[];
// Up to four positive indicators (user-facing German strings).
strengthIndicators: string[];
}
/** Tunable weights and thresholds; ConfidenceScoring reads them from CONFIDENCE_* environment variables. */
export interface ConfidenceConfig {
// Multiplier for the semantic relevance score (CONFIDENCE_SEMANTIC_WEIGHT, default 0.3).
semanticWeight: number;
// Multiplier for the task suitability score (CONFIDENCE_SUITABILITY_WEIGHT, default 0.7).
suitabilityWeight: number;
// CONFIDENCE_MINIMUM_THRESHOLD, default 40; not consulted by the ConfidenceScoring methods in this file.
minimumThreshold: number;
// Scores at or above this are 'moderate' (CONFIDENCE_MEDIUM_THRESHOLD, default 60).
mediumThreshold: number;
// Scores at or above this are 'strong' (CONFIDENCE_HIGH_THRESHOLD, default 80).
highThreshold: number;
}
/** Inputs describing the current analysis that the confidence scoring draws on. */
export interface AnalysisContext {
// Raw user query; matched against keyword patterns when deriving factors and indicators.
userQuery: string;
// Analysis mode; the value 'workflow' enables the phase-alignment bonus.
mode: string;
// Similarity per tool name; scoring multiplies values by 100 and compares them against 0.7.
embeddingsSimilarities: Map<string, number>;
// Tools selected so far; looked up by `tool.name`, with `phase` checked against the tool's phases.
selectedTools?: Array<{
tool: any;
phase: string;
priority: string;
justification?: string;
taskRelevance?: number;
limitations?: string[];
}>;
}
/**
 * Computes confidence scores for tool recommendations by combining embedding
 * similarity with task relevance, and derives human-readable uncertainty and
 * strength hints (German user-facing strings).
 *
 * Weights and thresholds are read from CONFIDENCE_* environment variables;
 * unset or unparsable values fall back to the built-in defaults.
 */
class ConfidenceScoring {
  private config: ConfidenceConfig;

  constructor() {
    this.config = {
      semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
      suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
      minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
      mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
      highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
    };

    console.log('[CONFIDENCE-SCORING] Initialized with restored config:', this.config);
  }

  /** Parses a float from the environment; unset or non-finite values yield `defaultValue`. */
  private getEnvFloat(key: string, defaultValue: number): number {
    const parsed = parseFloat(process.env[key] ?? '');
    // A NaN weight would silently poison every score, so fall back instead.
    return Number.isFinite(parsed) ? parsed : defaultValue;
  }

  /** Parses a base-10 integer from the environment; unset or invalid values yield `defaultValue`. */
  private getEnvInt(key: string, defaultValue: number): number {
    const parsed = parseInt(process.env[key] ?? '', 10);
    // A NaN threshold makes every `>=` comparison false (everything 'weak').
    return Number.isFinite(parsed) ? parsed : defaultValue;
  }

  /**
   * Scores a single recommended tool.
   *
   * @param tool          Tool record; `name`, `phases`, `skillLevel`, `type`,
   *                      `accessType`, `license` and `knowledgebase` are read.
   * @param context       Query, mode, similarities and current selection.
   * @param taskRelevance Task relevance on a 0-100 scale (default 70).
   * @param limitations   Known limitations; the first two are surfaced.
   * @returns Rounded scores plus up to four uncertainty and strength hints.
   */
  calculateRecommendationConfidence(
    tool: any,
    context: AnalysisContext,
    taskRelevance: number = 70,
    limitations: string[] = []
  ): ConfidenceMetrics {
    console.log('[CONFIDENCE-SCORING] Calculating confidence for tool:', tool.name);

    // No similarity entry means "unknown", scored as a neutral 50.
    const similarity = context.embeddingsSimilarities.get(tool.name);
    const rawSemanticRelevance = similarity !== undefined ? similarity * 100 : 50;

    let enhancedTaskSuitability = taskRelevance;

    if (context.mode === 'workflow') {
      const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
      if (toolSelection && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
        // Reward tools placed in a phase they are declared for, capped at 100.
        const phaseBonus = Math.min(15, 100 - taskRelevance);
        enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
        console.log('[CONFIDENCE-SCORING] Phase alignment bonus applied:', phaseBonus);
      }
    }

    const weighted =
      rawSemanticRelevance * this.config.semanticWeight +
      enhancedTaskSuitability * this.config.suitabilityWeight;
    // Configured weights need not sum to 1; keep the result a valid percentage.
    const overall = Math.min(100, Math.max(0, weighted));

    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

    const result = {
      overall: Math.round(overall),
      semanticRelevance: Math.round(rawSemanticRelevance),
      taskSuitability: Math.round(enhancedTaskSuitability),
      uncertaintyFactors,
      strengthIndicators
    };

    console.log('[CONFIDENCE-SCORING] Confidence calculated:', {
      tool: tool.name,
      overall: result.overall,
      semantic: result.semanticRelevance,
      task: result.taskSuitability,
      uncertaintyCount: uncertaintyFactors.length,
      strengthCount: strengthIndicators.length
    });

    return result;
  }

  /** Collects at most four caveats for a recommendation. */
  private identifyUncertaintyFactors(
    tool: any,
    context: AnalysisContext,
    limitations: string[],
    confidence: number
  ): string[] {
    const factors: string[] = [];

    if (limitations && limitations.length > 0) {
      factors.push(...limitations.slice(0, 2));
    }

    // `??` rather than `||`: a genuine similarity of 0 must not be treated as missing.
    const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
    if (similarity < 0.7) {
      factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
    }

    if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
      factors.push('Experten-Tool für zeitkritisches Szenario');
    }

    if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
      factors.push('Einsteiger-Tool für komplexe Analyse');
    }

    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
      factors.push('Installation und Setup erforderlich');
    }

    if (tool.license === 'Proprietary') {
      factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
    }

    if (confidence < 60) {
      factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
    }

    return factors.slice(0, 4);
  }

  /** Collects at most four positive indicators for a recommendation. */
  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
    const indicators: string[] = [];

    const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
    if (similarity >= 0.7) {
      indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
    }

    if (tool.knowledgebase === true) {
      indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
    }

    if (isToolHosted(tool)) {
      indicators.push('Sofort verfügbar über gehostete Lösung');
    }

    if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
      indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
    }

    if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
      indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
    }

    return indicators.slice(0, 4);
  }

  /**
   * Heuristic confidence (25-95) for a tool-selection result, based on the
   * share of candidates selected and on whether reasoning and concepts exist.
   * Returns 30 when `result` has no `selectedTools`.
   */
  calculateSelectionConfidence(result: any, candidateCount: number): number {
    if (!result?.selectedTools) {
      console.log('[CONFIDENCE-SCORING] No selected tools for confidence calculation');
      return 30;
    }

    const selectionRatio = result.selectedTools.length / candidateCount;
    const hasReasoning = result.reasoning && result.reasoning.length > 50;

    let confidence = 60;

    // Selecting 5-30% of the candidates is the sweet spot; fewer or more is penalised.
    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
    else if (selectionRatio <= 0.05) confidence -= 10;
    else confidence -= 15;

    if (hasReasoning) confidence += 15;
    if (result.selectedConcepts?.length > 0) confidence += 5;

    const finalConfidence = Math.min(95, Math.max(25, confidence));

    console.log('[CONFIDENCE-SCORING] Selection confidence calculated:', {
      candidateCount,
      selectedCount: result.selectedTools.length,
      selectionRatio: selectionRatio.toFixed(3),
      hasReasoning,
      confidence: finalConfidence
    });

    return finalConfidence;
  }

  /** Maps a score to 'strong', 'moderate' or 'weak' using the configured thresholds. */
  getConfidenceLevel(confidence: number): 'weak' | 'moderate' | 'strong' {
    if (confidence >= this.config.highThreshold) return 'strong';
    if (confidence >= this.config.mediumThreshold) return 'moderate';
    return 'weak';
  }

  /** Maps a score to the CSS colour variable used to display it. */
  getConfidenceColor(confidence: number): string {
    if (confidence >= this.config.highThreshold) return 'var(--color-accent)';
    if (confidence >= this.config.mediumThreshold) return 'var(--color-warning)';
    return 'var(--color-error)';
  }

  /** Returns a shallow copy of the active configuration. */
  getConfig(): ConfidenceConfig {
    return { ...this.config };
  }
}
export const confidenceScoring = new ConfidenceScoring();

View File

@ -85,7 +85,7 @@ let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let cachedToolsHash: string | null = null;
let dataVersion: string | null = null;
function seededRandom(seed: number): () => number {
let x = Math.sin(seed) * 10000;
@ -110,6 +110,17 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
return shuffled;
}
/**
 * Derives a short, deterministic version string from the full contents of
 * `data`.
 *
 * The value is serialised with object keys sorted at every nesting level, so
 * key order does not matter but any change to a nested value does. (Passing
 * an array of top-level keys as the `JSON.stringify` replacer would act as a
 * property allow-list at every depth and drop nested fields from the hash.)
 *
 * @param data Any JSON-serialisable value.
 * @returns Base-36 string of a 32-bit rolling hash of the canonical JSON.
 */
function generateDataVersion(data: any): string {
  const canonical =
    JSON.stringify(data, (_key, value) => {
      if (value && typeof value === 'object' && !Array.isArray(value)) {
        const ordered: Record<string, unknown> = {};
        for (const name of Object.keys(value).sort()) {
          ordered[name] = value[name];
        }
        return ordered;
      }
      return value;
    }) ?? '';

  let hash = 0;
  for (let i = 0; i < canonical.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + canonical.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
async function loadRawData(): Promise<ToolsData> {
if (!cachedData) {
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
@ -131,9 +142,8 @@ async function loadRawData(): Promise<ToolsData> {
};
}
const { getToolsFileHash } = await import('./hashUtils.js');
cachedToolsHash = await getToolsFileHash();
console.log(`[DATA SERVICE] Loaded data with hash: ${cachedToolsHash.slice(0, 12)}...`);
dataVersion = generateDataVersion(cachedData);
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
} catch (error) {
if (error instanceof z.ZodError) {
@ -224,7 +234,7 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
}
export function getDataVersion(): string | null {
return cachedToolsHash;
return dataVersion;
}
export function clearCache(): void {
@ -232,7 +242,7 @@ export function clearCache(): void {
cachedRandomizedData = null;
cachedCompressedData = null;
lastRandomizationDate = null;
cachedToolsHash = null;
dataVersion = null;
console.log('[DATA SERVICE] Enhanced cache cleared');
}

View File

@ -1,11 +1,11 @@
// src/utils/embeddings.ts - Refactored
// src/utils/embeddings.ts
import { promises as fs } from 'fs';
import path from 'path';
import { getCompressedToolsDataForAI } from './dataService.js';
import 'dotenv/config';
import crypto from 'crypto';
export interface EmbeddingData {
interface EmbeddingData {
id: string;
type: 'tool' | 'concept';
name: string;
@ -20,22 +20,14 @@ export interface EmbeddingData {
};
}
export interface SimilarityResult extends EmbeddingData {
similarity: number;
}
interface EmbeddingsDatabase {
version: string;
lastUpdated: number;
embeddings: EmbeddingData[];
}
interface EmbeddingsConfig {
endpoint?: string;
apiKey?: string;
model?: string;
batchSize: number;
batchDelay: number;
interface SimilarityResult extends EmbeddingData {
similarity: number;
}
class EmbeddingsService {
@ -43,30 +35,48 @@ class EmbeddingsService {
private isInitialized = false;
private initializationPromise: Promise<void> | null = null;
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
private config: EmbeddingsConfig;
private readonly batchSize: number;
private readonly batchDelay: number;
private enabled: boolean = false;
constructor() {
this.config = this.loadConfig();
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
hasEndpoint: !!this.config.endpoint,
hasModel: !!this.config.model
});
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
this.enabled = true;
}
private loadConfig(): EmbeddingsConfig {
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL;
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
return {
endpoint,
apiKey,
model,
batchSize,
batchDelay
};
/**
 * Re-evaluates whether the embeddings feature should be active and stores
 * the outcome in `this.enabled`.
 *
 * With AI_EMBEDDINGS_ENABLED set to 'true' the feature is enabled only when
 * both the endpoint and the model are configured. Otherwise it is enabled
 * only when an embeddings file already exists on disk. Unexpected errors
 * disable the feature.
 */
private async checkEnabledStatus(): Promise<void> {
  try {
    if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
      const hasApiConfig =
        Boolean(process.env.AI_EMBEDDINGS_ENDPOINT) && Boolean(process.env.AI_EMBEDDINGS_MODEL);

      if (hasApiConfig) {
        console.log('[EMBEDDINGS] All requirements met - enabling embeddings');
      } else {
        console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling');
      }
      this.enabled = hasApiConfig;
      return;
    }

    // Not explicitly enabled: fall back to whether a cache file is present.
    let cacheFileExists: boolean;
    try {
      await fs.stat(this.embeddingsPath);
      cacheFileExists = true;
    } catch {
      cacheFileExists = false;
    }

    console.log(
      cacheFileExists
        ? '[EMBEDDINGS] Existing embeddings file found - enabling'
        : '[EMBEDDINGS] Embeddings not explicitly enabled - disabling'
    );
    this.enabled = cacheFileExists;
  } catch (error) {
    console.error('[EMBEDDINGS] Error checking enabled status:', error);
    this.enabled = false;
  }
}
async initialize(): Promise<void> {
@ -83,55 +93,63 @@ class EmbeddingsService {
}
private async performInitialization(): Promise<void> {
const initStart = Date.now();
try {
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
await this.checkEnabledStatus();
if (!this.enabled) {
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
return;
}
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
return;
}*/
const initStart = Date.now();
try {
console.log('[EMBEDDINGS] Initializing embeddings system…');
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
const toolsData = await getCompressedToolsDataForAI();
const { getToolsFileHash } = await import('./hashUtils.js');
const currentDataHash = await getToolsFileHash();
const toolsData = await getCompressedToolsDataForAI();
const currentDataHash = await this.hashToolsFile();
const existing = await this.loadEmbeddings();
const cacheIsUsable = existing &&
const existing = await this.loadEmbeddings();
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
const cacheIsUsable =
existing &&
existing.version === currentDataHash &&
Array.isArray(existing.embeddings) &&
existing.embeddings.length > 0;
if (cacheIsUsable) {
console.log('[EMBEDDINGS-SERVICE] Using cached embeddings');
this.embeddings = existing.embeddings;
console.log('[EMBEDDINGS] Using cached embeddings');
this.embeddings = existing.embeddings;
} else {
console.log('[EMBEDDINGS-SERVICE] Generating new embeddings');
console.log('[EMBEDDINGS] Generating new embeddings');
await this.generateEmbeddings(toolsData, currentDataHash);
}
this.isInitialized = true;
console.log(`[EMBEDDINGS-SERVICE] Initialized successfully with ${this.embeddings.length} embeddings in ${Date.now() - initStart}ms`);
} catch (error) {
console.error('[EMBEDDINGS-SERVICE] Initialization failed:', error);
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`);
} catch (err) {
console.error('[EMBEDDINGS] Failed to initialize:', err);
this.isInitialized = false;
throw error;
throw err;
} finally {
this.initializationPromise = null;
}
}
/** Returns the hex SHA-256 digest of `src/data/tools.yaml`, resolved against the current working directory. */
private async hashToolsFile(): Promise<string> {
  const yamlPath = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const contents = await fs.readFile(yamlPath, 'utf8');
  const sha256 = crypto.createHash('sha256');
  sha256.update(contents);
  return sha256.digest('hex');
}
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
try {
const data = await fs.readFile(this.embeddingsPath, 'utf8');
return JSON.parse(data);
} catch (error) {
console.log('[EMBEDDINGS-SERVICE] No existing embeddings file found');
console.log('[EMBEDDINGS] No existing embeddings found');
return null;
}
}
@ -144,7 +162,7 @@ class EmbeddingsService {
};
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
console.log(`[EMBEDDINGS-SERVICE] Saved ${this.embeddings.length} embeddings to disk`);
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
}
private createContentString(item: any): string {
@ -160,23 +178,30 @@ class EmbeddingsService {
}
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
if (!this.config.endpoint || !this.config.model) {
throw new Error('Missing embeddings API configuration');
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL;
if (!endpoint || !model) {
const missing: string[] = [];
if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT');
if (!model) missing.push('AI_EMBEDDINGS_MODEL');
throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`);
}
const headers: Record<string, string> = {
'Content-Type': 'application/json'
};
if (this.config.apiKey) {
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
}
const response = await fetch(this.config.endpoint, {
const response = await fetch(endpoint, {
method: 'POST',
headers,
body: JSON.stringify({
model: this.config.model,
model,
input: contents
})
});
@ -208,16 +233,11 @@ class EmbeddingsService {
const contents = allItems.map(item => this.createContentString(item));
this.embeddings = [];
console.log(`[EMBEDDINGS-SERVICE] Generating embeddings for ${contents.length} items`);
for (let i = 0; i < contents.length; i += this.config.batchSize) {
const batch = contents.slice(i, i + this.config.batchSize);
const batchItems = allItems.slice(i, i + this.config.batchSize);
for (let i = 0; i < contents.length; i += this.batchSize) {
const batch = contents.slice(i, i + this.batchSize);
const batchItems = allItems.slice(i, i + this.batchSize);
const batchNumber = Math.ceil((i + 1) / this.config.batchSize);
const totalBatches = Math.ceil(contents.length / this.config.batchSize);
console.log(`[EMBEDDINGS-SERVICE] Processing batch ${batchNumber}/${totalBatches}`);
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
try {
const embeddings = await this.generateEmbeddingsBatch(batch);
@ -240,12 +260,12 @@ class EmbeddingsService {
});
});
if (i + this.config.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.config.batchDelay));
if (i + this.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
}
} catch (error) {
console.error(`[EMBEDDINGS-SERVICE] Batch ${batchNumber} failed:`, error);
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
throw error;
}
}
@ -253,21 +273,18 @@ class EmbeddingsService {
await this.saveEmbeddings(version);
}
async embedText(text: string): Promise<number[]> {
if (!this.isInitialized) {
public async embedText(text: string): Promise<number[]> {
if (!this.enabled || !this.isInitialized) {
throw new Error('Embeddings service not available');
}
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding;
}
async waitForInitialization(): Promise<void> {
/*if (!this.config.enabled) {
return Promise.resolve();
}*/
if (this.isInitialized) {
await this.checkEnabledStatus();
if (!this.enabled || this.isInitialized) {
return Promise.resolve();
}
@ -279,6 +296,13 @@ class EmbeddingsService {
return this.initialize();
}
/**
 * Resets the enabled and initialized flags, then re-evaluates the enabled
 * state from the environment and logs the outcome.
 */
async forceRecheckEnvironment(): Promise<void> {
  // Start from a clean slate so stale state cannot short-circuit the check.
  this.isInitialized = false;
  this.enabled = false;

  await this.checkEnabledStatus();

  console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled);
}
private cosineSimilarity(a: number[], b: number[]): number {
let dotProduct = 0;
let normA = 0;
@ -294,62 +318,145 @@ class EmbeddingsService {
}
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
return [];
}*/
if (!this.isInitialized || this.embeddings.length === 0) {
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
if (!this.enabled) {
console.log('[EMBEDDINGS] Service disabled for similarity search');
return [];
}
try {
console.log(`[EMBEDDINGS-SERVICE] Finding similar items for query: "${query}"`);
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0];
if (this.isInitialized && this.embeddings.length > 0) {
console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`);
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0];
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicThreshold = Math.max(threshold, topScore * 0.85);
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
const results = similarities
.filter(item => item.similarity >= dynamicThreshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
console.log(`[EMBEDDINGS-SERVICE] Found ${results.length} similar items (threshold: ${dynamicThreshold.toFixed(3)})`);
if (results.length > 0) {
console.log('[EMBEDDINGS-SERVICE] Top 5 matches:');
results.slice(0, 5).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
const results = similarities
.filter(item => item.similarity >= dynamicCutOff)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
const orderingValid = results.every((item, index) => {
if (index === 0) return true;
return item.similarity <= results[index - 1].similarity;
});
if (!orderingValid) {
console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!');
results.forEach((item, idx) => {
console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`);
});
}
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
if (results.length > 0) {
console.log('[EMBEDDINGS] Top 10 similarity matches:');
results.slice(0, 10).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
});
const topSimilarity = results[0].similarity;
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
if (hasHigherSimilarity) {
console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!');
}
}
return results;
} else {
console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`);
const { getToolsData } = await import('./dataService.js');
const toolsData = await getToolsData();
const queryLower = query.toLowerCase();
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2);
const similarities: SimilarityResult[] = toolsData.tools
.map((tool: any) => {
let similarity = 0;
if (tool.name.toLowerCase().includes(queryLower)) {
similarity += 0.8;
}
if (tool.description && tool.description.toLowerCase().includes(queryLower)) {
similarity += 0.6;
}
if (tool.tags && Array.isArray(tool.tags)) {
const matchingTags = tool.tags.filter((tag: string) =>
tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase())
);
if (tool.tags.length > 0) {
similarity += (matchingTags.length / tool.tags.length) * 0.4;
}
}
const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase();
const matchingWords = queryWords.filter(word => toolText.includes(word));
if (queryWords.length > 0) {
similarity += (matchingWords.length / queryWords.length) * 0.3;
}
return {
id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`,
type: 'tool' as const,
name: tool.name,
content: toolText,
embedding: [],
metadata: {
domains: tool.domains || [],
phases: tool.phases || [],
tags: tool.tags || [],
skillLevel: tool.skillLevel,
type: tool.type
},
similarity: Math.min(similarity, 1.0)
};
})
.filter(item => item.similarity >= threshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`);
return similarities;
}
return results;
} catch (error) {
console.error('[EMBEDDINGS-SERVICE] Similarity search failed:', error);
console.error('[EMBEDDINGS] Failed to find similar items:', error);
return [];
}
}
getStats(): {initialized: boolean; count: number } {
/**
 * Reports the currently known enabled state.
 *
 * When the service is neither enabled nor initialized, a background re-check
 * is started; its result is not awaited, so it is only visible to later calls.
 */
isEnabled(): boolean {
  const needsRecheck = !this.enabled && !this.isInitialized;
  if (needsRecheck) {
    this.checkEnabledStatus().catch(console.error);
  }
  return this.enabled;
}
getStats(): { enabled: boolean; initialized: boolean; count: number } {
return {
enabled: this.enabled,
initialized: this.isInitialized,
count: this.embeddings.length
};
}
getConfig(): EmbeddingsConfig {
return { ...this.config };
}
}
export const embeddingsService = new EmbeddingsService();
const embeddingsService = new EmbeddingsService();
export { embeddingsService, type EmbeddingData, type SimilarityResult };

View File

@ -1,20 +0,0 @@
// src/utils/hashUtils.ts
import { promises as fs, readFileSync } from 'fs';
import path from 'path';
import crypto from 'crypto';
/**
 * Computes the hex SHA-256 digest of `src/data/tools.yaml`, resolved against
 * the current working directory. Rejects when the file cannot be read.
 */
export async function getToolsFileHash(): Promise<string> {
  const toolsPath = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const contents = await fs.readFile(toolsPath, 'utf8');
  const hasher = crypto.createHash('sha256');
  hasher.update(contents);
  return hasher.digest('hex');
}
/**
 * Synchronous variant of the tools-file hash: computes the SHA-256 digest of
 * `src/data/tools.yaml`, resolved relative to the current working directory.
 *
 * Best-effort by design: any failure (missing file, permission error, ...)
 * is swallowed and reported as `null` instead of throwing.
 *
 * @returns Hex-encoded SHA-256 hash of the file's UTF-8 contents, or `null`
 *          when the file could not be read.
 */
export function getToolsFileHashSync(): string | null {
  try {
    const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
    // Use the statically imported readFileSync. The previous `require('fs')`
    // is not defined in ES modules, so the resulting ReferenceError was
    // swallowed by this catch and the function always returned null there.
    const raw = readFileSync(file, 'utf8');
    return crypto.createHash('sha256').update(raw).digest('hex');
  } catch {
    return null;
  }
}

View File

@ -1,356 +0,0 @@
// src/utils/jsonUtils.ts
/**
 * Static helpers for parsing, repairing, sanitizing and validating JSON
 * payloads: lenient parsing of AI responses, strict parsing of untrusted
 * input, and construction/validation of audit export documents.
 */
export class JSONParser {
  /**
   * Leniently parses JSON that may be wrapped in markdown code fences,
   * surrounded by prose, or truncated.
   *
   * Extraction order:
   *  1. a ```json fenced block, then any ``` fenced block;
   *  2. otherwise, if the text starts with `[`, the whole text is tried as-is
   *     first (so top-level arrays survive intact), with the outermost
   *     `{...}` span as a second attempt;
   *  3. otherwise the outermost `{...}` span, or the raw text if none exists.
   *
   * A candidate that does not end in `}` or `]` is passed through
   * {@link JSONParser.repairTruncatedJSON} before parsing.
   *
   * Parsed plain objects (not arrays) are normalized so that `selectedTools`
   * and `selectedConcepts` are always arrays.
   *
   * @param jsonString Raw text expected to contain JSON.
   * @param fallback   Value returned when nothing parseable is found.
   * @returns The parsed value, or `fallback` on any failure (never throws).
   */
  static safeParseJSON(jsonString: string, fallback: any = null): any {
    try {
      const trimmed = jsonString.trim();

      const fenced =
        trimmed.match(/```json\s*([\s\S]*?)\s*```/i) ||
        trimmed.match(/```\s*([\s\S]*?)\s*```/i);

      const candidates: string[] = [];
      if (fenced) {
        candidates.push(fenced[1] || fenced[0]);
      } else {
        const objectMatch = trimmed.match(/\{[\s\S]*\}/);
        if (trimmed.startsWith('[')) {
          // A top-level array must not be reduced to the object(s) inside it;
          // the `{...}` span is only a second attempt if the array fails.
          candidates.push(trimmed);
          if (objectMatch) candidates.push(objectMatch[0]);
        } else {
          candidates.push(objectMatch ? objectMatch[0] : trimmed);
        }
      }

      let parsed: any;
      let succeeded = false;
      let lastError: unknown;
      for (const candidate of candidates) {
        try {
          parsed = JSON.parse(this.repairIfTruncated(candidate));
          succeeded = true;
          break;
        } catch (candidateError) {
          lastError = candidateError;
        }
      }
      if (!succeeded) {
        throw lastError;
      }

      // Only plain objects get the selection defaults; arrays are returned untouched.
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
        if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
      }

      return parsed;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn('[JSON-PARSER] JSON parsing failed:', message);
      return fallback;
    }
  }

  /** Runs truncation repair on text that does not end with a closing brace/bracket. */
  private static repairIfTruncated(text: string): string {
    if (!text.endsWith('}') && !text.endsWith(']')) {
      console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
      return this.repairTruncatedJSON(text);
    }
    return text;
  }

  /**
   * Attempts to turn truncated JSON text into something parseable.
   *
   * If the text contains a prefix at which every opened structure has been
   * closed again, the longest such prefix is returned (trailing garbage is
   * dropped). Otherwise the unterminated string (if any) is closed, a
   * dangling trailing comma is removed, and all still-open objects/arrays
   * are closed in correct nesting order. The result is not guaranteed to be
   * valid JSON (e.g. a dangling `"key":` cannot be repaired).
   */
  private static repairTruncatedJSON(cleaned: string): string {
    const pendingClosers: string[] = [];
    let inString = false;
    let escaped = false;
    let lastCompleteStructure = '';

    for (let i = 0; i < cleaned.length; i++) {
      const char = cleaned[i];

      if (escaped) {
        escaped = false;
        continue;
      }
      if (char === '\\') {
        escaped = true;
        continue;
      }
      if (char === '"') {
        inString = !inString;
        continue;
      }
      if (inString) {
        continue;
      }

      if (char === '{') {
        pendingClosers.push('}');
      } else if (char === '[') {
        pendingClosers.push(']');
      } else if (char === '}' || char === ']') {
        // Only a closer that matches the innermost open structure counts.
        if (pendingClosers[pendingClosers.length - 1] === char) {
          pendingClosers.pop();
          if (pendingClosers.length === 0) {
            lastCompleteStructure = cleaned.substring(0, i + 1);
          }
        }
      }
    }

    if (lastCompleteStructure) {
      return lastCompleteStructure;
    }

    let repaired = cleaned;
    if (inString) {
      // Drop a dangling escape character so the closing quote is not escaped.
      if (escaped) repaired = repaired.slice(0, -1);
      repaired += '"';
    } else {
      repaired = repaired.replace(/,\s*$/, '');
    }
    // Close innermost structures first.
    return repaired + [...pendingClosers].reverse().join('');
  }

  /**
   * Regex-based last-resort extraction of tool/concept names from text that
   * could not be parsed as JSON.
   *
   * Looks for `"selectedTools": [...]` and `"selectedConcepts": [...]` and
   * collects the quoted strings inside. If neither yields anything, up to 15
   * quoted strings from the whole text (longer than 2 characters, not a known
   * key name, containing no colon, not purely numeric) are treated as tool
   * names.
   *
   * @param jsonString Malformed JSON-like text.
   * @returns The extracted names; both arrays may be empty.
   */
  static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
    const selectedTools: string[] = [];
    const selectedConcepts: string[] = [];

    const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
    if (toolsMatch) {
      const toolMatches = toolsMatch[1].match(/"([^"]+)"/g);
      if (toolMatches) {
        selectedTools.push(...toolMatches.map(match => match.replace(/"/g, '')));
      }
    }

    const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
    if (conceptsMatch) {
      const conceptMatches = conceptsMatch[1].match(/"([^"]+)"/g);
      if (conceptMatches) {
        selectedConcepts.push(...conceptMatches.map(match => match.replace(/"/g, '')));
      }
    }

    if (selectedTools.length === 0 && selectedConcepts.length === 0) {
      const allMatches = jsonString.match(/"([^"]+)"/g);
      if (allMatches) {
        const possibleNames = allMatches
          .map(match => match.replace(/"/g, ''))
          .filter(name =>
            name.length > 2 &&
            !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
            !name.includes(':') &&
            !name.match(/^\d+$/)
          )
          .slice(0, 15);
        selectedTools.push(...possibleNames);
      }
    }

    return { selectedTools, selectedConcepts };
  }

  /**
   * Strictly parses untrusted JSON text.
   *
   * Rejects non-string input, input longer than `maxSize` characters, and any
   * input containing one of a fixed list of suspicious substrings. These
   * checks are plain substring/regex tests on the raw text, so legitimate
   * payloads that merely contain e.g. the word "constructor" are rejected too.
   *
   * @param jsonString Raw JSON text.
   * @param maxSize    Maximum accepted string length (default 10 * 1024 * 1024).
   * @returns The parsed object or array.
   * @throws Error when the input is not a string, is too large, matches a
   *         suspicious pattern, is not valid JSON, or does not parse to a
   *         non-null object.
   */
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    const suspiciousPatterns = [
      /<script/i,
      /javascript:/i,
      /eval\(/i,
      /function\s*\(/i,
      /__proto__/i,
      /constructor/i
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(jsonString)) {
        throw new Error('Potentially malicious content detected in JSON');
      }
    }

    try {
      const parsed = JSON.parse(jsonString);

      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('JSON must be an object');
      }

      return parsed;
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }
  }

  /**
   * Produces a size-bounded, sanitized deep copy of a value for audit output.
   *
   * - Strings: `<script>...</script>` blocks are replaced with
   *   `[script removed]`, then the result is cut to 500 characters.
   * - Arrays: at most 20 items are kept, followed by a summary string.
   * - Objects: the keys `__proto__`, `constructor` and `prototype` are always
   *   dropped; at most 50 remaining keys are kept, plus a `[truncated]` note.
   * - Recursion stops at `maxDepth` with the marker `[Max depth reached]`.
   * - Any other value (function, symbol, bigint, ...) is converted with
   *   `String()`.
   *
   * @param obj          Value to sanitize.
   * @param maxDepth     Maximum nesting depth to descend into.
   * @param currentDepth Current recursion depth (callers normally omit this).
   * @returns The sanitized copy.
   */
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      // Strip scripts before truncating so long strings are sanitized too and
      // a script tag cannot survive by being cut in half.
      const stripped = obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
      if (stripped.length > 500) {
        return stripped.slice(0, 500) + '...[truncated]';
      }
      return stripped;
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    if (Array.isArray(obj)) {
      if (obj.length > 20) {
        return [
          ...obj.slice(0, 20).map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1)),
          `...[${obj.length - 20} more items]`
        ];
      }
      return obj.map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1));
    }

    if (typeof obj === 'object') {
      // Filter dangerous keys first so the filter also applies to large
      // objects; assigning `__proto__` on the copy would alter its prototype.
      const keys = Object.keys(obj).filter(
        key => !['__proto__', 'constructor', 'prototype'].includes(key)
      );

      const sanitized: any = {};
      for (const key of keys.slice(0, 50)) {
        sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
      }
      if (keys.length > 50) {
        sanitized['[truncated]'] = `${keys.length - 50} more properties`;
      }
      return sanitized;
    }

    return String(obj);
  }

  /**
   * Checks that an audit export has the required top-level properties
   * (`metadata`, `recommendation`, `auditTrail`), the required metadata
   * fields, and that every audit trail entry is an object carrying the
   * required entry fields. Only the presence of properties is checked, not
   * their types or values.
   *
   * @param data Candidate export document.
   * @returns `isValid` plus the list of human-readable errors found.
   */
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
    for (const prop of requiredProps) {
      if (!(prop in data)) {
        errors.push(`Missing required property: ${prop}`);
      }
    }

    if (data.metadata && typeof data.metadata === 'object') {
      const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
      for (const prop of requiredMetadataProps) {
        if (!(prop in data.metadata)) {
          errors.push(`Missing required metadata property: ${prop}`);
        }
      }
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }

        const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
        for (const prop of requiredEntryProps) {
          if (!(prop in entry)) {
            errors.push(`Audit entry ${index} missing required property: ${prop}`);
          }
        }
      });
    }

    return {
      isValid: errors.length === 0,
      errors
    };
  }

  /**
   * Assembles an audit export document.
   *
   * The user query is cut to 1000 characters; the recommendation and every
   * audit trail entry are passed through {@link JSONParser.sanitizeForAudit}
   * (depth 6 and 4 respectively). Missing `additionalMetadata` fields fall
   * back to `'unknown'`, `{}` or `[]`. The `rawContext` values are copied
   * from `additionalMetadata` without sanitizing.
   *
   * @param recommendation     Recommendation payload to export.
   * @param userQuery          Original user query.
   * @param mode               Analysis mode label stored in the metadata.
   * @param auditTrail         Audit entries to include.
   * @param additionalMetadata Optional extra metadata and raw context.
   * @returns The export document (`metadata`, `recommendation`, `auditTrail`, `rawContext`).
   */
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: additionalMetadata.toolsDataHash || 'unknown',
        aiModel: additionalMetadata.aiModel || 'unknown',
        aiParameters: additionalMetadata.aiParameters || {},
        processingStats: additionalMetadata.processingStats || {}
      },
      recommendation: this.sanitizeForAudit(recommendation, 6),
      auditTrail: auditTrail.map(entry => this.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: additionalMetadata.selectedTools || [],
        backgroundKnowledge: additionalMetadata.backgroundKnowledge || [],
        contextHistory: additionalMetadata.contextHistory || [],
        embeddingsSimilarities: additionalMetadata.embeddingsSimilarities || {}
      }
    };
  }

  /**
   * Validates an uploaded analysis export.
   *
   * Structural problems (see {@link JSONParser.validateAuditExportStructure})
   * are reported as blocking `issues`. Everything else is a non-blocking
   * warning: unparseable or older-than-30-days timestamp, a mode other than
   * `workflow`/`tool`, no `ai-decision` or `selection-decision` entries in
   * the audit trail, or fewer than 80% of entries with a numeric confidence.
   *
   * @param data Candidate export document.
   * @returns `isValid` (true when there are no issues), `issues`, `warnings`.
   */
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = this.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * 24 * 60 * 60 * 1000; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / (24 * 60 * 60 * 1000))} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const aiDecisions = data.auditTrail.filter((e: any) => e.action === 'ai-decision').length;
      const toolSelections = data.auditTrail.filter((e: any) => e.action === 'selection-decision').length;

      if (aiDecisions === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (toolSelections === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      const entriesWithConfidence = data.auditTrail.filter((e: any) => typeof e.confidence === 'number').length;
      // For an empty trail this is NaN, which compares false below (no warning).
      const confidenceRatio = entriesWithConfidence / data.auditTrail.length;

      if (confidenceRatio < 0.8) {
        warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings
    };
  }
}

22
src/utils/toolHelpers.ts Normal file
View File

@ -0,0 +1,22 @@
// src/utils/toolHelpers.ts
/**
 * Describes a tool entry. Only `name` is required; every other field is
 * optional.
 */
export interface Tool {
name: string;
/** Entry category; restricted to these three literals. */
type?: 'software' | 'method' | 'concept';
/** May be explicitly `null` as well as absent. */
projectUrl?: string | null;
license?: string;
// NOTE(review): presumably flags that a knowledgebase article exists — confirm against the data file.
knowledgebase?: boolean;
domains?: string[];
phases?: string[];
platforms?: string[];
skillLevel?: string;
description?: string;
tags?: string[];
// NOTE(review): presumably names of related concept entries — confirm.
related_concepts?: string[];
}
export {
createToolSlug,
findToolByIdentifier,
isToolHosted
} from './clientUtils.js';

View File

@ -1,346 +0,0 @@
// src/utils/toolSelector.ts
import { aiService } from './aiService.js';
import { embeddingsService, type SimilarityResult } from './embeddings.js';
import { confidenceScoring } from './confidenceScoring.js';
import { JSONParser } from './jsonUtils.js';
import { getPrompt } from '../config/prompts.js';
import 'dotenv/config';
/**
 * Tunable limits and ratios used by `ToolSelector`. Each value is read from
 * an environment variable in the `ToolSelector` constructor, with the default
 * noted per field.
 */
export interface ToolSelectionConfig {
/** `AI_MAX_SELECTED_ITEMS` (default 25); passed to the `toolSelection` prompt. */
maxSelectedItems: number;
/** `AI_EMBEDDING_CANDIDATES` (default 50); second argument to `embeddingsService.findSimilar`. */
embeddingCandidates: number;
/** `AI_SIMILARITY_THRESHOLD` (default 0.3); third argument to `embeddingsService.findSimilar`. */
similarityThreshold: number;
/** `AI_EMBEDDING_SELECTION_LIMIT` (default 30); the tool cap sent to the AI is the minimum of this and `noEmbeddingsToolLimit`. */
embeddingSelectionLimit: number;
/** `AI_EMBEDDING_CONCEPTS_LIMIT` (default 15); the concept cap sent to the AI is the minimum of this and `noEmbeddingsConceptLimit`. */
embeddingConceptsLimit: number;
/** `AI_NO_EMBEDDINGS_TOOL_LIMIT` (default 25). */
noEmbeddingsToolLimit: number;
/** `AI_NO_EMBEDDINGS_CONCEPT_LIMIT` (default 10). */
noEmbeddingsConceptLimit: number;
/** `AI_EMBEDDINGS_MIN_TOOLS` (default 8); minimum number of similar tools for the embeddings result to be used as the candidate set. */
embeddingsMinTools: number;
/** `AI_EMBEDDINGS_MAX_REDUCTION_RATIO` (default 0.75); maximum similar-tools / all-tools ratio for the embeddings result to be used. */
embeddingsMaxReductionRatio: number;
/** `AI_METHOD_SELECTION_RATIO` (default 0.4); share of the tool cap initially given to methods (rounded up). */
methodSelectionRatio: number;
/** `AI_SOFTWARE_SELECTION_RATIO` (default 0.5); share of the tool cap initially given to software (rounded down). */
softwareSelectionRatio: number;
}
/**
 * Mutable state passed through a selection run.
 */
export interface SelectionContext {
userQuery: string;
mode: string;
/** Item name -> similarity score; cleared and refilled by `ToolSelector.getIntelligentCandidates`. */
embeddingsSimilarities: Map<string, number>;
// NOTE(review): not read or written by the selector code in this file — presumably maintained by the caller; confirm.
seenToolNames: Set<string>;
// NOTE(review): not touched by the selector code in this file — presumably filled in by later pipeline stages; confirm.
selectedTools?: Array<{
tool: any;
phase: string;
priority: string;
justification?: string;
taskRelevance?: number;
limitations?: string[];
}>;
}
/**
 * Result of the AI selection step (`ToolSelector.performAISelection`).
 */
export interface ToolSelectionResult {
/** Candidate tool objects whose names the AI returned in `selectedTools`. */
selectedTools: any[];
/** Candidate concept objects whose names the AI returned in `selectedConcepts`. */
selectedConcepts: any[];
/** Value returned by `confidenceScoring.calculateSelectionConfidence`. */
confidence: number;
}
/**
 * Narrows the full tools/concepts dataset down to the items relevant to a
 * user query: an embeddings similarity search produces candidates, then an AI
 * call picks the final selection from them.
 */
class ToolSelector {
  private config: ToolSelectionConfig;

  /** Reads all limits/ratios from the environment (see `ToolSelectionConfig`) and logs them. */
  constructor() {
    this.config = {
      maxSelectedItems: this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25),
      embeddingCandidates: this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50),
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
      noEmbeddingsToolLimit: this.getEnvInt('AI_NO_EMBEDDINGS_TOOL_LIMIT', 25),
      noEmbeddingsConceptLimit: this.getEnvInt('AI_NO_EMBEDDINGS_CONCEPT_LIMIT', 10),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5)
    };

    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }

  /**
   * Reads an integer from the environment.
   * Falls back to `defaultValue` when the variable is unset, empty, or not a
   * parseable number (previously a non-numeric value produced `NaN`).
   */
  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) {
      return defaultValue;
    }
    const parsed = parseInt(value, 10);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Reads a floating point number from the environment.
   * Falls back to `defaultValue` when the variable is unset, empty, or not a
   * parseable number (previously a non-numeric value produced `NaN`).
   */
  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) {
      return defaultValue;
    }
    const parsed = parseFloat(value);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Selects the tools and concepts relevant to `userQuery`.
   *
   * Steps: wait for embeddings initialization (a failure is only logged),
   * run a similarity search, record it with the audit service, store the
   * similarities on `context`, decide whether the embeddings result is a
   * usable candidate set, then let the AI choose from the candidates.
   *
   * @param userQuery User's query text.
   * @param toolsData Dataset with `tools`, `concepts`, `domains`, `phases`
   *                  and `'domain-agnostic-software'`.
   * @param mode      Analysis mode, forwarded to the prompt and audit entry.
   * @param context   Selection context; `embeddingsSimilarities` is cleared
   *                  and refilled.
   * @returns The AI-selected tools and concepts plus the dataset's domains,
   *          phases and domain-agnostic software unchanged.
   * @throws Propagates errors from the similarity search, the audit/data
   *         service imports, and the AI selection.
   */
  async getIntelligentCandidates(
    userQuery: string,
    toolsData: any,
    mode: string,
    context: SelectionContext
  ): Promise<{
    tools: any[];
    concepts: any[];
    domains: any[];
    phases: any[];
    'domain-agnostic-software': any[];
  }> {
    console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');

    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];

    context.embeddingsSimilarities.clear();

    try {
      await embeddingsService.waitForInitialization();
    } catch (error) {
      // Initialization failure is not fatal here; the search below is still attempted.
      console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
    }

    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');

    const embeddingsSearchStart = Date.now();

    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.config.embeddingCandidates,
      this.config.similarityThreshold
    ) as SimilarityResult[];

    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');

    // Loaded lazily at call time rather than at module load.
    const { auditService } = await import('./auditService.js');
    const { getDataVersion } = await import('./dataService.js');

    const toolsDataHash = getDataVersion() || 'unknown';

    auditService.addEmbeddingsSearch(
      userQuery,
      similarItems,
      this.config.similarityThreshold,
      embeddingsSearchStart,
      {
        toolsDataHash: toolsDataHash,
        selectionPhase: 'initial-candidate-selection',
        candidateLimit: this.config.embeddingCandidates,
        mode: mode,
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
      }
    );

    similarItems.forEach(item => {
      context.embeddingsSimilarities.set(item.name, item.similarity);
    });

    const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
    const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));

    // Resolve similarity hits back to dataset entries; hits without a matching entry are dropped.
    const similarTools = similarItems
      .filter((item: any) => item.type === 'tool')
      .map((item: any) => toolsMap.get(item.name))
      .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

    const similarConcepts = similarItems
      .filter((item: any) => item.type === 'concept')
      .map((item: any) => conceptsMap.get(item.name))
      .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

    const totalAvailableTools = toolsData.tools.length;
    const reductionRatio = similarTools.length / totalAvailableTools;

    // Use the embeddings result only if it is large enough to be useful and
    // small enough to actually narrow the dataset; otherwise send everything.
    if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
      candidateTools = similarTools;
      candidateConcepts = similarConcepts;

      console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
    } else {
      console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
    }

    const selection = await this.performAISelection(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
      context
    );

    return {
      tools: selection.selectedTools,
      concepts: selection.selectedConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

  /**
   * Asks the AI to pick tools and concepts from the candidates.
   *
   * The tools sent are capped at min(embeddingSelectionLimit,
   * noEmbeddingsToolLimit), split between methods and software by the
   * configured ratios; leftover capacity is filled with further methods, then
   * further software. Names returned by the AI that do not match a candidate
   * are dropped.
   *
   * @throws Error when the AI response lacks `selectedTools` /
   *         `selectedConcepts` arrays or selects nothing; AI call errors are
   *         logged and rethrown.
   */
  private async performAISelection(
    userQuery: string,
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
    context: SelectionContext
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');

    const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
    const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');

    console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

    const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
    const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
    const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);

    const toolsToSend: any[] = [
      ...methodsWithFullData.slice(0, methodLimit),
      ...softwareWithFullData.slice(0, softwareLimit),
    ];

    // Fill any unused capacity: extra methods first, then extra software.
    const remainingCapacity = maxTools - toolsToSend.length;
    if (remainingCapacity > 0) {
      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
      const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
      toolsToSend.push(...extraMethods, ...extraSoftware);
    }

    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);

    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );

    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);

      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
        throw new Error('AI selection failed to return valid tool and concept selection');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected === 0) {
        throw new Error('AI selection returned empty selection');
      }

      const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));

      const selectedTools = result.selectedTools
        .map((name: string) => toolsMap.get(name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

      const selectedConcepts = result.selectedConcepts
        .map((name: string) => conceptsMap.get(name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');

      console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');

      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,
        candidateTools.length + candidateConcepts.length
      );

      return { selectedTools, selectedConcepts, confidence };
    } catch (error) {
      console.error('[TOOL-SELECTOR] AI selection failed:', error);
      throw error;
    }
  }

  /**
   * Asks the AI which of `availableTools` fit the given phase.
   *
   * Selections whose `toolName` does not match an available tool, and
   * entries that are not objects, are discarded with a warning. Any failure
   * of the AI call or parsing results in an empty list rather than an error.
   *
   * @param userQuery      User's query text.
   * @param phase          Phase descriptor; `phase.id` is used for logging.
   * @param availableTools Tools the AI may choose from.
   * @param context        Selection context (currently unused here).
   * @returns The valid selections as returned by the AI; `[]` when there are
   *          no tools, the response is not an array, or the call fails.
   */
  async selectToolsForPhase(
    userQuery: string,
    phase: any,
    availableTools: any[],
    context: SelectionContext
  ): Promise<Array<{
    toolName: string;
    taskRelevance: number;
    justification: string;
    limitations: string[];
  }>> {
    console.log('[TOOL-SELECTOR] Selecting tools for phase:', phase.id);

    if (availableTools.length === 0) {
      console.log('[TOOL-SELECTOR] No tools available for phase:', phase.id);
      return [];
    }

    const prompt = getPrompt('phaseToolSelection', userQuery, phase, availableTools);

    try {
      const response = await aiService.callMicroTaskAI(prompt);
      const selections = JSONParser.safeParseJSON(response.content, []);

      if (Array.isArray(selections)) {
        const validSelections = selections.filter((sel: any) => {
          // Guard against null / non-object entries: previously a single such
          // entry threw here and discarded every selection for the phase.
          const toolName = sel && typeof sel === 'object' ? sel.toolName : undefined;
          const matchingTool = toolName === undefined
            ? undefined
            : availableTools.find((tool: any) => tool && tool.name === toolName);
          if (!matchingTool) {
            console.warn('[TOOL-SELECTOR] Invalid tool selection for phase:', phase.id, toolName);
          }
          return !!matchingTool;
        });

        console.log('[TOOL-SELECTOR] Valid selections for phase:', phase.id, validSelections.length);
        return validSelections;
      }

      return [];
    } catch (error) {
      console.error('[TOOL-SELECTOR] Phase tool selection failed:', error);
      return [];
    }
  }

  /** Projects a tool entry onto the fields sent to the AI; missing list fields become `[]`. */
  private createToolData = (tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  });

  /** Projects a concept entry onto the fields sent to the AI; `type` is always `'concept'`. */
  private createConceptData = (concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  });

  /** Returns a shallow copy of the active configuration. */
  getConfig(): ToolSelectionConfig {
    return { ...this.config };
  }
}
export const toolSelector = new ToolSelector();