From a778f5b5f79066cebcd4760ec1682cde8d098e26 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Fri, 18 Jul 2025 23:16:22 +0200 Subject: [PATCH] centralize data model loading --- src/components/AIQueryInterface.astro | 10 +-- src/components/ToolFilters.astro | 10 +-- src/components/ToolMatrix.astro | 9 +-- src/pages/api/ai/query.ts | 38 ++++----- src/pages/index.astro | 9 +-- src/pages/knowledgebase.astro | 9 +-- src/pages/status.astro | 9 +-- src/utils/dataService.ts | 111 ++++++++++++++++++++++++++ 8 files changed, 145 insertions(+), 60 deletions(-) create mode 100644 src/utils/dataService.ts diff --git a/src/components/AIQueryInterface.astro b/src/components/AIQueryInterface.astro index d70b6f5..0dc2df5 100644 --- a/src/components/AIQueryInterface.astro +++ b/src/components/AIQueryInterface.astro @@ -1,12 +1,10 @@ --- -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data for validation -const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); + const tools = data.tools; const phases = data.phases; const domainAgnosticSoftware = data['domain-agnostic-software'] || []; // Add this line diff --git a/src/components/ToolFilters.astro b/src/components/ToolFilters.astro index b809fa4..c26ecaf 100644 --- a/src/components/ToolFilters.astro +++ b/src/components/ToolFilters.astro @@ -1,12 +1,10 @@ --- -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data -const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); + const domains = data.domains; const
phases = data.phases; diff --git a/src/components/ToolMatrix.astro b/src/components/ToolMatrix.astro index b433c51..44986f3 100644 --- a/src/components/ToolMatrix.astro +++ b/src/components/ToolMatrix.astro @@ -1,12 +1,9 @@ --- -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data -const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); const domains = data.domains; const phases = data.phases; diff --git a/src/pages/api/ai/query.ts b/src/pages/api/ai/query.ts index 42cfe07..f4b3a89 100644 --- a/src/pages/api/ai/query.ts +++ b/src/pages/api/ai/query.ts @@ -2,9 +2,8 @@ // src/pages/api/ai/query.ts import type { APIRoute } from 'astro'; import { getSessionFromRequest, verifySession } from '../../../utils/auth.js'; -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getCompressedToolsDataForAI } from '../../../utils/dataService.js'; + export const prerender = false; @@ -74,9 +73,7 @@ function checkRateLimit(userId: string): boolean { // Load tools database async function loadToolsDatabase() { try { - const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); - const yamlContent = await fs.readFile(yamlPath, 'utf8'); - return load(yamlContent) as any; + return await getCompressedToolsDataForAI(); } catch (error) { console.error('Failed to load tools database:', error); throw new Error('Database unavailable'); @@ -90,11 +87,11 @@ function createSystemPrompt(toolsData: any): string { description: tool.description, domains: tool.domains, phases: tool.phases, + domainAgnostic: tool['domain-agnostic-software'], platforms: tool.platforms, skillLevel: tool.skillLevel, license: tool.license, tags: tool.tags, - projectUrl: tool.projectUrl ? 
'self-hosted' : 'external' })); // Get regular phases (no more filtering needed) @@ -119,16 +116,9 @@ function createSystemPrompt(toolsData: any): string { ).join('\n'); // Build dynamic phase descriptions for tool selection - const phaseDescriptions = regularPhases.map((phase: any) => { - // Create generic descriptions or you could add a 'description' field to the YAML - const descriptions = { - 'data-collection': 'Imaging, Acquisition, Remote Collection Tools', - 'examination': 'Parsing, Extraction, Initial Analysis Tools', - 'analysis': 'Deep Analysis, Correlation, Visualization Tools', - 'reporting': 'Documentation, Visualization, Presentation Tools (z.B. QGIS für Geodaten, Timeline-Tools)' - }; - return `- ${phase.name}: ${phase.description || descriptions[phase.id] || 'Tools for this phase'}`; - }).join('\n'); + const phaseDescriptions = regularPhases.map((phase: any) => + `- ${phase.name}: ${phase.description || 'Tools for this phase'}` + ).join('\n'); // Add domain-agnostic software descriptions const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) => @@ -153,13 +143,13 @@ FORENSISCHE DOMÄNEN: ${domainsDescription} WICHTIGE REGELN: -1. Open Source Tools bevorzugen (license != "Proprietary") -2. Pro Phase 1-3 Tools empfehlen (immer mindestens 1 wenn verfügbar) -3. Tools können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool für jede Phase zu empfehlen! -4. Für Reporting-Phase: Visualisierungs- und Dokumentationstools einschließen -5. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug. -6. Deutsche Antworten für deutsche Anfragen, English for English queries -7. Bewerbe NIEMALS Proprietäre Software fälschlicherweise als Open-Source-Tools, erkenne aber an, falls diese besser geeignet sein könnte. +1. Pro Phase 1-3 Tools empfehlen (immer mindestens 1 wenn verfügbar) +2. 
Tools können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist. +3. Für Reporting-Phase: Visualisierungs- und Dokumentationstools einschließen +4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug. +5. Deutsche Antworten für deutsche Anfragen, English for English queries +6. Bewerbe NIEMALS Proprietäre Software fälschlicherweise als Open-Source-Tools, erkenne aber an, falls diese besser geeignet sein könnte. +7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an wenn ein proprietäres Tool besser geeignet ist. TOOL-AUSWAHL NACH PHASE: ${phaseDescriptions} diff --git a/src/pages/index.astro b/src/pages/index.astro index af2c752..fa01370 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -4,14 +4,11 @@ import ToolCard from '../components/ToolCard.astro'; import ToolFilters from '../components/ToolFilters.astro'; import ToolMatrix from '../components/ToolMatrix.astro'; import AIQueryInterface from '../components/AIQueryInterface.astro'; -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data -const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); const tools = data.tools; --- diff --git a/src/pages/knowledgebase.astro b/src/pages/knowledgebase.astro index 9aaa97a..e08cc4b 100644 --- a/src/pages/knowledgebase.astro +++ b/src/pages/knowledgebase.astro @@ -1,13 +1,10 @@ --- import BaseLayout from '../layouts/BaseLayout.astro'; -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data -const yamlPath = path.join(process.cwd(), 
'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); // Filter tools that have knowledgebase entries const knowledgebaseTools = data.tools.filter((tool: any) => tool.knowledgebase === true); diff --git a/src/pages/status.astro b/src/pages/status.astro index 1076cac..035c23b 100644 --- a/src/pages/status.astro +++ b/src/pages/status.astro @@ -1,13 +1,10 @@ --- import BaseLayout from '../layouts/BaseLayout.astro'; -import { promises as fs } from 'fs'; -import { load } from 'js-yaml'; -import path from 'path'; +import { getToolsData } from '../utils/dataService.js'; + // Load tools data to get self-hosted services -const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); -const yamlContent = await fs.readFile(yamlPath, 'utf8'); -const data = load(yamlContent) as any; +const data = await getToolsData(); // Filter for hosted services based on projectUrl presence const hostedServices = data.tools.filter((tool: any) => { diff --git a/src/utils/dataService.ts b/src/utils/dataService.ts new file mode 100644 index 0000000..c3132de --- /dev/null +++ b/src/utils/dataService.ts @@ -0,0 +1,111 @@ +import { promises as fs } from 'fs'; +import { load } from 'js-yaml'; +import path from 'path'; + +interface ToolsData { + tools: any[]; + domains: any[]; + phases: any[]; + 'domain-agnostic-software': any[]; +} + +interface CompressedToolsData extends Omit<ToolsData, 'tools'> { + tools: any[]; +} + +let cachedData: ToolsData | null = null; +let cachedRandomizedData: ToolsData | null = null; +let cachedCompressedData: CompressedToolsData | null = null; +let lastRandomizationDate: string | null = null; + +// Create a seeded random number generator +function seededRandom(seed: number): () => number { + let x = Math.sin(seed) * 10000; + return function() { + x = Math.sin(x) * 10000; + return x - Math.floor(x); + }; +} + +// Get today's date as seed + process start time for consistency within
day/session +function getDailySeed(): number { + const today = new Date().toDateString(); + const processStart = process.uptime(); + return today.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0) + Math.floor(processStart); +} + +// Fisher-Yates shuffle with seeded random +function shuffleArray<T>(array: T[], randomFn: () => number): T[] { + const shuffled = [...array]; + for (let i = shuffled.length - 1; i > 0; i--) { + const j = Math.floor(randomFn() * (i + 1)); + [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]]; + } + return shuffled; +} + +// Load raw data from YAML +async function loadRawData(): Promise<ToolsData> { + if (!cachedData) { + const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); + const yamlContent = await fs.readFile(yamlPath, 'utf8'); + cachedData = load(yamlContent) as ToolsData; + } + return cachedData; +} + +// Get tools data with randomized tool order (daily seed) +export async function getToolsData(): Promise<ToolsData> { + const today = new Date().toDateString(); + + // Check if we need to re-randomize (new day or first load) + if (!cachedRandomizedData || lastRandomizationDate !== today) { + const rawData = await loadRawData(); + const seed = getDailySeed(); + const randomFn = seededRandom(seed); + + // Randomize tools array while keeping other data intact + const randomizedTools = shuffleArray(rawData.tools, randomFn); + + cachedRandomizedData = { + ...rawData, + tools: randomizedTools + }; + + lastRandomizationDate = today; + + // Clear compressed cache when we re-randomize + cachedCompressedData = null; + } + + return cachedRandomizedData; +} + +// Get compressed data for AI (removes projectUrl and statusUrl) +export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> { + if (!cachedCompressedData) { + const data = await getToolsData(); + + const compressedTools = data.tools.map(tool => { + const { projectUrl, statusUrl, ...compressedTool } = tool; + return compressedTool; + }); + + cachedCompressedData = { + tools:
compressedTools, + domains: data.domains, + phases: data.phases, + 'domain-agnostic-software': data['domain-agnostic-software'] + }; + } + + return cachedCompressedData; +} + +// Force cache refresh (useful for development) +export function clearCache(): void { + cachedData = null; + cachedRandomizedData = null; + cachedCompressedData = null; + lastRandomizationDate = null; +} \ No newline at end of file