centralize data model loading

This commit is contained in:
overcuriousity 2025-07-18 23:16:22 +02:00
parent 3877e3a63e
commit a778f5b5f7
8 changed files with 145 additions and 60 deletions

View File

@ -1,12 +1,10 @@
---
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data for validation
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
const tools = data.tools;
const phases = data.phases;
const domainAgnosticSoftware = data['domain-agnostic-software'] || []; // Add this line

View File

@ -1,12 +1,10 @@
---
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
const domains = data.domains;
const phases = data.phases;

View File

@ -1,12 +1,9 @@
---
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
const domains = data.domains;
const phases = data.phases;

View File

@ -2,9 +2,8 @@
// src/pages/api/ai/query.ts
import type { APIRoute } from 'astro';
import { getSessionFromRequest, verifySession } from '../../../utils/auth.js';
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
export const prerender = false;
@ -74,9 +73,7 @@ function checkRateLimit(userId: string): boolean {
// Load tools database
async function loadToolsDatabase() {
try {
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
return load(yamlContent) as any;
return await getCompressedToolsDataForAI();
} catch (error) {
console.error('Failed to load tools database:', error);
throw new Error('Database unavailable');
@ -90,11 +87,11 @@ function createSystemPrompt(toolsData: any): string {
description: tool.description,
domains: tool.domains,
phases: tool.phases,
domainAgnostic: tool['domain-agnostic-software'],
platforms: tool.platforms,
skillLevel: tool.skillLevel,
license: tool.license,
tags: tool.tags,
projectUrl: tool.projectUrl ? 'self-hosted' : 'external'
}));
// Get regular phases (no more filtering needed)
@ -119,16 +116,9 @@ function createSystemPrompt(toolsData: any): string {
).join('\n');
// Build dynamic phase descriptions for tool selection
const phaseDescriptions = regularPhases.map((phase: any) => {
// Create generic descriptions or you could add a 'description' field to the YAML
const descriptions = {
'data-collection': 'Imaging, Acquisition, Remote Collection Tools',
'examination': 'Parsing, Extraction, Initial Analysis Tools',
'analysis': 'Deep Analysis, Correlation, Visualization Tools',
'reporting': 'Documentation, Visualization, Presentation Tools (z.B. QGIS für Geodaten, Timeline-Tools)'
};
return `- ${phase.name}: ${phase.description || descriptions[phase.id] || 'Tools for this phase'}`;
}).join('\n');
const phaseDescriptions = regularPhases.map((phase: any) =>
`- ${phase.name}: ${phase.description || 'Tools for this phase'}`
).join('\n');
// Add domain-agnostic software descriptions
const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) =>
@ -153,13 +143,13 @@ FORENSISCHE DOMÄNEN:
${domainsDescription}
WICHTIGE REGELN:
1. Open Source Tools bevorzugen (license != "Proprietary")
2. Pro Phase 1-3 Tools empfehlen (immer mindestens 1 wenn verfügbar)
3. Tools können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool für jede Phase zu empfehlen!
4. Für Reporting-Phase: Visualisierungs- und Dokumentationstools einschließen
5. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
6. Deutsche Antworten für deutsche Anfragen, English for English queries
7. Bewerbe NIEMALS Proprietäre Software fälschlicherweise als Open-Source-Tools, erkenne aber an, falls diese besser geeignet sein könnte.
1. Pro Phase 1-3 Tools empfehlen (immer mindestens 1 wenn verfügbar)
2. Tools können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
3. Für Reporting-Phase: Visualisierungs- und Dokumentationstools einschließen
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
5. Deutsche Antworten für deutsche Anfragen, English for English queries
6. Bewerbe NIEMALS Proprietäre Software fälschlicherweise als Open-Source-Tools, erkenne aber an, falls diese besser geeignet sein könnte.
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an wenn ein proprietäres Tool besser geeignet ist.
TOOL-AUSWAHL NACH PHASE:
${phaseDescriptions}

View File

@ -4,14 +4,11 @@ import ToolCard from '../components/ToolCard.astro';
import ToolFilters from '../components/ToolFilters.astro';
import ToolMatrix from '../components/ToolMatrix.astro';
import AIQueryInterface from '../components/AIQueryInterface.astro';
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
const tools = data.tools;
---

View File

@ -1,13 +1,10 @@
---
import BaseLayout from '../layouts/BaseLayout.astro';
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
// Filter tools that have knowledgebase entries
const knowledgebaseTools = data.tools.filter((tool: any) => tool.knowledgebase === true);

View File

@ -1,13 +1,10 @@
---
import BaseLayout from '../layouts/BaseLayout.astro';
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { getToolsData } from '../utils/dataService.js';
// Load tools data to get self-hosted services
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
const yamlContent = await fs.readFile(yamlPath, 'utf8');
const data = load(yamlContent) as any;
const data = await getToolsData();
// Filter for hosted services based on projectUrl presence
const hostedServices = data.tools.filter((tool: any) => {

111
src/utils/dataService.ts Normal file
View File

@ -0,0 +1,111 @@
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
/**
 * Shape this module expects from the parsed `tools.yaml` document.
 *
 * Every section is a list; the individual entries are intentionally left
 * untyped (`any`) because their schema is not declared in this module.
 */
interface ToolsData {
  tools: Array<any>;
  domains: Array<any>;
  phases: Array<any>;
  'domain-agnostic-software': Array<any>;
}
/**
 * Variant of `ToolsData` handed to the AI query code.
 *
 * All sections other than `tools` are inherited unchanged; `tools` is
 * redeclared because its entries are reduced copies of the originals.
 */
interface CompressedToolsData extends Omit<ToolsData, 'tools'> {
  tools: Array<any>;
}
// Module-level caches. All of them start empty and are reset by clearCache().

// Parsed YAML document, filled in by loadRawData().
let cachedData: ToolsData | null = null;

// Copy of the document with a shuffled `tools` list, plus the
// `toDateString()` value of the day on which that shuffle was produced.
let cachedRandomizedData: ToolsData | null = null;
let lastRandomizationDate: string | null = null;

// Reduced data set returned by getCompressedToolsDataForAI().
let cachedCompressedData: CompressedToolsData | null = null;
/**
 * Builds a deterministic pseudo-random generator from a numeric seed.
 *
 * The generator holds one number of internal state, initialised to
 * `Math.sin(seed) * 10000`. Each call replaces the state with
 * `Math.sin(state) * 10000` and returns the fractional part of the new
 * state (`state - Math.floor(state)`).
 *
 * @param seed Number the sequence is derived from; equal seeds yield equal
 *   sequences.
 * @returns A zero-argument function producing the next value of the sequence.
 */
function seededRandom(seed: number): () => number {
  let state = Math.sin(seed) * 10000;

  const next = (): number => {
    state = Math.sin(state) * 10000;
    return state - Math.floor(state);
  };

  return next;
}
/**
 * Derives a numeric seed from the current date and the process uptime.
 *
 * The seed is the sum of the UTF-16 code units of `new Date().toDateString()`
 * plus the whole number of seconds reported by `process.uptime()` at the
 * moment of the call.
 *
 * NOTE(review): `process.uptime()` is the elapsed running time, not a fixed
 * start timestamp, so two calls made at different moments on the same day can
 * return different seeds. Callers that need a stable value must cache it.
 *
 * @returns The computed seed.
 */
function getDailySeed(): number {
  const dateLabel = new Date().toDateString();
  const uptimeSeconds = process.uptime();

  let charCodeSum = 0;
  for (let i = 0; i < dateLabel.length; i++) {
    charCodeSum += dateLabel.charCodeAt(i);
  }

  return charCodeSum + Math.floor(uptimeSeconds);
}
/**
 * Returns a shuffled copy of `array`; the input itself is not modified.
 *
 * Walks from the last index down to index 1 and swaps each position with an
 * index chosen as `Math.floor(randomFn() * (position + 1))`, so `randomFn` is
 * invoked exactly `array.length - 1` times for arrays of two or more items
 * and not at all otherwise.
 *
 * @param array Items to shuffle.
 * @param randomFn Source of random numbers; expected to return values in
 *   [0, 1) so that the chosen index stays within bounds.
 * @returns A new array containing the same items in shuffled order.
 */
function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
  const result = Array.from(array);
  let position = result.length - 1;

  while (position > 0) {
    const pick = Math.floor(randomFn() * (position + 1));
    [result[position], result[pick]] = [result[pick], result[position]];
    position -= 1;
  }

  return result;
}
/**
 * Reads and parses `src/data/tools.yaml` (resolved against the current
 * working directory) and caches the parsed document in `cachedData`.
 *
 * Later calls return the cached document without touching the file system.
 *
 * @returns The parsed tools document.
 * @throws Error when the parsed document is not a mapping containing a
 *   `tools` list (for example when the file is empty). File-system and YAML
 *   parse errors from `fs.readFile` / `load` propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  if (cachedData) {
    return cachedData;
  }

  const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
  const yamlContent = await fs.readFile(yamlPath, 'utf8');

  // `load` may yield undefined, null or a scalar for an empty or malformed
  // file. Check the root before casting so the failure is reported here with
  // a clear message instead of as a TypeError in a later consumer.
  const parsed: unknown = load(yamlContent);
  if (
    typeof parsed !== 'object' ||
    parsed === null ||
    !Array.isArray((parsed as { tools?: unknown }).tools)
  ) {
    throw new Error(
      `Invalid tools data in ${yamlPath}: expected a mapping with a "tools" list`
    );
  }

  cachedData = parsed as ToolsData;
  return cachedData;
}
/**
 * Returns the tools document with its `tools` list in shuffled order.
 *
 * The shuffled copy is cached together with the `toDateString()` value of the
 * day it was created. It is rebuilt when no copy exists yet or when the
 * current date string differs from the stored one. Rebuilding also discards
 * the compressed cache, which is derived from the shuffled copy.
 *
 * All other sections of the document are passed through as loaded.
 *
 * @returns The cached (or freshly built) shuffled document.
 */
export async function getToolsData(): Promise<ToolsData> {
  const currentDay = new Date().toDateString();

  // Fast path: a shuffle for the current day is already available.
  if (cachedRandomizedData && lastRandomizationDate === currentDay) {
    return cachedRandomizedData;
  }

  const source = await loadRawData();
  const nextRandom = seededRandom(getDailySeed());

  // Only the tools list is reordered; everything else is copied by spread.
  const reshuffled: ToolsData = {
    ...source,
    tools: shuffleArray(source.tools, nextRandom)
  };

  cachedRandomizedData = reshuffled;
  lastRandomizationDate = currentDay;
  cachedCompressedData = null;

  return reshuffled;
}
/**
 * Returns the tools document prepared for the AI prompt: every tool entry is
 * copied without its `projectUrl` and `statusUrl` properties, while
 * `domains`, `phases` and `domain-agnostic-software` are passed through.
 *
 * The result is cached. The cache is dropped by `getToolsData()` whenever it
 * re-randomizes the tool order and by `clearCache()`.
 *
 * @returns The cached (or freshly built) compressed document.
 */
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
  // Always go through getToolsData() first: it is the only place that notices
  // a day rollover and invalidates the compressed cache. Checking the cache
  // before this call would keep serving the previous day's order.
  const data = await getToolsData();

  if (!cachedCompressedData) {
    const compressedTools = data.tools.map(tool => {
      // Strip the two URL fields; the rest of the entry is kept as-is.
      const { projectUrl, statusUrl, ...compressedTool } = tool;
      return compressedTool;
    });

    cachedCompressedData = {
      tools: compressedTools,
      domains: data.domains,
      phases: data.phases,
      'domain-agnostic-software': data['domain-agnostic-software']
    };
  }

  return cachedCompressedData;
}
/**
 * Empties every module-level cache so the next data request re-reads the
 * YAML file and produces a new shuffle. Intended for development use.
 */
export function clearCache(): void {
  // Derived caches and the parsed document are all reset to null.
  cachedCompressedData = cachedRandomizedData = cachedData = null;
  // Forget the day of the last shuffle as well.
  lastRandomizationDate = null;
}