forensic-pathways/src/utils/jsonUtils.ts
2025-08-17 11:45:53 +02:00

356 lines
11 KiB
TypeScript

// src/utils/jsonUtils.ts
export class JSONParser {
static safeParseJSON(jsonString: string, fallback: any = null): any {
try {
let cleaned = jsonString.trim();
const jsonBlockPatterns = [
/```json\s*([\s\S]*?)\s*```/i,
/```\s*([\s\S]*?)\s*```/i,
/\{[\s\S]*\}/,
];
for (const pattern of jsonBlockPatterns) {
const match = cleaned.match(pattern);
if (match) {
cleaned = match[1] || match[0];
break;
}
}
if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
cleaned = this.repairTruncatedJSON(cleaned);
}
const parsed = JSON.parse(cleaned);
if (parsed && typeof parsed === 'object') {
if (!parsed.selectedTools) parsed.selectedTools = [];
if (!parsed.selectedConcepts) parsed.selectedConcepts = [];
if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
}
return parsed;
} catch (error) {
console.warn('[JSON-PARSER] JSON parsing failed:', error.message);
return fallback;
}
}
private static repairTruncatedJSON(cleaned: string): string {
let braceCount = 0;
let bracketCount = 0;
let inString = false;
let escaped = false;
let lastCompleteStructure = '';
for (let i = 0; i < cleaned.length; i++) {
const char = cleaned[i];
if (escaped) {
escaped = false;
continue;
}
if (char === '\\') {
escaped = true;
continue;
}
if (char === '"' && !escaped) {
inString = !inString;
continue;
}
if (!inString) {
if (char === '{') braceCount++;
if (char === '}') braceCount--;
if (char === '[') bracketCount++;
if (char === ']') bracketCount--;
if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
lastCompleteStructure = cleaned.substring(0, i + 1);
}
}
}
if (lastCompleteStructure) {
return lastCompleteStructure;
} else {
if (braceCount > 0) cleaned += '}';
if (bracketCount > 0) cleaned += ']';
return cleaned;
}
}
static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
const selectedTools: string[] = [];
const selectedConcepts: string[] = [];
const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
if (toolsMatch) {
const toolMatches = toolsMatch[1].match(/"([^"]+)"/g);
if (toolMatches) {
selectedTools.push(...toolMatches.map(match => match.replace(/"/g, '')));
}
}
const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
if (conceptsMatch) {
const conceptMatches = conceptsMatch[1].match(/"([^"]+)"/g);
if (conceptMatches) {
selectedConcepts.push(...conceptMatches.map(match => match.replace(/"/g, '')));
}
}
if (selectedTools.length === 0 && selectedConcepts.length === 0) {
const allMatches = jsonString.match(/"([^"]+)"/g);
if (allMatches) {
const possibleNames = allMatches
.map(match => match.replace(/"/g, ''))
.filter(name =>
name.length > 2 &&
!['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
!name.includes(':') &&
!name.match(/^\d+$/)
)
.slice(0, 15);
selectedTools.push(...possibleNames);
}
}
return { selectedTools, selectedConcepts };
}
static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
if (typeof jsonString !== 'string') {
throw new Error('Input must be a string');
}
if (jsonString.length > maxSize) {
throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
}
const suspiciousPatterns = [
/<script/i,
/javascript:/i,
/eval\(/i,
/function\s*\(/i,
/__proto__/i,
/constructor/i
];
for (const pattern of suspiciousPatterns) {
if (pattern.test(jsonString)) {
throw new Error('Potentially malicious content detected in JSON');
}
}
try {
const parsed = JSON.parse(jsonString);
if (typeof parsed !== 'object' || parsed === null) {
throw new Error('JSON must be an object');
}
return parsed;
} catch (error) {
if (error instanceof SyntaxError) {
throw new Error(`Invalid JSON syntax: ${error.message}`);
}
throw error;
}
}
static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
if (currentDepth >= maxDepth) {
return '[Max depth reached]';
}
if (obj === null || obj === undefined) {
return obj;
}
if (typeof obj === 'string') {
if (obj.length > 500) {
return obj.slice(0, 500) + '...[truncated]';
}
return obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
}
if (typeof obj === 'number' || typeof obj === 'boolean') {
return obj;
}
if (Array.isArray(obj)) {
if (obj.length > 20) {
return [
...obj.slice(0, 20).map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1)),
`...[${obj.length - 20} more items]`
];
}
return obj.map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1));
}
if (typeof obj === 'object') {
const keys = Object.keys(obj);
if (keys.length > 50) {
const sanitized: any = {};
keys.slice(0, 50).forEach(key => {
sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
});
sanitized['[truncated]'] = `${keys.length - 50} more properties`;
return sanitized;
}
const sanitized: any = {};
keys.forEach(key => {
if (['__proto__', 'constructor', 'prototype'].includes(key)) {
return;
}
sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
});
return sanitized;
}
return String(obj);
}
static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
const errors: string[] = [];
if (!data || typeof data !== 'object') {
errors.push('Export data must be an object');
return { isValid: false, errors };
}
const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
for (const prop of requiredProps) {
if (!(prop in data)) {
errors.push(`Missing required property: ${prop}`);
}
}
if (data.metadata && typeof data.metadata === 'object') {
const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
for (const prop of requiredMetadataProps) {
if (!(prop in data.metadata)) {
errors.push(`Missing required metadata property: ${prop}`);
}
}
} else {
errors.push('Invalid metadata structure');
}
if (!Array.isArray(data.auditTrail)) {
errors.push('auditTrail must be an array');
} else {
data.auditTrail.forEach((entry: any, index: number) => {
if (!entry || typeof entry !== 'object') {
errors.push(`Audit entry ${index} is not a valid object`);
return;
}
const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
for (const prop of requiredEntryProps) {
if (!(prop in entry)) {
errors.push(`Audit entry ${index} missing required property: ${prop}`);
}
}
});
}
return {
isValid: errors.length === 0,
errors
};
}
static prepareAuditExport(
recommendation: any,
userQuery: string,
mode: string,
auditTrail: any[] = [],
additionalMetadata: any = {}
): any {
return {
metadata: {
timestamp: new Date().toISOString(),
version: "1.0",
userQuery: userQuery.slice(0, 1000),
mode,
exportedBy: 'ForensicPathways',
toolsDataHash: additionalMetadata.toolsDataHash || 'unknown',
aiModel: additionalMetadata.aiModel || 'unknown',
aiParameters: additionalMetadata.aiParameters || {},
processingStats: additionalMetadata.processingStats || {}
},
recommendation: this.sanitizeForAudit(recommendation, 6),
auditTrail: auditTrail.map(entry => this.sanitizeForAudit(entry, 4)),
rawContext: {
selectedTools: additionalMetadata.selectedTools || [],
backgroundKnowledge: additionalMetadata.backgroundKnowledge || [],
contextHistory: additionalMetadata.contextHistory || [],
embeddingsSimilarities: additionalMetadata.embeddingsSimilarities || {}
}
};
}
static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
const issues: string[] = [];
const warnings: string[] = [];
const structureValidation = this.validateAuditExportStructure(data);
if (!structureValidation.isValid) {
issues.push(...structureValidation.errors);
return { isValid: false, issues, warnings };
}
if (data.metadata) {
const timestamp = new Date(data.metadata.timestamp);
if (isNaN(timestamp.getTime())) {
warnings.push('Invalid timestamp in metadata');
} else {
const age = Date.now() - timestamp.getTime();
const maxAge = 30 * 24 * 60 * 60 * 1000; // 30 days
if (age > maxAge) {
warnings.push(`Analysis is ${Math.floor(age / (24 * 60 * 60 * 1000))} days old`);
}
}
if (!['workflow', 'tool'].includes(data.metadata.mode)) {
warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
}
}
if (Array.isArray(data.auditTrail)) {
const aiDecisions = data.auditTrail.filter(e => e.action === 'ai-decision').length;
const toolSelections = data.auditTrail.filter(e => e.action === 'selection-decision').length;
if (aiDecisions === 0) {
warnings.push('No AI decisions found in audit trail');
}
if (toolSelections === 0) {
warnings.push('No tool selections found in audit trail');
}
const entriesWithConfidence = data.auditTrail.filter(e => typeof e.confidence === 'number').length;
const confidenceRatio = entriesWithConfidence / data.auditTrail.length;
if (confidenceRatio < 0.8) {
warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
}
}
return {
isValid: issues.length === 0,
issues,
warnings
};
}
}