// src/utils/jsonUtils.ts

/**
 * Static helpers for parsing loosely formatted JSON text and for building,
 * sanitizing and validating audit-export payloads.
 */
export class JSONParser {
  /**
   * Patterns tried in order to pull a JSON payload out of surrounding text:
   * a ```json fence, any ``` fence, then the widest `{ ... }` span.
   */
  private static readonly JSON_BLOCK_PATTERNS: readonly RegExp[] = [
    /```json\s*([\s\S]*?)\s*```/i,
    /```\s*([\s\S]*?)\s*```/i,
    /\{[\s\S]*\}/,
  ];

  /** Substring patterns that make `secureParseJSON` reject its input. */
  private static readonly SUSPICIOUS_PATTERNS: readonly RegExp[] = [
    /<script/i,
    /javascript:/i,
    /eval\(/i,
    /function\s*\(/i,
    /__proto__/i,
    /constructor/i,
  ];

  /** Property names never copied by `sanitizeForAudit`. */
  private static readonly UNSAFE_KEYS: readonly string[] = ['__proto__', 'constructor', 'prototype'];

  /**
   * Parses JSON that may be wrapped in prose or Markdown code fences, or that
   * may be cut off before its closing delimiters.
   *
   * Candidates are tried in this order; the first one that parses wins:
   *   1. the fenced / braced extraction (repaired if it does not end in `}` or `]`),
   *   2. the trimmed input as-is (covers top-level arrays of objects, which the
   *      greedy `{...}` extraction would otherwise mangle),
   *   3. the input from its first `{` or `[`, run through truncation repair.
   *
   * Any parsed object (arrays included) is guaranteed to carry array-valued
   * `selectedTools` and `selectedConcepts` properties.
   *
   * @param jsonString Raw text expected to contain JSON.
   * @param fallback Value returned when no candidate can be parsed.
   * @returns The parsed value, or `fallback` on failure. Never throws.
   */
  static safeParseJSON(jsonString: string, fallback: any = null): any {
    try {
      const trimmed = jsonString.trim();

      let cleaned = this.extractJSONCandidate(trimmed);
      if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
        console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
        cleaned = this.repairTruncatedJSON(cleaned);
      }

      const candidates = [cleaned, trimmed];
      const firstOpen = trimmed.search(/[{\[]/);
      if (firstOpen !== -1) {
        candidates.push(this.repairTruncatedJSON(trimmed.slice(firstOpen)));
      }

      const errors: unknown[] = [];
      // De-duplicate so identical candidates are not parsed twice.
      for (const candidate of Array.from(new Set(candidates))) {
        try {
          return this.applySelectionDefaults(JSON.parse(candidate));
        } catch (error) {
          errors.push(error);
        }
      }

      // Report the failure of the primary candidate, as before.
      throw errors[0];
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn('[JSON-PARSER] JSON parsing failed:', message);
      return fallback;
    }
  }

  /** Returns the first fenced or braced span found in `text`, or `text` itself. */
  private static extractJSONCandidate(text: string): string {
    for (const pattern of this.JSON_BLOCK_PATTERNS) {
      const match = text.match(pattern);
      if (match) {
        // An empty capture group falls back to the whole match.
        return match[1] || match[0];
      }
    }
    return text;
  }

  /** Ensures a parsed object exposes array-valued `selectedTools` / `selectedConcepts`. */
  private static applySelectionDefaults(parsed: any): any {
    if (parsed && typeof parsed === 'object') {
      if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
      if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
    }
    return parsed;
  }

  /**
   * Best-effort repair of JSON text that was cut off mid-document.
   *
   * If the text contains a prefix that closes every structure it opened, that
   * prefix is returned (trailing garbage is dropped). Otherwise an unterminated
   * string is closed, a trailing comma is removed, and every still-open `{` / `[`
   * is closed innermost-first. The result is not guaranteed to be valid JSON.
   *
   * @param cleaned Text to repair.
   * @returns The repaired text.
   */
  private static repairTruncatedJSON(cleaned: string): string {
    const pendingClosers: string[] = [];
    let inString = false;
    let escaped = false;
    let lastCompleteStructure = '';

    for (let i = 0; i < cleaned.length; i++) {
      const char = cleaned.charAt(i);

      if (escaped) {
        escaped = false;
        continue;
      }
      if (char === '\\') {
        escaped = true;
        continue;
      }
      if (char === '"') {
        inString = !inString;
        continue;
      }
      if (inString) continue;

      if (char === '{') {
        pendingClosers.push('}');
      } else if (char === '[') {
        pendingClosers.push(']');
      } else if (char === '}' || char === ']') {
        // Stray or mismatched closers are ignored rather than counted.
        if (pendingClosers[pendingClosers.length - 1] === char) {
          pendingClosers.pop();
          if (pendingClosers.length === 0) {
            lastCompleteStructure = cleaned.substring(0, i + 1);
          }
        }
      }
    }

    if (lastCompleteStructure) {
      return lastCompleteStructure;
    }

    let repaired = cleaned;
    if (inString) {
      // A dangling backslash would escape the quote we are about to add.
      if (escaped) repaired = repaired.slice(0, -1);
      repaired += '"';
    } else {
      repaired = repaired.replace(/[,\s]+$/, '');
    }
    for (let i = pendingClosers.length - 1; i >= 0; i--) {
      repaired += pendingClosers[i];
    }
    return repaired;
  }

  /**
   * Recovers tool and concept names from text that could not be parsed as JSON.
   *
   * Quoted strings are read from the `"selectedTools"` and `"selectedConcepts"`
   * arrays; an array that is cut off before its `]` is read up to the end of the
   * input. If neither array yields anything, up to 15 quoted strings from the
   * whole input are returned as tools, skipping the known keys, strings of two
   * characters or fewer, strings containing `:`, and purely numeric strings.
   *
   * @param jsonString Malformed JSON text.
   * @returns The recovered names; both arrays may be empty.
   */
  static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
    const selectedTools = this.extractQuotedArrayItems(
      jsonString,
      /"selectedTools"\s*:\s*\[([\s\S]*?)(?:\]|$)/i
    );
    const selectedConcepts = this.extractQuotedArrayItems(
      jsonString,
      /"selectedConcepts"\s*:\s*\[([\s\S]*?)(?:\]|$)/i
    );

    if (selectedTools.length === 0 && selectedConcepts.length === 0) {
      const reservedKeys = ['selectedTools', 'selectedConcepts', 'reasoning'];
      const possibleNames = this.extractQuotedStrings(jsonString)
        .filter(
          name =>
            name.length > 2 &&
            !reservedKeys.includes(name) &&
            !name.includes(':') &&
            !/^\d+$/.test(name)
        )
        .slice(0, 15);

      selectedTools.push(...possibleNames);
    }

    return { selectedTools, selectedConcepts };
  }

  /** Returns the quoted strings inside the first capture group of `arrayPattern`. */
  private static extractQuotedArrayItems(text: string, arrayPattern: RegExp): string[] {
    const arrayMatch = text.match(arrayPattern);
    return arrayMatch ? this.extractQuotedStrings(arrayMatch[1] ?? '') : [];
  }

  /** Returns every non-empty double-quoted string in `text`, without the quotes. */
  private static extractQuotedStrings(text: string): string[] {
    return (text.match(/"([^"]+)"/g) ?? []).map(quoted => quoted.replace(/"/g, ''));
  }

  /**
   * Strictly parses JSON after size and content screening.
   *
   * The content screen is a plain substring check against the raw text, so input
   * that merely mentions e.g. "constructor" inside a value is rejected as well.
   *
   * @param jsonString JSON text to parse.
   * @param maxSize Maximum accepted `jsonString.length` (default 10 * 1024 * 1024).
   * @returns The parsed object or array.
   * @throws Error if the input is not a string, exceeds `maxSize`, matches a
   *   suspicious pattern, is not valid JSON, or parses to a non-object value.
   */
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    if (this.SUSPICIOUS_PATTERNS.some(pattern => pattern.test(jsonString))) {
      throw new Error('Potentially malicious content detected in JSON');
    }

    try {
      const parsed = JSON.parse(jsonString);

      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('JSON must be an object');
      }

      return parsed;
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }
  }

  /**
   * Produces a bounded, defanged copy of `obj` for inclusion in an audit export.
   *
   * - Anything at `currentDepth >= maxDepth` becomes `'[Max depth reached]'`.
   * - Strings have `<script>...</script>` blocks replaced, then are cut to 500 chars.
   * - Arrays keep their first 20 items plus a "more items" marker.
   * - Objects drop `__proto__` / `constructor` / `prototype` keys, then keep the
   *   first 50 remaining keys plus a `'[truncated]'` marker.
   * - Other non-primitive values are converted with `String()`.
   *
   * @param obj Value to sanitize.
   * @param maxDepth Maximum nesting depth to descend into.
   * @param currentDepth Current recursion depth; callers normally omit it.
   * @returns The sanitized copy.
   */
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      // Strip scripts before truncating so long strings are defanged too.
      const withoutScripts = obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
      return withoutScripts.length > 500
        ? withoutScripts.slice(0, 500) + '...[truncated]'
        : withoutScripts;
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    const nextDepth = currentDepth + 1;

    if (Array.isArray(obj)) {
      const items = obj
        .slice(0, 20)
        .map((item: any) => this.sanitizeForAudit(item, maxDepth, nextDepth));
      if (obj.length > 20) {
        items.push(`...[${obj.length - 20} more items]`);
      }
      return items;
    }

    if (typeof obj === 'object') {
      // Filter before capping: assigning an own "__proto__" key onto a plain
      // object would replace the prototype of the sanitized copy.
      const safeKeys = Object.keys(obj).filter(key => !this.UNSAFE_KEYS.includes(key));
      const sanitized: Record<string, unknown> = {};

      for (const key of safeKeys.slice(0, 50)) {
        sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, nextDepth);
      }
      if (safeKeys.length > 50) {
        sanitized['[truncated]'] = `${safeKeys.length - 50} more properties`;
      }
      return sanitized;
    }

    return String(obj);
  }

  /**
   * Checks that `data` has the shape of an audit export: top-level `metadata`,
   * `recommendation` and `auditTrail`, the required metadata fields, and the
   * required fields on every audit entry. Only presence is checked, not types.
   *
   * @param data Candidate export object.
   * @returns `isValid` plus one message per problem found.
   */
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    errors.push(
      ...this.describeMissingProps(
        data,
        ['metadata', 'recommendation', 'auditTrail'],
        'Missing required property'
      )
    );

    if (data.metadata && typeof data.metadata === 'object') {
      errors.push(
        ...this.describeMissingProps(
          data.metadata,
          ['timestamp', 'version', 'userQuery', 'mode'],
          'Missing required metadata property'
        )
      );
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }
        errors.push(
          ...this.describeMissingProps(
            entry,
            ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'],
            `Audit entry ${index} missing required property`
          )
        );
      });
    }

    return {
      isValid: errors.length === 0,
      errors,
    };
  }

  /** Returns `"<label>: <prop>"` for each of `props` that is not `in` `target`. */
  private static describeMissingProps(target: object, props: readonly string[], label: string): string[] {
    return props.filter(prop => !(prop in target)).map(prop => `${label}: ${prop}`);
  }

  /**
   * Assembles an audit export object.
   *
   * `recommendation` and each audit entry are passed through `sanitizeForAudit`
   * (depth 6 and 4 respectively); `userQuery` is cut to 1000 characters. Values
   * under `rawContext` are copied from `additionalMetadata` as-is.
   *
   * @param recommendation Recommendation payload to export.
   * @param userQuery Original user query.
   * @param mode Analysis mode label.
   * @param auditTrail Audit entries; `null` is treated as empty.
   * @param additionalMetadata Optional extra fields; `null` is treated as empty.
   * @returns The export object.
   */
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    // Default parameters only cover `undefined`; tolerate explicit null too.
    const extra = additionalMetadata || {};
    const trail: any[] = auditTrail || [];

    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: extra.toolsDataHash || 'unknown',
        aiModel: extra.aiModel || 'unknown',
        aiParameters: extra.aiParameters || {},
        processingStats: extra.processingStats || {},
      },
      recommendation: this.sanitizeForAudit(recommendation, 6),
      auditTrail: trail.map((entry: any) => this.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: extra.selectedTools || [],
        backgroundKnowledge: extra.backgroundKnowledge || [],
        contextHistory: extra.contextHistory || [],
        embeddingsSimilarities: extra.embeddingsSimilarities || {},
      },
    };
  }

  /**
   * Validates an uploaded analysis export.
   *
   * Structural problems (see `validateAuditExportStructure`) are reported as
   * `issues` and make the result invalid. Everything else is a non-fatal
   * warning: unparseable or older-than-30-days timestamp, a mode other than
   * `'workflow'` / `'tool'`, no `'ai-decision'` or `'selection-decision'`
   * entries, or fewer than 80% of entries carrying a numeric `confidence`.
   *
   * @param data Parsed upload.
   * @returns Validity flag, blocking issues and warnings.
   */
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = this.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const msPerDay = 24 * 60 * 60 * 1000;
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * msPerDay; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / msPerDay)} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const trail: any[] = data.auditTrail;
      const aiDecisions = trail.filter((entry: any) => entry.action === 'ai-decision').length;
      const toolSelections = trail.filter((entry: any) => entry.action === 'selection-decision').length;

      if (aiDecisions === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (toolSelections === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      // An empty trail has no ratio to report (avoids 0 / 0).
      if (trail.length > 0) {
        const entriesWithConfidence = trail.filter((entry: any) => typeof entry.confidence === 'number').length;
        const confidenceRatio = entriesWithConfidence / trail.length;

        if (confidenceRatio < 0.8) {
          warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
        }
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings,
    };
  }
}