// forensic-pathways/src/utils/jsonUtils.ts

// src/utils/jsonUtils.ts
/**
 * Static helpers for parsing, repairing, sanitising and validating JSON
 * payloads (AI tool-selection responses and audit-trail exports).
 *
 * All members are static; the class is used purely as a namespace.
 */
export class JSONParser {
  /** Keys that are never copied into sanitised objects (prototype-pollution vectors). */
  private static readonly UNSAFE_KEYS: readonly string[] = ['__proto__', 'constructor', 'prototype'];

  /**
   * Leniently parses JSON that may be wrapped in a Markdown code fence,
   * surrounded by prose, or cut off mid-structure.
   *
   * Parsed plain objects are guaranteed to carry array-valued `selectedTools`
   * and `selectedConcepts` properties (missing or non-array values are replaced
   * by `[]`). Arrays and primitives are returned untouched.
   *
   * @param jsonString Raw text that is expected to contain JSON.
   * @param fallback   Value returned when nothing parseable can be recovered.
   * @returns The parsed value, or `fallback` on any failure (never throws).
   */
  static safeParseJSON(jsonString: string, fallback: any = null): any {
    try {
      const trimmed = jsonString.trim();
      const candidate = JSONParser.extractJSONCandidate(trimmed);

      let parsed: any;
      try {
        parsed = JSONParser.parseWithRepair(candidate);
      } catch (primaryError) {
        // The extraction patterns need a closing fence / closing brace. When the
        // payload was cut off before either appeared (or the greedy brace match
        // stopped at an inner '}'), retry from the first '{' to the end of input.
        const objectStart = trimmed.indexOf('{');
        if (objectStart === -1) {
          throw primaryError;
        }
        try {
          parsed = JSON.parse(JSONParser.repairTruncatedJSON(trimmed.slice(objectStart)));
        } catch (_retryError) {
          // Report the first failure; it describes the more likely candidate.
          throw primaryError;
        }
        console.warn('[JSON-PARSER] Recovered JSON by repairing from the first "{"');
      }

      // Only plain objects receive the defaults; `typeof [] === 'object'` would
      // otherwise bolt stray properties onto a parsed top-level array.
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
        if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
        if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
      }

      return parsed;
    } catch (error) {
      // The caught value is not guaranteed to be an Error instance.
      const reason = error instanceof Error ? error.message : String(error);
      console.warn('[JSON-PARSER] JSON parsing failed:', reason);
      return fallback;
    }
  }

  /**
   * Picks the most likely JSON payload out of `text`: the body of a ```json
   * fence, else the body of any ``` fence, else the span from the first `{` to
   * the last `}`. Returns `text` unchanged when none of these match.
   */
  private static extractJSONCandidate(text: string): string {
    const jsonBlockPatterns = [
      /```json\s*([\s\S]*?)\s*```/i,
      /```\s*([\s\S]*?)\s*```/i,
      /\{[\s\S]*\}/,
    ];

    for (const pattern of jsonBlockPatterns) {
      const match = text.match(pattern);
      if (match) {
        // Fence patterns capture the inner body; the brace pattern has no group.
        return match[1] || match[0];
      }
    }
    return text;
  }

  /**
   * Parses `text`, first running truncation repair when it does not end with a
   * closing brace or bracket. Throws whatever `JSON.parse` throws.
   */
  private static parseWithRepair(text: string): any {
    let candidate = text;
    if (!candidate.endsWith('}') && !candidate.endsWith(']')) {
      console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
      candidate = JSONParser.repairTruncatedJSON(candidate);
    }
    return JSON.parse(candidate);
  }

  /**
   * Best-effort structural repair of JSON text that was cut off.
   *
   * If the text contains a point at which all opened braces/brackets were
   * closed, the prefix up to the last such point is returned. Otherwise an
   * unterminated string is closed, a dangling trailing comma is dropped, and
   * every still-open object/array is closed in reverse nesting order.
   *
   * The result is not guaranteed to be valid JSON (e.g. a dangling key or a
   * half-written literal cannot be repaired); callers must still handle a
   * `JSON.parse` failure.
   */
  private static repairTruncatedJSON(cleaned: string): string {
    // Closers still owed, innermost last.
    const pendingClosers: string[] = [];
    let inString = false;
    let escaped = false;
    let lastCompleteEnd = -1;

    for (let i = 0; i < cleaned.length; i++) {
      const char = cleaned[i];

      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (char === '\\') {
          escaped = true;
        } else if (char === '"') {
          inString = false;
        }
        continue;
      }

      if (char === '"') {
        inString = true;
      } else if (char === '{') {
        pendingClosers.push('}');
      } else if (char === '[') {
        pendingClosers.push(']');
      } else if (char === '}' || char === ']') {
        // Closers that do not match the innermost open structure are ignored.
        if (pendingClosers[pendingClosers.length - 1] === char) {
          pendingClosers.pop();
          if (pendingClosers.length === 0) {
            lastCompleteEnd = i + 1;
          }
        }
      }
    }

    if (lastCompleteEnd !== -1) {
      return cleaned.substring(0, lastCompleteEnd);
    }

    if (!inString && pendingClosers.length === 0) {
      // Nothing is open, so there is nothing structural to repair.
      return cleaned;
    }

    let repaired = cleaned;
    if (inString) {
      // A lone trailing backslash would escape the quote appended below.
      if (escaped) repaired = repaired.slice(0, -1);
      repaired += '"';
    } else {
      // `[1, 2,` -> `[1, 2` so that the appended closer yields valid JSON.
      repaired = repaired.replace(/,\s*$/, '');
    }

    // Innermost structure must be closed first.
    return repaired + pendingClosers.reverse().join('');
  }

  /** Returns the contents of every non-empty double-quoted token in `text`, quotes removed. */
  private static collectQuotedValues(text: string): string[] {
    const quoted = text.match(/"([^"]+)"/g);
    return quoted ? quoted.map(token => token.replace(/"/g, '')) : [];
  }

  /**
   * Regex-based salvage of tool/concept names from text that could not be
   * parsed as JSON.
   *
   * Looks for `"selectedTools": [...]` and `"selectedConcepts": [...]` arrays
   * (both brackets must be present). When neither yields a name, falls back to
   * treating up to 15 quoted tokens in the whole text as tool names, skipping
   * the known field names, tokens of length <= 2, tokens containing ':' and
   * purely numeric tokens.
   *
   * @param jsonString Malformed JSON text.
   * @returns The extracted names; both arrays may be empty.
   */
  static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
    const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
    const selectedTools: string[] = toolsMatch ? JSONParser.collectQuotedValues(toolsMatch[1]) : [];

    const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
    const selectedConcepts: string[] = conceptsMatch ? JSONParser.collectQuotedValues(conceptsMatch[1]) : [];

    if (selectedTools.length === 0 && selectedConcepts.length === 0) {
      const possibleNames = JSONParser.collectQuotedValues(jsonString)
        .filter(name =>
          name.length > 2 &&
          !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
          !name.includes(':') &&
          !name.match(/^\d+$/)
        )
        .slice(0, 15);

      selectedTools.push(...possibleNames);
    }

    return { selectedTools, selectedConcepts };
  }

  /**
   * Strictly parses untrusted JSON text.
   *
   * Rejects non-string input, input longer than `maxSize`, and any input whose
   * raw text matches one of the suspicious patterns below. The pattern check is
   * textual, so a payload that merely mentions e.g. "constructor" inside a
   * string value is rejected as well.
   *
   * @param jsonString Raw JSON text.
   * @param maxSize    Maximum accepted `jsonString.length` (UTF-16 code units).
   * @returns The parsed object or array.
   * @throws Error if the input is not a string, is too large, matches a
   *   suspicious pattern, is not valid JSON, or parses to a primitive or null.
   */
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    const suspiciousPatterns = [
      /<script/i,
      /javascript:/i,
      /eval\(/i,
      /function\s*\(/i,
      /__proto__/i,
      /constructor/i
    ];

    if (suspiciousPatterns.some(pattern => pattern.test(jsonString))) {
      throw new Error('Potentially malicious content detected in JSON');
    }

    let parsed: any;
    try {
      parsed = JSON.parse(jsonString);
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }

    if (typeof parsed !== 'object' || parsed === null) {
      throw new Error('JSON must be an object');
    }
    return parsed;
  }

  /**
   * Produces a size-bounded, script-free deep copy of `obj` for audit output.
   *
   * - Anything at `currentDepth >= maxDepth` becomes `'[Max depth reached]'`.
   * - Strings have `<script>…</script>` blocks replaced, then are cut to 500
   *   characters (stripping happens first so long strings are cleaned too).
   * - Arrays keep their first 20 items plus a summary string for the rest.
   * - Objects keep their first 50 own enumerable keys plus a `'[truncated]'`
   *   summary; `__proto__`, `constructor` and `prototype` keys are never copied.
   * - Other non-primitive-JSON values (functions, symbols, bigints) are
   *   converted with `String()`.
   *
   * @param obj          Value to sanitise.
   * @param maxDepth     Maximum nesting depth to descend into.
   * @param currentDepth Current recursion depth; callers normally omit it.
   */
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      const withoutScripts = obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
      return withoutScripts.length > 500
        ? withoutScripts.slice(0, 500) + '...[truncated]'
        : withoutScripts;
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    const sanitizeChild = (value: any): any =>
      JSONParser.sanitizeForAudit(value, maxDepth, currentDepth + 1);

    if (Array.isArray(obj)) {
      const items: any[] = obj.slice(0, 20).map(item => sanitizeChild(item));
      if (obj.length > 20) {
        items.push(`...[${obj.length - 20} more items]`);
      }
      return items;
    }

    if (typeof obj === 'object') {
      const keys = Object.keys(obj);
      const sanitized: any = {};

      for (const key of keys.slice(0, 50)) {
        // Assigning to '__proto__' would swap the result's prototype instead of
        // creating a property, so these keys are dropped in every case.
        if (JSONParser.UNSAFE_KEYS.includes(key)) {
          continue;
        }
        sanitized[key] = sanitizeChild(obj[key]);
      }

      if (keys.length > 50) {
        sanitized['[truncated]'] = `${keys.length - 50} more properties`;
      }
      return sanitized;
    }

    return String(obj);
  }

  /**
   * Checks that `data` has the shape of an audit export: top-level `metadata`,
   * `recommendation` and `auditTrail`; the required metadata fields; and the
   * required fields on every audit-trail entry. Only the presence of
   * properties is checked, not their types or values.
   *
   * @returns `isValid` plus a human-readable message for every problem found.
   */
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
    for (const prop of requiredProps) {
      if (!(prop in data)) {
        errors.push(`Missing required property: ${prop}`);
      }
    }

    if (data.metadata && typeof data.metadata === 'object') {
      const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
      for (const prop of requiredMetadataProps) {
        if (!(prop in data.metadata)) {
          errors.push(`Missing required metadata property: ${prop}`);
        }
      }
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }

        for (const prop of requiredEntryProps) {
          if (!(prop in entry)) {
            errors.push(`Audit entry ${index} missing required property: ${prop}`);
          }
        }
      });
    }

    return {
      isValid: errors.length === 0,
      errors
    };
  }

  /**
   * Assembles an audit export object from a recommendation and its audit trail.
   *
   * The recommendation and every trail entry are passed through
   * `sanitizeForAudit` (depth 6 and 4 respectively); the user query is cut to
   * 1000 characters. Missing `additionalMetadata` fields fall back to
   * `'unknown'`, `{}` or `[]`.
   *
   * @param recommendation     Recommendation payload to export.
   * @param userQuery          Original user query.
   * @param mode               Analysis mode stored in the metadata.
   * @param auditTrail         Audit entries; a non-array value is treated as empty.
   * @param additionalMetadata Optional extra metadata and raw context.
   */
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    // Default parameters only cover `undefined`; an explicit null must not crash.
    const extra = additionalMetadata || {};
    const trail: any[] = Array.isArray(auditTrail) ? auditTrail : [];

    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: extra.toolsDataHash || 'unknown',
        aiModel: extra.aiModel || 'unknown',
        aiParameters: extra.aiParameters || {},
        processingStats: extra.processingStats || {}
      },
      recommendation: JSONParser.sanitizeForAudit(recommendation, 6),
      auditTrail: trail.map(entry => JSONParser.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: extra.selectedTools || [],
        backgroundKnowledge: extra.backgroundKnowledge || [],
        contextHistory: extra.contextHistory || [],
        embeddingsSimilarities: extra.embeddingsSimilarities || {}
      }
    };
  }

  /**
   * Validates an uploaded analysis export.
   *
   * Structural problems (see `validateAuditExportStructure`) are reported as
   * `issues` and make the upload invalid. Everything else is advisory and goes
   * into `warnings`: unparseable or older-than-30-days timestamp, unknown mode,
   * no `'ai-decision'` / `'selection-decision'` entries, and fewer than 80% of
   * entries carrying a numeric `confidence`.
   */
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = JSONParser.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const dayMs = 24 * 60 * 60 * 1000;
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * dayMs; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / dayMs)} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const trail: any[] = data.auditTrail;
      const countWhere = (predicate: (entry: any) => boolean): number =>
        trail.filter(predicate).length;

      if (countWhere(entry => entry.action === 'ai-decision') === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (countWhere(entry => entry.action === 'selection-decision') === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      // An empty trail has no ratio to report; skip instead of dividing by zero.
      if (trail.length > 0) {
        const confidenceRatio =
          countWhere(entry => typeof entry.confidence === 'number') / trail.length;

        if (confidenceRatio < 0.8) {
          warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
        }
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings
    };
  }
}