2025-09-24 11:16:26 +00:00
5 changed files with 225 additions and 947 deletions
--- a/core/graph_manager.py
+++ b/core/graph_manager.py
@@ -163,7 +163,8 @@ class GraphManager:
                'to': target,
                'label': attrs.get('relationship_type', ''),
                'source_provider': attrs.get('source_provider', ''),
-                'discovery_timestamp': attrs.get('discovery_timestamp')
+                'discovery_timestamp': attrs.get('discovery_timestamp'),
+                'raw_data': attrs.get('raw_data', {})
            })
        
        return {
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -929,7 +929,7 @@ class Scanner:
        
        # Re-enqueue the node for full processing
        is_ip = _is_valid_ip(node_id)
-        eligible_providers = self._get_eligible_providers(node_id, is_ip, False)
+        eligible_providers = self._get_eligible_providers(node_id, is_ip, False, is_extracted=True)
        for provider in eligible_providers:
            provider_name = provider.get_name()
            priority = self._get_priority(provider_name)
@@ -1133,7 +1133,7 @@ class Scanner:
            self.logger.logger.warning(f"Error initializing provider states for {target}: {e}")


-    def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List:
+    def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool, is_extracted: bool = False) -> List:
        """
        FIXED: Improved provider eligibility checking with better filtering.
        """
@@ -1145,7 +1145,7 @@ class Scanner:
        
        # Check if the target is part of a large entity
        is_in_large_entity = False
-        if self.graph.graph.has_node(target):
+        if self.graph.graph.has_node(target) and not is_extracted:
            metadata = self.graph.graph.nodes[target].get('metadata', {})
            if 'large_entity_id' in metadata:
                is_in_large_entity = True
--- a/providers/correlation_provider.py
+++ b/providers/correlation_provider.py
@@ -1,4 +1,4 @@
-# DNScope/providers/correlation_provider.py
+# dnsrecon-reduced/providers/correlation_provider.py

 import re
 from typing import Dict, Any, List
@@ -24,6 +24,10 @@ class CorrelationProvider(BaseProvider):
        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
        self.EXCLUDED_KEYS = [
            'cert_source',
+            'a_records',
+            'mx_records',
+            'ns_records',
+            'ptr_records',
            'cert_issuer_ca_id',
            'cert_common_name',
            'cert_validity_period_days',
@@ -38,6 +42,8 @@ class CorrelationProvider(BaseProvider):
            'updated_timestamp',
            'discovery_timestamp',
            'query_timestamp',
+            'shodan_ip_str',
+            'shodan_a_record',
        ]

    def get_name(self) -> str:
@@ -83,7 +89,7 @@ class CorrelationProvider(BaseProvider):
    def _find_correlations(self, node_id: str) -> ProviderResult:
        """
        Find correlations for a given node with enhanced filtering and error handling.
-        UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
+        UPDATED: Enhanced with discovery timestamps for time-based edge coloring and list value processing.
        """
        result = ProviderResult()
        discovery_time = datetime.now(timezone.utc)
@@ -109,38 +115,46 @@ class CorrelationProvider(BaseProvider):
                attr_value = attr.get('value')
                attr_provider = attr.get('provider', 'unknown')

-                # Enhanced filtering logic
-                should_exclude = self._should_exclude_attribute(attr_name, attr_value)
+                # Prepare a list of values to iterate over
+                values_to_process = []
+                if isinstance(attr_value, list):
+                    values_to_process.extend(attr_value)
+                else:
+                    values_to_process.append(attr_value)

-                if should_exclude:
-                    continue
+                for value_item in values_to_process:
+                    # Enhanced filtering logic
+                    should_exclude = self._should_exclude_attribute(attr_name, value_item)
                    
-                # Build correlation index
-                if attr_value not in self.correlation_index:
-                    self.correlation_index[attr_value] = {
-                        'nodes': set(),
-                        'sources': []
+                    if should_exclude:
+                        continue
+
+                    # Build correlation index
+                    if value_item not in self.correlation_index:
+                        self.correlation_index[value_item] = {
+                            'nodes': set(),
+                            'sources': []
+                        }
+
+                    self.correlation_index[value_item]['nodes'].add(node_id)
+
+                    source_info = {
+                        'node_id': node_id,
+                        'provider': attr_provider,
+                        'attribute': attr_name,
+                        'path': f"{attr_provider}_{attr_name}"
                    }

-                self.correlation_index[attr_value]['nodes'].add(node_id)
+                    # Avoid duplicate sources
+                    existing_sources = [s for s in self.correlation_index[value_item]['sources']
+                                    if s['node_id'] == node_id and s['path'] == source_info['path']]
+                    if not existing_sources:
+                        self.correlation_index[value_item]['sources'].append(source_info)

-                source_info = {
-                    'node_id': node_id,
-                    'provider': attr_provider,
-                    'attribute': attr_name,
-                    'path': f"{attr_provider}_{attr_name}"
-                }
-
-                # Avoid duplicate sources
-                existing_sources = [s for s in self.correlation_index[attr_value]['sources']
-                                if s['node_id'] == node_id and s['path'] == source_info['path']]
-                if not existing_sources:
-                    self.correlation_index[attr_value]['sources'].append(source_info)
-
-                # Create correlation if we have multiple nodes with this value
-                if len(self.correlation_index[attr_value]['nodes']) > 1:
-                    self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result, discovery_time)
-                    correlations_found += 1
+                    # Create correlation if we have multiple nodes with this value
+                    if len(self.correlation_index[value_item]['nodes']) > 1:
+                        self._create_correlation_relationships(value_item, self.correlation_index[value_item], result, discovery_time)
+                        correlations_found += 1
                    
            # Log correlation results
            if correlations_found > 0:
--- a/static/css/main.css
+++ b/static/css/main.css
@@ -401,7 +401,7 @@ input[type="text"]:focus, select:focus {
    gap: 0.3rem;
    position: absolute;
    top: 10px;
-    left: 10px;
+    right: 10px;
    background: rgba(26, 26, 26, 0.9);
    padding: 0.5rem;
    border-radius: 6px;
@@ -1406,7 +1406,7 @@ input[type="password"]:focus {
    .graph-controls {
        position: relative;
        top: auto;
-        left: auto;
+        right: auto;
        margin-bottom: 1rem;
        min-width: auto;
    }
--- a/static/js/graph.js
+++ b/static/js/graph.js