Implement new data API
@@ -1,8 +1,9 @@
# core/graph_manager.py
# dnsrecon-reduced/core/graph_manager.py

"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
"""
import re
from datetime import datetime, timezone

@@ -28,6 +29,7 @@ class GraphManager:
    """
    Thread-safe graph manager for DNSRecon infrastructure mapping.
    Uses NetworkX for in-memory graph storage with confidence scoring.
    Compatible with unified ProviderResult data model.
    """

    def __init__(self):

@@ -192,21 +194,36 @@ class GraphManager:
            })
        return all_correlations

    def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None,
    def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
                 description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add a node to the graph, update attributes, and process correlations."""
        """
        Add a node to the graph, update attributes, and process correlations.
        Now compatible with unified data model - attributes are dictionaries from converted StandardAttribute objects.
        """
        is_new_node = not self.graph.has_node(node_id)
        if is_new_node:
            self.graph.add_node(node_id, type=node_type.value,
                                added_timestamp=datetime.now(timezone.utc).isoformat(),
                                attributes=attributes or {},
                                attributes=attributes or [],  # Store as a list from the start
                                description=description,
                                metadata=metadata or {})
        else:
            # Safely merge new attributes into existing attributes
            # Safely merge new attributes into the existing list of attributes
            if attributes:
                existing_attributes = self.graph.nodes[node_id].get('attributes', {})
                existing_attributes.update(attributes)
                existing_attributes = self.graph.nodes[node_id].get('attributes', [])

                # Handle cases where old data might still be in dictionary format
                if not isinstance(existing_attributes, list):
                    existing_attributes = []

                # Create a set of existing attribute names for efficient duplicate checking
                existing_attr_names = {attr['name'] for attr in existing_attributes}

                for new_attr in attributes:
                    if new_attr['name'] not in existing_attr_names:
                        existing_attributes.append(new_attr)
                        existing_attr_names.add(new_attr['name'])

                self.graph.nodes[node_id]['attributes'] = existing_attributes
        if description:
            self.graph.nodes[node_id]['description'] = description
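The rewritten merge branch above keeps each node's attributes as a list of dictionaries and de-duplicates by the 'name' key instead of dict-updating. The following standalone sketch (not part of the diff) mirrors that behaviour with plain lists and dicts; the sample attribute values are made up for illustration:

def merge_attributes(existing, incoming):
    """Append only attributes whose 'name' is not already present (same rule as add_node)."""
    if not isinstance(existing, list):  # tolerate legacy dict-format data
        existing = []
    seen = {attr['name'] for attr in existing}
    for attr in incoming:
        if attr['name'] not in seen:
            existing.append(attr)
            seen.add(attr['name'])
    return existing

node_attrs = [{'name': 'dns_records', 'value': ['A: 198.51.100.7'], 'provider': 'dns', 'confidence': 0.8}]
new_attrs = [
    {'name': 'dns_records', 'value': ['A: 198.51.100.8'], 'provider': 'dns', 'confidence': 0.8},  # dropped (duplicate name)
    {'name': 'asn', 'value': 'AS64500', 'provider': 'shodan', 'confidence': 0.7},                 # appended
]
print(merge_attributes(node_attrs, new_attrs))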
@@ -485,19 +502,28 @@ class GraphManager:
                if d.get('confidence_score', 0) >= min_confidence]

    def get_graph_data(self) -> Dict[str, Any]:
        """Export graph data formatted for frontend visualization."""
        """
        Export graph data formatted for frontend visualization.
        Compatible with unified data model - preserves all attribute information for frontend display.
        """
        nodes = []
        for node_id, attrs in self.graph.nodes(data=True):
            node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'),
                         'attributes': attrs.get('attributes', {}),
                         'attributes': attrs.get('attributes', []),  # Ensure attributes is a list
                         'description': attrs.get('description', ''),
                         'metadata': attrs.get('metadata', {}),
                         'added_timestamp': attrs.get('added_timestamp')}

            # Customize node appearance based on type and attributes
            node_type = node_data['type']
            attributes = node_data['attributes']
            if node_type == 'domain' and attributes.get('certificates', {}).get('has_valid_cert') is False:
                node_data['color'] = {'background': '#c7c7c7', 'border': '#999'}  # Gray for invalid cert
            attributes_list = node_data['attributes']

            # CORRECTED LOGIC: Handle certificate validity styling
            if node_type == 'domain' and isinstance(attributes_list, list):
                # Find the certificates attribute in the list
                cert_attr = next((attr for attr in attributes_list if attr.get('name') == 'certificates'), None)
                if cert_attr and cert_attr.get('value', {}).get('has_valid_cert') is False:
                    node_data['color'] = {'background': '#c7c7c7', 'border': '#999'}  # Gray for invalid cert

            # Add incoming and outgoing edges to node data
            if self.graph.has_node(node_id):

@@ -528,7 +554,7 @@ class GraphManager:
                'last_modified': self.last_modified,
                'total_nodes': self.get_node_count(),
                'total_edges': self.get_edge_count(),
                'graph_format': 'dnsrecon_v1_nodeling'
                'graph_format': 'dnsrecon_v1_unified_model'
            },
            'graph': graph_data,
            'statistics': self.get_statistics()
core/provider_result.py (new file, 106 lines)
@@ -0,0 +1,106 @@
# dnsrecon-reduced/core/provider_result.py

"""
Unified data model for DNSRecon passive reconnaissance.
Standardizes the data structure across all providers to ensure consistent processing.
"""

from typing import Any, Optional, List, Dict
from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass
class StandardAttribute:
    """A unified data structure for a single piece of information about a node."""
    target_node: str
    name: str
    value: Any
    type: str
    provider: str
    confidence: float
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the attribute after initialization."""
        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")


@dataclass
class Relationship:
    """A unified data structure for a directional link between two nodes."""
    source_node: str
    target_node: str
    relationship_type: str
    confidence: float
    provider: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    raw_data: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the relationship after initialization."""
        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")


@dataclass
class ProviderResult:
    """A container for all data returned by a provider from a single query."""
    attributes: List[StandardAttribute] = field(default_factory=list)
    relationships: List[Relationship] = field(default_factory=list)

    def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str,
                      provider: str, confidence: float = 0.8,
                      metadata: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add an attribute to the result."""
        self.attributes.append(StandardAttribute(
            target_node=target_node,
            name=name,
            value=value,
            type=attr_type,
            provider=provider,
            confidence=confidence,
            metadata=metadata or {}
        ))

    def add_relationship(self, source_node: str, target_node: str, relationship_type: str,
                         provider: str, confidence: float = 0.8,
                         raw_data: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add a relationship to the result."""
        self.relationships.append(Relationship(
            source_node=source_node,
            target_node=target_node,
            relationship_type=relationship_type,
            confidence=confidence,
            provider=provider,
            raw_data=raw_data or {}
        ))

    def get_discovered_nodes(self) -> set:
        """Get all unique node identifiers discovered in this result."""
        nodes = set()

        # Add nodes from relationships
        for rel in self.relationships:
            nodes.add(rel.source_node)
            nodes.add(rel.target_node)

        # Add nodes from attributes
        for attr in self.attributes:
            nodes.add(attr.target_node)

        return nodes

    def get_relationship_count(self) -> int:
        """Get the total number of relationships in this result."""
        return len(self.relationships)

    def get_attribute_count(self) -> int:
        """Get the total number of attributes in this result."""
        return len(self.attributes)

    def is_large_entity(self, threshold: int) -> bool:
        """Check if this result qualifies as a large entity based on relationship count."""
        return self.get_relationship_count() > threshold
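As a usage illustration of the container defined above (not part of the commit), a provider would populate a ProviderResult roughly like this; the domain names, IP, and relationship-type strings are examples only:

from core.provider_result import ProviderResult

result = ProviderResult()
result.add_relationship(source_node="example.com", target_node="www.example.com",
                        relationship_type="san_certificate", provider="crtsh", confidence=0.9)
result.add_relationship(source_node="example.com", target_node="198.51.100.7",
                        relationship_type="a_record", provider="dns")
result.add_attribute(target_node="example.com", name="dns_records",
                     value=["A: 198.51.100.7"], attr_type="dns_record", provider="dns")

print(result.get_relationship_count())        # 2
print(result.get_attribute_count())           # 1
print(sorted(result.get_discovered_nodes()))  # ['198.51.100.7', 'example.com', 'www.example.com']
print(result.is_large_entity(threshold=100))  # False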
@@ -1,7 +1,6 @@
# dnsrecon-reduced/core/rate_limiter.py

import time
import redis

class GlobalRateLimiter:
    def __init__(self, redis_client):
core/scanner.py (529 lines changed)
@@ -2,18 +2,18 @@

import threading
import traceback
import time
import os
import importlib
import redis
from typing import List, Set, Dict, Any, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from queue import PriorityQueue
from datetime import datetime, timezone

from core.graph_manager import GraphManager, NodeType
from core.logger import get_forensic_logger, new_session
from core.provider_result import ProviderResult
from utils.helpers import _is_valid_ip, _is_valid_domain
from providers.base_provider import BaseProvider
from core.rate_limiter import GlobalRateLimiter
@@ -30,6 +30,7 @@ class ScanStatus:

class Scanner:
    """
    Main scanning orchestrator for DNSRecon passive reconnaissance.
    Now provider-agnostic, consuming standardized ProviderResult objects.
    """

    def __init__(self, session_config=None):

@@ -470,6 +471,10 @@ class Scanner:
        print(f" - Tasks processed: {len(processed_tasks)}")

    def _query_single_provider_for_target(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
        """
        Query a single provider and process the unified ProviderResult.
        Now provider-agnostic - handles any provider that returns ProviderResult.
        """
        if self._is_stop_requested():
            print(f"Stop requested before querying {provider.get_name()} for {target}")
            return set(), set(), False
@@ -478,21 +483,24 @@ class Scanner:
        target_type = NodeType.IP if is_ip else NodeType.DOMAIN
        print(f"Querying {provider.get_name()} for {target_type.value}: {target} at depth {depth}")

        # Ensure target node exists in graph
        self.graph.add_node(target, target_type)
        self._initialize_provider_states(target)

        new_targets = set()
        large_entity_members = set()
        node_attributes = defaultdict(lambda: defaultdict(list))
        provider_successful = True

        try:
            provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
            if provider_results is None:
            # Query provider - now returns unified ProviderResult
            provider_result = self._query_single_provider_unified(provider, target, is_ip, depth)

            if provider_result is None:
                provider_successful = False
            elif not self._is_stop_requested():
                discovered, is_large_entity = self._process_provider_results(
                    target, provider, provider_results, node_attributes, depth
                # Process the unified result
                discovered, is_large_entity = self._process_provider_result_unified(
                    target, provider, provider_result, depth
                )
                if is_large_entity:
                    large_entity_members.update(discovered)
@@ -504,15 +512,177 @@ class Scanner:
            provider_successful = False
            self._log_provider_error(target, provider.get_name(), str(e))

        if not self._is_stop_requested():
            for node_id, attributes in node_attributes.items():
                if self.graph.graph.has_node(node_id):
                    node_is_ip = _is_valid_ip(node_id)
                    node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
                    self.graph.add_node(node_id, node_type_to_add, attributes=attributes)

        return new_targets, large_entity_members, provider_successful

    def _query_single_provider_unified(self, provider: BaseProvider, target: str, is_ip: bool, current_depth: int) -> Optional[ProviderResult]:
        """
        Query a single provider with stop signal checking, now returns ProviderResult.
        """
        provider_name = provider.get_name()
        start_time = datetime.now(timezone.utc)

        if self._is_stop_requested():
            print(f"Stop requested before querying {provider_name} for {target}")
            return None

        print(f"Querying {provider_name} for {target}")

        self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")

        try:
            # Query the provider - returns unified ProviderResult
            if is_ip:
                result = provider.query_ip(target)
            else:
                result = provider.query_domain(target)

            if self._is_stop_requested():
                print(f"Stop requested after querying {provider_name} for {target}")
                return None

            # Update provider state with relationship count (more meaningful than raw result count)
            relationship_count = result.get_relationship_count() if result else 0
            self._update_provider_state(target, provider_name, 'success', relationship_count, None, start_time)

            print(f"✓ {provider_name} returned {relationship_count} relationships for {target}")
            return result

        except Exception as e:
            self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
            print(f"✗ {provider_name} failed for {target}: {e}")
            return None

    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
                                         provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
        """
        Process a unified ProviderResult object to update the graph.
        Returns (discovered_targets, is_large_entity).
        """
        provider_name = provider.get_name()
        discovered_targets = set()

        if self._is_stop_requested():
            print(f"Stop requested before processing results from {provider_name} for {target}")
            return discovered_targets, False

        # Check for large entity based on relationship count
        if provider_result.get_relationship_count() > self.config.large_entity_threshold:
            print(f"Large entity detected: {provider_name} returned {provider_result.get_relationship_count()} relationships for {target}")
            members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
            return members, True

        # Process relationships
        for i, relationship in enumerate(provider_result.relationships):
            if i % 5 == 0 and self._is_stop_requested():  # Check periodically for stop
                print(f"Stop requested while processing relationships from {provider_name} for {target}")
                break

            # Add nodes for relationship endpoints
            source_node = relationship.source_node
            target_node = relationship.target_node

            # Determine node types
            source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
            if target_node.startswith('AS') and target_node[2:].isdigit():
                target_type = NodeType.ASN
            elif _is_valid_ip(target_node):
                target_type = NodeType.IP
            else:
                target_type = NodeType.DOMAIN

            # Add nodes to graph
            self.graph.add_node(source_node, source_type)
            self.graph.add_node(target_node, target_type)

            # Add edge to graph
            if self.graph.add_edge(
                source_node, target_node,
                relationship.relationship_type,
                relationship.confidence,
                provider_name,
                relationship.raw_data
            ):
                print(f"Added relationship: {source_node} -> {target_node} ({relationship.relationship_type})")

            # Track discovered targets for further processing
            if _is_valid_domain(target_node) or _is_valid_ip(target_node):
                discovered_targets.add(target_node)

        # Process attributes, preserving them as a list of objects
        attributes_by_node = defaultdict(list)
        for attribute in provider_result.attributes:
            # Convert the StandardAttribute object to a dictionary that the frontend can use
            attr_dict = {
                "name": attribute.name,
                "value": attribute.value,
                "type": attribute.type,
                "provider": attribute.provider,
                "confidence": attribute.confidence,
                "metadata": attribute.metadata
            }
            attributes_by_node[attribute.target_node].append(attr_dict)

        # Add attributes to nodes
        for node_id, node_attributes_list in attributes_by_node.items():
            if self.graph.graph.has_node(node_id):
                # Determine node type
                if _is_valid_ip(node_id):
                    node_type = NodeType.IP
                elif node_id.startswith('AS') and node_id[2:].isdigit():
                    node_type = NodeType.ASN
                else:
                    node_type = NodeType.DOMAIN

                # Add node with the list of attributes
                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)

        return discovered_targets, False
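The endpoint typing above (ASN if the identifier is 'AS' followed by digits, otherwise IP, otherwise domain) is repeated in the attribute loop as well. A standalone sketch of the same rule, outside the diff; it substitutes the standard-library ipaddress check for utils.helpers._is_valid_ip, so it is an approximation rather than the project's helper:

import ipaddress

def classify_node(identifier: str) -> str:
    # Same precedence as the scanner: ASN, then IP, then fall back to domain.
    if identifier.startswith('AS') and identifier[2:].isdigit():
        return 'asn'
    try:
        ipaddress.ip_address(identifier)
        return 'ip'
    except ValueError:
        return 'domain'

print(classify_node('AS64500'))       # asn
print(classify_node('198.51.100.7'))  # ip
print(classify_node('example.com'))   # domain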
    def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
                                                  provider_result: ProviderResult, current_depth: int) -> Set[str]:
        """
        Create a large entity node from a ProviderResult and return the members for DNS processing.
        """
        entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"

        # Extract target nodes from relationships
        targets = [rel.target_node for rel in provider_result.relationships]
        node_type = 'unknown'

        if targets:
            if _is_valid_domain(targets[0]):
                node_type = 'domain'
            elif _is_valid_ip(targets[0]):
                node_type = 'ip'

        # Create nodes in graph (they exist but are grouped)
        for target in targets:
            target_node_type = NodeType.DOMAIN if node_type == 'domain' else NodeType.IP
            self.graph.add_node(target, target_node_type)

        attributes = {
            'count': len(targets),
            'nodes': targets,
            'node_type': node_type,
            'source_provider': provider_name,
            'discovery_depth': current_depth,
            'threshold_exceeded': self.config.large_entity_threshold,
        }
        description = f'Large entity created due to {len(targets)} relationships from {provider_name}'

        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)

        # Create edge from source to large entity
        if provider_result.relationships:
            rel_type = provider_result.relationships[0].relationship_type
            self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name,
                                {'large_entity_info': f'Contains {len(targets)} {node_type}s'})

        self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
        print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")

        return set(targets)
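For orientation, the attribute payload stored on a large-entity node has roughly the shape below, and extract_node_from_large_entity later reads discovery_depth back out of it when re-queueing an extracted member. The concrete values and the priority number are illustrative only (the real priority comes from _get_priority):

large_entity_attributes = {
    'count': 3,
    'nodes': ['a.example.com', 'b.example.com', 'c.example.com'],
    'node_type': 'domain',
    'source_provider': 'crtsh',
    'discovery_depth': 1,
    'threshold_exceeded': 2,  # the configured large_entity_threshold at creation time
}

# Task tuple shape used when re-queueing a member: (priority, (provider_name, node_id, depth))
current_depth = large_entity_attributes.get('discovery_depth', 0)
task = (1, ('crtsh', 'a.example.com', current_depth))
print(task)  # (1, ('crtsh', 'a.example.com', 1))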
    def stop_scan(self) -> bool:
        """Request immediate scan termination with proper cleanup."""
        try:

@@ -558,6 +728,73 @@ class Scanner:
            traceback.print_exc()
            return False

    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Extracts a node from a large entity, re-creates its original edge, and
        re-queues it for full scanning.
        """
        if not self.graph.graph.has_node(large_entity_id):
            print(f"ERROR: Large entity {large_entity_id} not found.")
            return False

        # 1. Get the original source node that discovered the large entity
        predecessors = list(self.graph.graph.predecessors(large_entity_id))
        if not predecessors:
            print(f"ERROR: No source node found for large entity {large_entity_id}.")
            return False
        source_node_id = predecessors[0]

        # Get the original edge data to replicate it for the extracted node
        original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id)
        if not original_edge_data:
            print(f"ERROR: Could not find original edge data from {source_node_id} to {large_entity_id}.")
            return False

        # 2. Modify the graph data structure first
        success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
        if not success:
            print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.")
            return False

        # 3. Create the direct edge from the original source to the newly extracted node
        print(f"Re-creating direct edge from {source_node_id} to extracted node {node_id_to_extract}")
        self.graph.add_edge(
            source_id=source_node_id,
            target_id=node_id_to_extract,
            relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'),
            confidence_score=original_edge_data.get('confidence_score', 0.85),  # Slightly lower confidence
            source_provider=original_edge_data.get('source_provider', 'unknown'),
            raw_data={'context': f'Extracted from large entity {large_entity_id}'}
        )

        # 4. Re-queue the extracted node for full processing by all eligible providers
        print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
        is_ip = _is_valid_ip(node_id_to_extract)
        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)

        eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
        for provider in eligible_providers:
            provider_name = provider.get_name()
            self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth)))
            self.total_tasks_ever_enqueued += 1

        # 5. If the scanner is not running, we need to kickstart it to process this one item.
        if self.status != ScanStatus.RUNNING:
            print("Scanner is idle. Starting a mini-scan to process the extracted node.")
            self.status = ScanStatus.RUNNING
            self._update_session_state()

            if not self.scan_thread or not self.scan_thread.is_alive():
                self.scan_thread = threading.Thread(
                    target=self._execute_scan,
                    args=(self.current_target, self.max_depth),
                    daemon=True
                )
                self.scan_thread.start()

        print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.")
        return True

    def _update_session_state(self) -> None:
        """
        Update the scanner state in Redis for GUI updates.
@@ -656,39 +893,6 @@ class Scanner:
        provider_state = provider_states.get(provider_name)
        return provider_state is not None and provider_state.get('status') == 'success'

    def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> Optional[List]:
        """Query a single provider with stop signal checking."""
        provider_name = provider.get_name()
        start_time = datetime.now(timezone.utc)

        if self._is_stop_requested():
            print(f"Stop requested before querying {provider_name} for {target}")
            return None

        print(f"Querying {provider_name} for {target}")

        self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")

        try:
            if is_ip:
                results = provider.query_ip(target)
            else:
                results = provider.query_domain(target)

            if self._is_stop_requested():
                print(f"Stop requested after querying {provider_name} for {target}")
                return None

            self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)

            print(f"✓ {provider_name} returned {len(results)} results for {target}")
            return results

        except Exception as e:
            self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
            print(f"✗ {provider_name} failed for {target}: {e}")
            return None

    def _update_provider_state(self, target: str, provider_name: str, status: str,
                               results_count: int, error: Optional[str], start_time: datetime) -> None:
        """Update provider state in node metadata for forensic tracking."""
@@ -711,237 +915,6 @@ class Scanner:

        self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")

    def _process_provider_results(self, target: str, provider, results: List,
                                  node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
        """Process provider results, returns (discovered_targets, is_large_entity)."""
        provider_name = provider.get_name()
        discovered_targets = set()

        if self._is_stop_requested():
            print(f"Stop requested before processing results from {provider_name} for {target}")
            return discovered_targets, False

        if len(results) > self.config.large_entity_threshold:
            print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}")
            members = self._create_large_entity(target, provider_name, results, current_depth)
            return members, True

        for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
            if i % 5 == 0 and self._is_stop_requested():  # Check more frequently
                print(f"Stop requested while processing results from {provider_name} for {target}")
                break

            self.logger.log_relationship_discovery(
                source_node=source,
                target_node=rel_target,
                relationship_type=rel_type,
                confidence_score=confidence,
                provider=provider_name,
                raw_data=raw_data,
                discovery_method=f"{provider_name}_query_depth_{current_depth}"
            )

            # Collect attributes for the source node
            self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

            # If the relationship is asn_membership, collect attributes for the target ASN node
            if rel_type == 'asn_membership':
                self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])

            if isinstance(rel_target, list):
                # If the target is a list, iterate and process each item
                for single_target in rel_target:
                    if _is_valid_ip(single_target):
                        self.graph.add_node(single_target, NodeType.IP)
                        if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
                            print(f"Added IP relationship: {source} -> {single_target} ({rel_type})")
                        discovered_targets.add(single_target)
                    elif _is_valid_domain(single_target):
                        self.graph.add_node(single_target, NodeType.DOMAIN)
                        if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
                            print(f"Added domain relationship: {source} -> {single_target} ({rel_type})")
                        discovered_targets.add(single_target)
                        self._collect_node_attributes(single_target, provider_name, rel_type, source, raw_data, node_attributes[single_target])

            elif _is_valid_ip(rel_target):
                self.graph.add_node(rel_target, NodeType.IP)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added IP relationship: {source} -> {rel_target} ({rel_type})")
                discovered_targets.add(rel_target)

            elif rel_target.startswith('AS') and rel_target[2:].isdigit():
                self.graph.add_node(rel_target, NodeType.ASN)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added ASN relationship: {source} -> {rel_target} ({rel_type})")

            elif _is_valid_domain(rel_target):
                self.graph.add_node(rel_target, NodeType.DOMAIN)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added domain relationship: {source} -> {rel_target} ({rel_type})")
                discovered_targets.add(rel_target)
                self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])

            else:
                self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

        return discovered_targets, False

    def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> Set[str]:
        """Create a large entity node and returns the members for DNS processing."""
        entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"

        targets = [rel[1] for rel in results if len(rel) > 1]
        node_type = 'unknown'

        if targets:
            if _is_valid_domain(targets[0]):
                node_type = 'domain'
            elif _is_valid_ip(targets[0]):
                node_type = 'ip'

        # We still create the nodes so they exist in the graph, they are just not processed for edges yet.
        for target in targets:
            self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)

        attributes = {
            'count': len(targets),
            'nodes': targets,
            'node_type': node_type,
            'source_provider': provider_name,
            'discovery_depth': current_depth,
            'threshold_exceeded': self.config.large_entity_threshold,
        }
        description = f'Large entity created due to {len(targets)} results from {provider_name}'

        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)

        if results:
            rel_type = results[0][2]
            self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name,
                                {'large_entity_info': f'Contains {len(targets)} {node_type}s'})

        self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
        print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")

        return set(targets)

    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Extracts a node from a large entity, re-creates its original edge, and
        re-queues it for full scanning.
        """
        if not self.graph.graph.has_node(large_entity_id):
            print(f"ERROR: Large entity {large_entity_id} not found.")
            return False

        # 1. Get the original source node that discovered the large entity
        predecessors = list(self.graph.graph.predecessors(large_entity_id))
        if not predecessors:
            print(f"ERROR: No source node found for large entity {large_entity_id}.")
            return False
        source_node_id = predecessors[0]

        # Get the original edge data to replicate it for the extracted node
        original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id)
        if not original_edge_data:
            print(f"ERROR: Could not find original edge data from {source_node_id} to {large_entity_id}.")
            return False

        # 2. Modify the graph data structure first
        success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
        if not success:
            print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.")
            return False

        # 3. Create the direct edge from the original source to the newly extracted node
        print(f"Re-creating direct edge from {source_node_id} to extracted node {node_id_to_extract}")
        self.graph.add_edge(
            source_id=source_node_id,
            target_id=node_id_to_extract,
            relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'),
            confidence_score=original_edge_data.get('confidence_score', 0.85),  # Slightly lower confidence
            source_provider=original_edge_data.get('source_provider', 'unknown'),
            raw_data={'context': f'Extracted from large entity {large_entity_id}'}
        )

        # 4. Re-queue the extracted node for full processing by all eligible providers
        print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
        is_ip = _is_valid_ip(node_id_to_extract)
        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)

        eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
        for provider in eligible_providers:
            provider_name = provider.get_name()
            self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth)))
            self.total_tasks_ever_enqueued += 1

        # 5. If the scanner is not running, we need to kickstart it to process this one item.
        if self.status != ScanStatus.RUNNING:
            print("Scanner is idle. Starting a mini-scan to process the extracted node.")
            self.status = ScanStatus.RUNNING
            self._update_session_state()

            if not self.scan_thread or not self.scan_thread.is_alive():
                self.scan_thread = threading.Thread(
                    target=self._execute_scan,
                    args=(self.current_target, self.max_depth),
                    daemon=True
                )
                self.scan_thread.start()

        print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.")
        return True

    def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: str,
                                 target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None:
        """Collect and organize attributes for a node."""
        self.logger.logger.debug(f"Collecting attributes for {node_id} from {provider_name}: {rel_type}")

        if provider_name == 'dns':
            record_type = raw_data.get('query_type', 'UNKNOWN')
            value = raw_data.get('value', target)
            dns_entry = f"{record_type}: {value}"
            if dns_entry not in attributes.get('dns_records', []):
                attributes.setdefault('dns_records', []).append(dns_entry)

        elif provider_name == 'crtsh':
            if rel_type == "san_certificate":
                domain_certs = raw_data.get('domain_certificates', {})
                if node_id in domain_certs:
                    cert_summary = domain_certs[node_id]
                    attributes['certificates'] = cert_summary
                if target not in attributes.get('related_domains_san', []):
                    attributes.setdefault('related_domains_san', []).append(target)

        elif provider_name == 'shodan':
            # This logic will now apply to the correct node (ASN or IP)
            shodan_attributes = attributes.setdefault('shodan', {})
            for key, value in raw_data.items():
                if key not in shodan_attributes or not shodan_attributes.get(key):
                    shodan_attributes[key] = value

            if _is_valid_ip(node_id):
                if 'ports' in raw_data:
                    attributes['ports'] = raw_data['ports']
                if 'os' in raw_data and raw_data['os']:
                    attributes['os'] = raw_data['os']

        if rel_type == "asn_membership":
            # This is the key change: these attributes are for the target (the ASN),
            # not the source (the IP). We will add them to the ASN node later.
            pass

        record_type_name = rel_type
        if record_type_name not in attributes:
            attributes[record_type_name] = []

        if isinstance(target, list):
            attributes[record_type_name].extend(target)
        else:
            if target not in attributes[record_type_name]:
                attributes[record_type_name].append(target)

    def _log_target_processing_error(self, target: str, error: str) -> None:
        """Log target processing errors for forensic trail."""
        self.logger.logger.error(f"Target processing failed for {target}: {error}")
@@ -5,15 +5,11 @@ import time
import uuid
import redis
import pickle
from typing import Dict, Optional, Any, List
from typing import Dict, Optional, Any

from core.scanner import Scanner
from config import config

# WARNING: Using pickle can be a security risk if the data source is not trusted.
# In this case, we are only serializing/deserializing our own trusted Scanner objects,
# which is generally safe. Do not unpickle data from untrusted sources.
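A hypothetical sketch of the pickle round-trip the warning describes; the Redis key naming and the helper names below are assumptions for illustration and are not part of this diff:

import pickle
import redis

r = redis.Redis()

def save_scanner(session_id: str, scanner) -> None:
    # Serialize one of our own trusted Scanner objects into Redis.
    r.set(f"dnsrecon:session:{session_id}", pickle.dumps(scanner))

def load_scanner(session_id: str):
    # Only unpickle data this application wrote itself.
    blob = r.get(f"dnsrecon:session:{session_id}")
    return pickle.loads(blob) if blob is not None else None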
class SessionManager:
    """
    Manages multiple scanner instances for concurrent user sessions using Redis.