data model refinement
@@ -152,21 +152,31 @@ class GraphManager:
                 })
         return all_correlations

-    def add_node(self, node_id: str, node_type: NodeType, metadata: Optional[Dict[str, Any]] = None) -> bool:
-        """Add a node to the graph, update metadata, and process correlations."""
+    def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None,
+                 description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
+        """Add a node to the graph, update attributes, and process correlations."""
         is_new_node = not self.graph.has_node(node_id)
         if is_new_node:
             self.graph.add_node(node_id, type=node_type.value,
                                 added_timestamp=datetime.now(timezone.utc).isoformat(),
+                                attributes=attributes or {},
+                                description=description,
                                 metadata=metadata or {})
-        elif metadata:
-            # Safely merge new metadata into existing metadata
-            existing_metadata = self.graph.nodes[node_id].get('metadata', {})
-            existing_metadata.update(metadata)
-            self.graph.nodes[node_id]['metadata'] = existing_metadata
+        else:
+            # Safely merge new attributes into existing attributes
+            if attributes:
+                existing_attributes = self.graph.nodes[node_id].get('attributes', {})
+                existing_attributes.update(attributes)
+                self.graph.nodes[node_id]['attributes'] = existing_attributes
+            if description:
+                self.graph.nodes[node_id]['description'] = description
+            if metadata:
+                existing_metadata = self.graph.nodes[node_id].get('metadata', {})
+                existing_metadata.update(metadata)
+                self.graph.nodes[node_id]['metadata'] = existing_metadata

-        if metadata and node_type != NodeType.CORRELATION_OBJECT:
-            correlations = self._check_for_correlations(node_id, metadata)
+        if attributes and node_type != NodeType.CORRELATION_OBJECT:
+            correlations = self._check_for_correlations(node_id, attributes)
             for corr in correlations:
                 value = corr['value']

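A usage sketch of the revised add_node signature, assuming the GraphManager instance (gm here) and NodeType enum from this repository are importable; the calls and values are illustrative, not taken from the project's call sites.

# Sketch only: 'gm' is an assumed GraphManager instance.
created = gm.add_node(
    "example.com", NodeType.DOMAIN,
    attributes={"dns_records": ["A: 203.0.113.10"]},
    description="Seed target for this scan",
    metadata={"source": "manual"},
)  # True: the node did not exist yet

# A second call with the same node_id takes the merge path above:
# attributes and metadata are dict-updated, description is only replaced when non-empty.
gm.add_node("example.com", NodeType.DOMAIN,
            attributes={"related_domains_san": ["www.example.com"]})
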
@@ -186,7 +196,7 @@ class GraphManager:
                         continue # Skip creating a redundant correlation node

                 # Proceed to create a new correlation node if no major node was found.
-                correlation_node_id = f"corr_{hash(value) & 0x7FFFFFFF}"
+                correlation_node_id = f"{value}"
                 if not self.graph.has_node(correlation_node_id):
                     self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT,
                                   metadata={'value': value, 'sources': corr['sources'],
@@ -203,7 +213,7 @@ class GraphManager:
                 for c_node_id in set(corr['nodes']):
                     self.add_edge(c_node_id, correlation_node_id, RelationshipType.CORRELATED_TO)

-        self._update_correlation_index(node_id, metadata)
+        self._update_correlation_index(node_id, attributes)

         self.last_modified = datetime.now(timezone.utc).isoformat()
         return is_new_node
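
One practical effect of the two hunks above, sketched with made-up values: the correlation hub's node id is now the shared value itself rather than a hashed corr_ id, which is both readable and stable across runs (Python string hashes are randomized per process).

corr = {"value": "203.0.113.7", "sources": ["dns"], "nodes": ["a.example.com", "b.example.com"]}

value = corr["value"]
old_id = f"corr_{hash(value) & 0x7FFFFFFF}"  # opaque, and differs between interpreter runs
new_id = f"{value}"                          # the shared value itself

# Each contributing node is then linked to that hub:
#   self.add_edge(c_node_id, new_id, RelationshipType.CORRELATED_TO)
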
@@ -263,12 +273,14 @@ class GraphManager:
         nodes = []
         for node_id, attrs in self.graph.nodes(data=True):
             node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'),
+                         'attributes': attrs.get('attributes', {}),
+                         'description': attrs.get('description', ''),
                          'metadata': attrs.get('metadata', {}),
                          'added_timestamp': attrs.get('added_timestamp')}
-            # Customize node appearance based on type and metadata
+            # Customize node appearance based on type and attributes
             node_type = node_data['type']
-            metadata = node_data['metadata']
-            if node_type == 'domain' and metadata.get('certificate_data', {}).get('has_valid_cert') is False:
+            attributes = node_data['attributes']
+            if node_type == 'domain' and attributes.get('certificates', {}).get('has_valid_cert') is False:
                 node_data['color'] = {'background': '#c7c7c7', 'border': '#999'} # Gray for invalid cert
             nodes.append(node_data)

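For reference, a sketch of the node payload the updated export loop now emits, with the certificate check reading attributes['certificates'] instead of metadata['certificate_data']; all field values here are made up.

node_data = {
    "id": "shop.example.com",
    "label": "shop.example.com",
    "type": "domain",
    "attributes": {"certificates": {"has_valid_cert": False}},
    "description": "",
    "metadata": {},
    "added_timestamp": "2024-01-01T00:00:00+00:00",
}

attributes = node_data["attributes"]
if node_data["type"] == "domain" and attributes.get("certificates", {}).get("has_valid_cert") is False:
    node_data["color"] = {"background": "#c7c7c7", "border": "#999"}  # gray out the invalid-cert domain
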
@@ -344,7 +344,7 @@ class Scanner:

         new_targets = set()
         large_entity_members = set()
-        target_metadata = defaultdict(lambda: defaultdict(list))
+        node_attributes = defaultdict(lambda: defaultdict(list))

         eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)

@@ -361,7 +361,7 @@ class Scanner:
                 provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
                 if provider_results and not self._is_stop_requested():
                     discovered, is_large_entity = self._process_provider_results_forensic(
-                        target, provider, provider_results, target_metadata, depth
+                        target, provider, provider_results, node_attributes, depth
                     )
                     if is_large_entity:
                         large_entity_members.update(discovered)
@@ -370,11 +370,11 @@ class Scanner:
             except Exception as e:
                 self._log_provider_error(target, provider.get_name(), str(e))

-        for node_id, metadata_dict in target_metadata.items():
+        for node_id, attributes in node_attributes.items():
             if self.graph.graph.has_node(node_id):
                 node_is_ip = _is_valid_ip(node_id)
                 node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
-                self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
+                self.graph.add_node(node_id, node_type_to_add, attributes=attributes)

         return new_targets, large_entity_members

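The renamed accumulator keeps its two-level defaultdict shape, so providers can append into per-node buckets without existence checks, and each bucket is then flushed onto the graph through the new attributes= keyword. A standalone sketch of that flow:

from collections import defaultdict

node_attributes = defaultdict(lambda: defaultdict(list))

# Providers append into per-node buckets; both levels are created lazily.
node_attributes["example.com"]["dns_records"].append("A: 203.0.113.10")
node_attributes["example.com"]["dns_records"].append("MX: mail.example.com")

# Flush step, mirroring the loop above: each bucket becomes that node's attributes.
for node_id, attributes in node_attributes.items():
    print(node_id, dict(attributes))
    # In the scanner this is: self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
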
@@ -485,7 +485,7 @@ class Scanner:
         self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")

     def _process_provider_results_forensic(self, target: str, provider, results: List,
-                                           target_metadata: Dict, current_depth: int) -> Tuple[Set[str], bool]:
+                                           node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
         """Process provider results, returns (discovered_targets, is_large_entity)."""
         provider_name = provider.get_name()
         discovered_targets = set()
@@ -514,7 +514,7 @@ class Scanner:
                 discovery_method=f"{provider_name}_query_depth_{current_depth}"
             )

-            self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
+            self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

             if _is_valid_ip(rel_target):
                 self.graph.add_node(rel_target, NodeType.IP)
@@ -532,10 +532,10 @@ class Scanner:
                 if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                     print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
                     discovered_targets.add(rel_target)
-                    self._collect_node_metadata_forensic(rel_target, provider_name, rel_type, source, raw_data, target_metadata[rel_target])
+                    self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])

             else:
-                self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
+                self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

         return discovered_targets, False

@@ -555,17 +555,17 @@ class Scanner:
         for target in targets:
             self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)

-        metadata = {
+        attributes = {
             'count': len(targets),
             'nodes': targets,
             'node_type': node_type,
             'source_provider': provider_name,
             'discovery_depth': current_depth,
             'threshold_exceeded': self.config.large_entity_threshold,
-            'forensic_note': f'Large entity created due to {len(targets)} results from {provider_name}'
         }
+        description = f'Large entity created due to {len(targets)} results from {provider_name}'

-        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, metadata=metadata)
+        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)

         if results:
             rel_type = results[0][2]
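
A sketch of what the split above produces for a large-entity node: the forensic note leaves the attribute dict and becomes the node's description. Values (and the entity_id in the comment) are illustrative only.

targets = ["a.example.com", "b.example.com", "c.example.com"]
provider_name = "crtsh"

attributes = {
    "count": len(targets),
    "nodes": targets,
    "node_type": "domain",
    "source_provider": provider_name,
    "discovery_depth": 1,       # illustrative
    "threshold_exceeded": 100,  # illustrative stand-in for self.config.large_entity_threshold
}
description = f"Large entity created due to {len(targets)} results from {provider_name}"

# self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)
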
@@ -577,49 +577,50 @@ class Scanner:

         return set(targets)

-    def _collect_node_metadata_forensic(self, node_id: str, provider_name: str, rel_type: RelationshipType,
-                                        target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
-        """Collect and organize metadata for forensic tracking with enhanced logging."""
-        self.logger.logger.debug(f"Collecting metadata for {node_id} from {provider_name}: {rel_type.relationship_name}")
+    def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: RelationshipType,
+                                 target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None:
+        """Collect and organize attributes for a node."""
+        self.logger.logger.debug(f"Collecting attributes for {node_id} from {provider_name}: {rel_type.relationship_name}")

         if provider_name == 'dns':
             record_type = raw_data.get('query_type', 'UNKNOWN')
             value = raw_data.get('value', target)
             dns_entry = f"{record_type}: {value}"
-            if dns_entry not in metadata.get('dns_records', []):
-                metadata.setdefault('dns_records', []).append(dns_entry)
+            if dns_entry not in attributes.get('dns_records', []):
+                attributes.setdefault('dns_records', []).append(dns_entry)

         elif provider_name == 'crtsh':
             if rel_type == RelationshipType.SAN_CERTIFICATE:
                 domain_certs = raw_data.get('domain_certificates', {})
                 if node_id in domain_certs:
                     cert_summary = domain_certs[node_id]
-                    metadata['certificate_data'] = cert_summary
-                    metadata['has_valid_cert'] = cert_summary.get('has_valid_cert', False)
-                    if target not in metadata.get('related_domains_san', []):
-                        metadata.setdefault('related_domains_san', []).append(target)
+                    attributes['certificates'] = cert_summary
+                    if target not in attributes.get('related_domains_san', []):
+                        attributes.setdefault('related_domains_san', []).append(target)

         elif provider_name == 'shodan':
+            shodan_attributes = attributes.setdefault('shodan', {})
             for key, value in raw_data.items():
-                if key not in metadata.get('shodan', {}) or not metadata.get('shodan', {}).get(key):
-                    metadata.setdefault('shodan', {})[key] = value
+                if key not in shodan_attributes or not shodan_attributes.get(key):
+                    shodan_attributes[key] = value

         if rel_type == RelationshipType.ASN_MEMBERSHIP:
-            metadata['asn_data'] = {
-                'asn': target,
+            attributes['asn'] = {
+                'id': target,
                 'description': raw_data.get('org', ''),
                 'isp': raw_data.get('isp', ''),
                 'country': raw_data.get('country', '')
             }

         record_type_name = rel_type.relationship_name
-        if record_type_name not in metadata:
-            metadata[record_type_name] = []
+        if record_type_name not in attributes:
+            attributes[record_type_name] = []

         if isinstance(target, list):
-            metadata[record_type_name].extend(target)
+            attributes[record_type_name].extend(target)
         else:
-            metadata[record_type_name].append(target)
+            if target not in attributes[record_type_name]:
+                attributes[record_type_name].append(target)


     def _log_target_processing_error(self, target: str, error: str) -> None:
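
A standalone sketch of the merge rule the reworked shodan branch applies: a key is written only when it is missing or currently empty, so an existing non-empty value is never clobbered by a later, emptier result. Data values are made up.

attributes = {'shodan': {'org': 'ExampleNet', 'isp': ''}}
raw_data = {'org': '', 'isp': 'Example ISP', 'country': 'DE'}

shodan_attributes = attributes.setdefault('shodan', {})
for key, value in raw_data.items():
    if key not in shodan_attributes or not shodan_attributes.get(key):
        shodan_attributes[key] = value

assert attributes['shodan'] == {'org': 'ExampleNet', 'isp': 'Example ISP', 'country': 'DE'}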