data model refinement

This commit is contained in:
overcuriousity
2025-09-13 21:10:27 +02:00
parent 930fdca500
commit 2974312278
5 changed files with 231 additions and 230 deletions

View File

@@ -152,21 +152,31 @@ class GraphManager:
})
return all_correlations
def add_node(self, node_id: str, node_type: NodeType, metadata: Optional[Dict[str, Any]] = None) -> bool:
"""Add a node to the graph, update metadata, and process correlations."""
def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None,
description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
"""Add a node to the graph, update attributes, and process correlations."""
is_new_node = not self.graph.has_node(node_id)
if is_new_node:
self.graph.add_node(node_id, type=node_type.value,
added_timestamp=datetime.now(timezone.utc).isoformat(),
attributes=attributes or {},
description=description,
metadata=metadata or {})
elif metadata:
# Safely merge new metadata into existing metadata
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
else:
# Safely merge new attributes into existing attributes
if attributes:
existing_attributes = self.graph.nodes[node_id].get('attributes', {})
existing_attributes.update(attributes)
self.graph.nodes[node_id]['attributes'] = existing_attributes
if description:
self.graph.nodes[node_id]['description'] = description
if metadata:
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
if metadata and node_type != NodeType.CORRELATION_OBJECT:
correlations = self._check_for_correlations(node_id, metadata)
if attributes and node_type != NodeType.CORRELATION_OBJECT:
correlations = self._check_for_correlations(node_id, attributes)
for corr in correlations:
value = corr['value']
@@ -186,7 +196,7 @@ class GraphManager:
continue # Skip creating a redundant correlation node
# Proceed to create a new correlation node if no major node was found.
correlation_node_id = f"corr_{hash(value) & 0x7FFFFFFF}"
correlation_node_id = f"{value}"
if not self.graph.has_node(correlation_node_id):
self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT,
metadata={'value': value, 'sources': corr['sources'],
@@ -203,7 +213,7 @@ class GraphManager:
for c_node_id in set(corr['nodes']):
self.add_edge(c_node_id, correlation_node_id, RelationshipType.CORRELATED_TO)
self._update_correlation_index(node_id, metadata)
self._update_correlation_index(node_id, attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return is_new_node
@@ -263,12 +273,14 @@ class GraphManager:
nodes = []
for node_id, attrs in self.graph.nodes(data=True):
node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'),
'attributes': attrs.get('attributes', {}),
'description': attrs.get('description', ''),
'metadata': attrs.get('metadata', {}),
'added_timestamp': attrs.get('added_timestamp')}
# Customize node appearance based on type and metadata
# Customize node appearance based on type and attributes
node_type = node_data['type']
metadata = node_data['metadata']
if node_type == 'domain' and metadata.get('certificate_data', {}).get('has_valid_cert') is False:
attributes = node_data['attributes']
if node_type == 'domain' and attributes.get('certificates', {}).get('has_valid_cert') is False:
node_data['color'] = {'background': '#c7c7c7', 'border': '#999'} # Gray for invalid cert
nodes.append(node_data)

View File

@@ -344,7 +344,7 @@ class Scanner:
new_targets = set()
large_entity_members = set()
target_metadata = defaultdict(lambda: defaultdict(list))
node_attributes = defaultdict(lambda: defaultdict(list))
eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
@@ -361,7 +361,7 @@ class Scanner:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
if provider_results and not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic(
target, provider, provider_results, target_metadata, depth
target, provider, provider_results, node_attributes, depth
)
if is_large_entity:
large_entity_members.update(discovered)
@@ -370,11 +370,11 @@ class Scanner:
except Exception as e:
self._log_provider_error(target, provider.get_name(), str(e))
for node_id, metadata_dict in target_metadata.items():
for node_id, attributes in node_attributes.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
return new_targets, large_entity_members
@@ -485,7 +485,7 @@ class Scanner:
self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")
def _process_provider_results_forensic(self, target: str, provider, results: List,
target_metadata: Dict, current_depth: int) -> Tuple[Set[str], bool]:
node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
"""Process provider results, returns (discovered_targets, is_large_entity)."""
provider_name = provider.get_name()
discovered_targets = set()
@@ -514,7 +514,7 @@ class Scanner:
discovery_method=f"{provider_name}_query_depth_{current_depth}"
)
self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
if _is_valid_ip(rel_target):
self.graph.add_node(rel_target, NodeType.IP)
@@ -532,10 +532,10 @@ class Scanner:
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
discovered_targets.add(rel_target)
self._collect_node_metadata_forensic(rel_target, provider_name, rel_type, source, raw_data, target_metadata[rel_target])
self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])
else:
self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
return discovered_targets, False
@@ -555,17 +555,17 @@ class Scanner:
for target in targets:
self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)
metadata = {
attributes = {
'count': len(targets),
'nodes': targets,
'node_type': node_type,
'source_provider': provider_name,
'discovery_depth': current_depth,
'threshold_exceeded': self.config.large_entity_threshold,
'forensic_note': f'Large entity created due to {len(targets)} results from {provider_name}'
}
description = f'Large entity created due to {len(targets)} results from {provider_name}'
self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, metadata=metadata)
self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)
if results:
rel_type = results[0][2]
@@ -577,49 +577,50 @@ class Scanner:
return set(targets)
def _collect_node_metadata_forensic(self, node_id: str, provider_name: str, rel_type: RelationshipType,
target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
"""Collect and organize metadata for forensic tracking with enhanced logging."""
self.logger.logger.debug(f"Collecting metadata for {node_id} from {provider_name}: {rel_type.relationship_name}")
def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: RelationshipType,
target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None:
"""Collect and organize attributes for a node."""
self.logger.logger.debug(f"Collecting attributes for {node_id} from {provider_name}: {rel_type.relationship_name}")
if provider_name == 'dns':
record_type = raw_data.get('query_type', 'UNKNOWN')
value = raw_data.get('value', target)
dns_entry = f"{record_type}: {value}"
if dns_entry not in metadata.get('dns_records', []):
metadata.setdefault('dns_records', []).append(dns_entry)
if dns_entry not in attributes.get('dns_records', []):
attributes.setdefault('dns_records', []).append(dns_entry)
elif provider_name == 'crtsh':
if rel_type == RelationshipType.SAN_CERTIFICATE:
domain_certs = raw_data.get('domain_certificates', {})
if node_id in domain_certs:
cert_summary = domain_certs[node_id]
metadata['certificate_data'] = cert_summary
metadata['has_valid_cert'] = cert_summary.get('has_valid_cert', False)
if target not in metadata.get('related_domains_san', []):
metadata.setdefault('related_domains_san', []).append(target)
attributes['certificates'] = cert_summary
if target not in attributes.get('related_domains_san', []):
attributes.setdefault('related_domains_san', []).append(target)
elif provider_name == 'shodan':
shodan_attributes = attributes.setdefault('shodan', {})
for key, value in raw_data.items():
if key not in metadata.get('shodan', {}) or not metadata.get('shodan', {}).get(key):
metadata.setdefault('shodan', {})[key] = value
if key not in shodan_attributes or not shodan_attributes.get(key):
shodan_attributes[key] = value
if rel_type == RelationshipType.ASN_MEMBERSHIP:
metadata['asn_data'] = {
'asn': target,
attributes['asn'] = {
'id': target,
'description': raw_data.get('org', ''),
'isp': raw_data.get('isp', ''),
'country': raw_data.get('country', '')
}
record_type_name = rel_type.relationship_name
if record_type_name not in metadata:
metadata[record_type_name] = []
if record_type_name not in attributes:
attributes[record_type_name] = []
if isinstance(target, list):
metadata[record_type_name].extend(target)
attributes[record_type_name].extend(target)
else:
metadata[record_type_name].append(target)
if target not in attributes[record_type_name]:
attributes[record_type_name].append(target)
def _log_target_processing_error(self, target: str, error: str) -> None: