new node types
This commit is contained in:
parent
cd14198452
commit
98e1b2280b
@ -43,12 +43,13 @@ class GraphManager:
|
|||||||
# Compile regex for date filtering for efficiency
|
# Compile regex for date filtering for efficiency
|
||||||
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
||||||
|
|
||||||
# These are the actual attribute names created in providers, WITHOUT provider prefix
|
# FIXED: Exclude cert_issuer_name since we already create proper CA relationships
|
||||||
self.EXCLUDED_KEYS = [
|
self.EXCLUDED_KEYS = [
|
||||||
# Certificate metadata that creates noise
|
# Certificate metadata that creates noise or has dedicated node types
|
||||||
'cert_source', # Always 'crtsh' for crtsh provider
|
'cert_source', # Always 'crtsh' for crtsh provider
|
||||||
'cert_common_name',
|
'cert_common_name',
|
||||||
'cert_validity_period_days', # Numerical, not useful for correlation
|
'cert_validity_period_days', # Numerical, not useful for correlation
|
||||||
|
'cert_issuer_name', # FIXED: Has dedicated CA nodes, don't correlate
|
||||||
#'cert_certificate_id', # Unique per certificate
|
#'cert_certificate_id', # Unique per certificate
|
||||||
#'cert_serial_number', # Unique per certificate
|
#'cert_serial_number', # Unique per certificate
|
||||||
'cert_entry_timestamp', # Timestamp, filtered by date regex anyway
|
'cert_entry_timestamp', # Timestamp, filtered by date regex anyway
|
||||||
@ -211,7 +212,7 @@ class GraphManager:
|
|||||||
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
|
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if there's a direct edge between two nodes in either direction.
|
Check if there's a direct edge between two nodes in either direction.
|
||||||
Returns True if node_a→node_b OR node_b→node_a exists.
|
Returns True if node_a→node_b OR node_b→node_a exists.
|
||||||
"""
|
"""
|
||||||
return (self.graph.has_edge(node_a, node_b) or
|
return (self.graph.has_edge(node_a, node_b) or
|
||||||
self.graph.has_edge(node_b, node_a))
|
self.graph.has_edge(node_b, node_a))
|
||||||
|
|||||||
@ -550,6 +550,7 @@ class Scanner:
|
|||||||
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
|
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
|
||||||
"""
|
"""
|
||||||
Process a unified ProviderResult object to update the graph.
|
Process a unified ProviderResult object to update the graph.
|
||||||
|
VERIFIED: Proper ISP and CA node type assignment.
|
||||||
"""
|
"""
|
||||||
provider_name = provider.get_name()
|
provider_name = provider.get_name()
|
||||||
discovered_targets = set()
|
discovered_targets = set()
|
||||||
@ -557,6 +558,7 @@ class Scanner:
|
|||||||
if self._is_stop_requested():
|
if self._is_stop_requested():
|
||||||
return discovered_targets, False
|
return discovered_targets, False
|
||||||
|
|
||||||
|
# Process all attributes first, grouping by target node
|
||||||
attributes_by_node = defaultdict(list)
|
attributes_by_node = defaultdict(list)
|
||||||
for attribute in provider_result.attributes:
|
for attribute in provider_result.attributes:
|
||||||
attr_dict = {
|
attr_dict = {
|
||||||
@ -569,8 +571,10 @@ class Scanner:
|
|||||||
}
|
}
|
||||||
attributes_by_node[attribute.target_node].append(attr_dict)
|
attributes_by_node[attribute.target_node].append(attr_dict)
|
||||||
|
|
||||||
|
# Add attributes to existing nodes (important for ISP nodes to get ASN attributes)
|
||||||
for node_id, node_attributes_list in attributes_by_node.items():
|
for node_id, node_attributes_list in attributes_by_node.items():
|
||||||
if self.graph.graph.has_node(node_id):
|
if self.graph.graph.has_node(node_id):
|
||||||
|
# Node already exists, just add attributes
|
||||||
if _is_valid_ip(node_id):
|
if _is_valid_ip(node_id):
|
||||||
node_type = NodeType.IP
|
node_type = NodeType.IP
|
||||||
else:
|
else:
|
||||||
@ -578,10 +582,12 @@ class Scanner:
|
|||||||
|
|
||||||
self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
|
self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
|
||||||
|
|
||||||
|
# Check if this should be a large entity
|
||||||
if provider_result.get_relationship_count() > self.config.large_entity_threshold:
|
if provider_result.get_relationship_count() > self.config.large_entity_threshold:
|
||||||
members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
|
members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
|
||||||
return members, True
|
return members, True
|
||||||
|
|
||||||
|
# Process relationships and create nodes with proper types
|
||||||
for i, relationship in enumerate(provider_result.relationships):
|
for i, relationship in enumerate(provider_result.relationships):
|
||||||
if i % 5 == 0 and self._is_stop_requested():
|
if i % 5 == 0 and self._is_stop_requested():
|
||||||
break
|
break
|
||||||
@ -589,20 +595,24 @@ class Scanner:
|
|||||||
source_node = relationship.source_node
|
source_node = relationship.source_node
|
||||||
target_node = relationship.target_node
|
target_node = relationship.target_node
|
||||||
|
|
||||||
|
# VERIFIED: Determine source node type
|
||||||
source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
|
source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
|
||||||
|
|
||||||
if provider_name == 'shodan' and relationship.relationship_type == 'ip_to_isp':
|
# VERIFIED: Determine target node type based on provider and relationship
|
||||||
target_type = NodeType.ISP
|
if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp':
|
||||||
elif provider_name == 'crtsh' and relationship.relationship_type == 'issued_by':
|
target_type = NodeType.ISP # ISP node for Shodan organization data
|
||||||
target_type = NodeType.CA
|
elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
|
||||||
|
target_type = NodeType.CA # CA node for certificate issuers
|
||||||
elif _is_valid_ip(target_node):
|
elif _is_valid_ip(target_node):
|
||||||
target_type = NodeType.IP
|
target_type = NodeType.IP
|
||||||
else:
|
else:
|
||||||
target_type = NodeType.DOMAIN
|
target_type = NodeType.DOMAIN
|
||||||
|
|
||||||
|
# Create or update nodes with proper types
|
||||||
self.graph.add_node(source_node, source_type)
|
self.graph.add_node(source_node, source_type)
|
||||||
self.graph.add_node(target_node, target_type)
|
self.graph.add_node(target_node, target_type)
|
||||||
|
|
||||||
|
# Add the relationship edge
|
||||||
if self.graph.add_edge(
|
if self.graph.add_edge(
|
||||||
source_node, target_node,
|
source_node, target_node,
|
||||||
relationship.relationship_type,
|
relationship.relationship_type,
|
||||||
@ -610,8 +620,9 @@ class Scanner:
|
|||||||
provider_name,
|
provider_name,
|
||||||
relationship.raw_data
|
relationship.raw_data
|
||||||
):
|
):
|
||||||
pass
|
pass # Edge was successfully added
|
||||||
|
|
||||||
|
# Add target to discovered nodes for further processing
|
||||||
if _is_valid_domain(target_node) or _is_valid_ip(target_node):
|
if _is_valid_domain(target_node) or _is_valid_ip(target_node):
|
||||||
discovered_targets.add(target_node)
|
discovered_targets.add(target_node)
|
||||||
|
|
||||||
|
|||||||
@ -298,7 +298,7 @@ class CrtShProvider(BaseProvider):
|
|||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=domain,
|
source_node=domain,
|
||||||
target_node=issuer_name,
|
target_node=issuer_name,
|
||||||
relationship_type='issued_by',
|
relationship_type='crtsh_cert_issuer',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.95
|
confidence=0.95
|
||||||
)
|
)
|
||||||
|
|||||||
@ -211,31 +211,48 @@ class ShodanProvider(BaseProvider):
|
|||||||
|
|
||||||
def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> ProviderResult:
|
def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> ProviderResult:
|
||||||
"""
|
"""
|
||||||
UPDATED: Process Shodan data with raw attribute names and values.
|
VERIFIED: Process Shodan data creating ISP nodes with ASN attributes and proper relationships.
|
||||||
"""
|
"""
|
||||||
result = ProviderResult()
|
result = ProviderResult()
|
||||||
|
|
||||||
|
# VERIFIED: Extract ISP information and create proper ISP node with ASN
|
||||||
isp_name = data.get('org')
|
isp_name = data.get('org')
|
||||||
asn_value = data.get('asn')
|
asn_value = data.get('asn')
|
||||||
|
|
||||||
if isp_name and asn_value:
|
if isp_name and asn_value:
|
||||||
|
# Create relationship from IP to ISP
|
||||||
result.add_relationship(
|
result.add_relationship(
|
||||||
source_node=ip,
|
source_node=ip,
|
||||||
target_node=isp_name,
|
target_node=isp_name,
|
||||||
relationship_type='ip_to_isp',
|
relationship_type='shodan_isp',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.9,
|
confidence=0.9,
|
||||||
raw_data={'asn': asn_value}
|
raw_data={'asn': asn_value, 'shodan_org': isp_name}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Add ASN as attribute to the ISP node
|
||||||
result.add_attribute(
|
result.add_attribute(
|
||||||
target_node=isp_name,
|
target_node=isp_name,
|
||||||
name='asn',
|
name='asn',
|
||||||
value=asn_value,
|
value=asn_value,
|
||||||
attr_type='isp_info',
|
attr_type='isp_info',
|
||||||
provider=self.name,
|
provider=self.name,
|
||||||
confidence=0.9
|
confidence=0.9,
|
||||||
|
metadata={'description': 'Autonomous System Number from Shodan'}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Also add organization name as attribute to ISP node for completeness
|
||||||
|
result.add_attribute(
|
||||||
|
target_node=isp_name,
|
||||||
|
name='organization_name',
|
||||||
|
value=isp_name,
|
||||||
|
attr_type='isp_info',
|
||||||
|
provider=self.name,
|
||||||
|
confidence=0.9,
|
||||||
|
metadata={'description': 'Organization name from Shodan'}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process hostnames (reverse DNS)
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
if key == 'hostnames':
|
if key == 'hostnames':
|
||||||
for hostname in value:
|
for hostname in value:
|
||||||
@ -257,6 +274,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
discovery_method="shodan_host_lookup"
|
discovery_method="shodan_host_lookup"
|
||||||
)
|
)
|
||||||
elif key == 'ports':
|
elif key == 'ports':
|
||||||
|
# Add open ports as attributes to the IP
|
||||||
for port in value:
|
for port in value:
|
||||||
result.add_attribute(
|
result.add_attribute(
|
||||||
target_node=ip,
|
target_node=ip,
|
||||||
@ -267,7 +285,7 @@ class ShodanProvider(BaseProvider):
|
|||||||
confidence=0.9
|
confidence=0.9
|
||||||
)
|
)
|
||||||
elif isinstance(value, (str, int, float, bool)) and value is not None:
|
elif isinstance(value, (str, int, float, bool)) and value is not None:
|
||||||
# UPDATED: Keep raw Shodan field names (no "shodan_" prefix)
|
# Add other Shodan fields as IP attributes (keep raw field names)
|
||||||
result.add_attribute(
|
result.add_attribute(
|
||||||
target_node=ip,
|
target_node=ip,
|
||||||
name=key, # Raw field name from Shodan API
|
name=key, # Raw field name from Shodan API
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user