new node types
This commit is contained in:
		
							parent
							
								
									cd14198452
								
							
						
					
					
						commit
						98e1b2280b
					
				@ -43,12 +43,13 @@ class GraphManager:
 | 
				
			|||||||
        # Compile regex for date filtering for efficiency
 | 
					        # Compile regex for date filtering for efficiency
 | 
				
			||||||
        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
 | 
					        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        # These are the actual attribute names created in providers, WITHOUT provider prefix
 | 
					        # FIXED: Exclude cert_issuer_name since we already create proper CA relationships
 | 
				
			||||||
        self.EXCLUDED_KEYS = [
 | 
					        self.EXCLUDED_KEYS = [
 | 
				
			||||||
            # Certificate metadata that creates noise
 | 
					            # Certificate metadata that creates noise or has dedicated node types
 | 
				
			||||||
            'cert_source',                    # Always 'crtsh' for crtsh provider
 | 
					            'cert_source',                    # Always 'crtsh' for crtsh provider
 | 
				
			||||||
            'cert_common_name',
 | 
					            'cert_common_name',
 | 
				
			||||||
            'cert_validity_period_days',      # Numerical, not useful for correlation
 | 
					            'cert_validity_period_days',      # Numerical, not useful for correlation
 | 
				
			||||||
 | 
					            'cert_issuer_name',              # FIXED: Has dedicated CA nodes, don't correlate
 | 
				
			||||||
            #'cert_certificate_id',            # Unique per certificate
 | 
					            #'cert_certificate_id',            # Unique per certificate
 | 
				
			||||||
            #'cert_serial_number',            # Unique per certificate
 | 
					            #'cert_serial_number',            # Unique per certificate
 | 
				
			||||||
            'cert_entry_timestamp',          # Timestamp, filtered by date regex anyway
 | 
					            'cert_entry_timestamp',          # Timestamp, filtered by date regex anyway
 | 
				
			||||||
@ -211,7 +212,7 @@ class GraphManager:
 | 
				
			|||||||
    def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
 | 
					    def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Check if there's a direct edge between two nodes in either direction.
 | 
					        Check if there's a direct edge between two nodes in either direction.
 | 
				
			||||||
        Returns True if node_a→node_b OR node_b→node_a exists.
 | 
					        Returns True if node_a→node_b OR node_b→node_a exists.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        return (self.graph.has_edge(node_a, node_b) or 
 | 
					        return (self.graph.has_edge(node_a, node_b) or 
 | 
				
			||||||
                self.graph.has_edge(node_b, node_a))
 | 
					                self.graph.has_edge(node_b, node_a))
 | 
				
			||||||
 | 
				
			|||||||
@ -547,9 +547,10 @@ class Scanner:
 | 
				
			|||||||
            return None
 | 
					            return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
 | 
					    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
 | 
				
			||||||
                                       provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
 | 
					                                    provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Process a unified ProviderResult object to update the graph.
 | 
					        Process a unified ProviderResult object to update the graph.
 | 
				
			||||||
 | 
					        VERIFIED: Proper ISP and CA node type assignment.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        provider_name = provider.get_name()
 | 
					        provider_name = provider.get_name()
 | 
				
			||||||
        discovered_targets = set()
 | 
					        discovered_targets = set()
 | 
				
			||||||
@ -557,6 +558,7 @@ class Scanner:
 | 
				
			|||||||
        if self._is_stop_requested():
 | 
					        if self._is_stop_requested():
 | 
				
			||||||
            return discovered_targets, False
 | 
					            return discovered_targets, False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Process all attributes first, grouping by target node
 | 
				
			||||||
        attributes_by_node = defaultdict(list)
 | 
					        attributes_by_node = defaultdict(list)
 | 
				
			||||||
        for attribute in provider_result.attributes:
 | 
					        for attribute in provider_result.attributes:
 | 
				
			||||||
            attr_dict = {
 | 
					            attr_dict = {
 | 
				
			||||||
@ -569,8 +571,10 @@ class Scanner:
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
            attributes_by_node[attribute.target_node].append(attr_dict)
 | 
					            attributes_by_node[attribute.target_node].append(attr_dict)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Add attributes to existing nodes (important for ISP nodes to get ASN attributes)
 | 
				
			||||||
        for node_id, node_attributes_list in attributes_by_node.items():
 | 
					        for node_id, node_attributes_list in attributes_by_node.items():
 | 
				
			||||||
            if self.graph.graph.has_node(node_id):
 | 
					            if self.graph.graph.has_node(node_id):
 | 
				
			||||||
 | 
					                # Node already exists, just add attributes
 | 
				
			||||||
                if _is_valid_ip(node_id):
 | 
					                if _is_valid_ip(node_id):
 | 
				
			||||||
                    node_type = NodeType.IP
 | 
					                    node_type = NodeType.IP
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
@ -578,10 +582,12 @@ class Scanner:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
 | 
					                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Check if this should be a large entity
 | 
				
			||||||
        if provider_result.get_relationship_count() > self.config.large_entity_threshold:
 | 
					        if provider_result.get_relationship_count() > self.config.large_entity_threshold:
 | 
				
			||||||
            members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
 | 
					            members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
 | 
				
			||||||
            return members, True
 | 
					            return members, True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Process relationships and create nodes with proper types
 | 
				
			||||||
        for i, relationship in enumerate(provider_result.relationships):
 | 
					        for i, relationship in enumerate(provider_result.relationships):
 | 
				
			||||||
            if i % 5 == 0 and self._is_stop_requested():
 | 
					            if i % 5 == 0 and self._is_stop_requested():
 | 
				
			||||||
                break
 | 
					                break
 | 
				
			||||||
@ -589,20 +595,24 @@ class Scanner:
 | 
				
			|||||||
            source_node = relationship.source_node
 | 
					            source_node = relationship.source_node
 | 
				
			||||||
            target_node = relationship.target_node
 | 
					            target_node = relationship.target_node
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # VERIFIED: Determine source node type
 | 
				
			||||||
            source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
 | 
					            source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            if provider_name == 'shodan' and relationship.relationship_type == 'ip_to_isp':
 | 
					            # VERIFIED: Determine target node type based on provider and relationship
 | 
				
			||||||
                target_type = NodeType.ISP
 | 
					            if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp':
 | 
				
			||||||
            elif provider_name == 'crtsh' and relationship.relationship_type == 'issued_by':
 | 
					                target_type = NodeType.ISP  # ISP node for Shodan organization data
 | 
				
			||||||
                target_type = NodeType.CA
 | 
					            elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
 | 
				
			||||||
 | 
					                target_type = NodeType.CA   # CA node for certificate issuers
 | 
				
			||||||
            elif _is_valid_ip(target_node):
 | 
					            elif _is_valid_ip(target_node):
 | 
				
			||||||
                target_type = NodeType.IP
 | 
					                target_type = NodeType.IP
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                target_type = NodeType.DOMAIN
 | 
					                target_type = NodeType.DOMAIN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Create or update nodes with proper types
 | 
				
			||||||
            self.graph.add_node(source_node, source_type)
 | 
					            self.graph.add_node(source_node, source_type)
 | 
				
			||||||
            self.graph.add_node(target_node, target_type)
 | 
					            self.graph.add_node(target_node, target_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Add the relationship edge
 | 
				
			||||||
            if self.graph.add_edge(
 | 
					            if self.graph.add_edge(
 | 
				
			||||||
                source_node, target_node,
 | 
					                source_node, target_node,
 | 
				
			||||||
                relationship.relationship_type,
 | 
					                relationship.relationship_type,
 | 
				
			||||||
@ -610,8 +620,9 @@ class Scanner:
 | 
				
			|||||||
                provider_name,
 | 
					                provider_name,
 | 
				
			||||||
                relationship.raw_data
 | 
					                relationship.raw_data
 | 
				
			||||||
            ):
 | 
					            ):
 | 
				
			||||||
                pass
 | 
					                pass  # Edge was successfully added
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # Add target to discovered nodes for further processing
 | 
				
			||||||
            if _is_valid_domain(target_node) or _is_valid_ip(target_node):
 | 
					            if _is_valid_domain(target_node) or _is_valid_ip(target_node):
 | 
				
			||||||
                discovered_targets.add(target_node)
 | 
					                discovered_targets.add(target_node)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -298,7 +298,7 @@ class CrtShProvider(BaseProvider):
 | 
				
			|||||||
                result.add_relationship(
 | 
					                result.add_relationship(
 | 
				
			||||||
                    source_node=domain,
 | 
					                    source_node=domain,
 | 
				
			||||||
                    target_node=issuer_name,
 | 
					                    target_node=issuer_name,
 | 
				
			||||||
                    relationship_type='issued_by',
 | 
					                    relationship_type='crtsh_cert_issuer',
 | 
				
			||||||
                    provider=self.name,
 | 
					                    provider=self.name,
 | 
				
			||||||
                    confidence=0.95
 | 
					                    confidence=0.95
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 | 
				
			|||||||
@ -211,31 +211,48 @@ class ShodanProvider(BaseProvider):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> ProviderResult:
 | 
					    def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> ProviderResult:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        UPDATED: Process Shodan data with raw attribute names and values.
 | 
					        VERIFIED: Process Shodan data creating ISP nodes with ASN attributes and proper relationships.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        result = ProviderResult()
 | 
					        result = ProviderResult()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # VERIFIED: Extract ISP information and create proper ISP node with ASN
 | 
				
			||||||
        isp_name = data.get('org')
 | 
					        isp_name = data.get('org')
 | 
				
			||||||
        asn_value = data.get('asn')
 | 
					        asn_value = data.get('asn')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if isp_name and asn_value:
 | 
					        if isp_name and asn_value:
 | 
				
			||||||
 | 
					            # Create relationship from IP to ISP
 | 
				
			||||||
            result.add_relationship(
 | 
					            result.add_relationship(
 | 
				
			||||||
                source_node=ip,
 | 
					                source_node=ip,
 | 
				
			||||||
                target_node=isp_name,
 | 
					                target_node=isp_name,
 | 
				
			||||||
                relationship_type='ip_to_isp',
 | 
					                relationship_type='shodan_isp',
 | 
				
			||||||
                provider=self.name,
 | 
					                provider=self.name,
 | 
				
			||||||
                confidence=0.9,
 | 
					                confidence=0.9,
 | 
				
			||||||
                raw_data={'asn': asn_value}
 | 
					                raw_data={'asn': asn_value, 'shodan_org': isp_name}
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            # Add ASN as attribute to the ISP node
 | 
				
			||||||
            result.add_attribute(
 | 
					            result.add_attribute(
 | 
				
			||||||
                target_node=isp_name,
 | 
					                target_node=isp_name,
 | 
				
			||||||
                name='asn',
 | 
					                name='asn',
 | 
				
			||||||
                value=asn_value,
 | 
					                value=asn_value,
 | 
				
			||||||
                attr_type='isp_info',
 | 
					                attr_type='isp_info',
 | 
				
			||||||
                provider=self.name,
 | 
					                provider=self.name,
 | 
				
			||||||
                confidence=0.9
 | 
					                confidence=0.9,
 | 
				
			||||||
 | 
					                metadata={'description': 'Autonomous System Number from Shodan'}
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					            # Also add organization name as attribute to ISP node for completeness
 | 
				
			||||||
 | 
					            result.add_attribute(
 | 
				
			||||||
 | 
					                target_node=isp_name,
 | 
				
			||||||
 | 
					                name='organization_name',
 | 
				
			||||||
 | 
					                value=isp_name,
 | 
				
			||||||
 | 
					                attr_type='isp_info',
 | 
				
			||||||
 | 
					                provider=self.name,
 | 
				
			||||||
 | 
					                confidence=0.9,
 | 
				
			||||||
 | 
					                metadata={'description': 'Organization name from Shodan'}
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Process hostnames (reverse DNS)
 | 
				
			||||||
        for key, value in data.items():
 | 
					        for key, value in data.items():
 | 
				
			||||||
            if key == 'hostnames':
 | 
					            if key == 'hostnames':
 | 
				
			||||||
                for hostname in value:
 | 
					                for hostname in value:
 | 
				
			||||||
@ -257,6 +274,7 @@ class ShodanProvider(BaseProvider):
 | 
				
			|||||||
                            discovery_method="shodan_host_lookup"
 | 
					                            discovery_method="shodan_host_lookup"
 | 
				
			||||||
                        )
 | 
					                        )
 | 
				
			||||||
            elif key == 'ports':
 | 
					            elif key == 'ports':
 | 
				
			||||||
 | 
					                # Add open ports as attributes to the IP
 | 
				
			||||||
                for port in value:
 | 
					                for port in value:
 | 
				
			||||||
                    result.add_attribute(
 | 
					                    result.add_attribute(
 | 
				
			||||||
                        target_node=ip,
 | 
					                        target_node=ip,
 | 
				
			||||||
@ -267,7 +285,7 @@ class ShodanProvider(BaseProvider):
 | 
				
			|||||||
                        confidence=0.9
 | 
					                        confidence=0.9
 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
            elif isinstance(value, (str, int, float, bool)) and value is not None:
 | 
					            elif isinstance(value, (str, int, float, bool)) and value is not None:
 | 
				
			||||||
                # UPDATED: Keep raw Shodan field names (no "shodan_" prefix)
 | 
					                # Add other Shodan fields as IP attributes (keep raw field names)
 | 
				
			||||||
                result.add_attribute(
 | 
					                result.add_attribute(
 | 
				
			||||||
                    target_node=ip,
 | 
					                    target_node=ip,
 | 
				
			||||||
                    name=key,  # Raw field name from Shodan API
 | 
					                    name=key,  # Raw field name from Shodan API
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user