gradient
This commit is contained in:
@@ -229,7 +229,6 @@ class BaseProvider(ABC):
|
||||
|
||||
def log_relationship_discovery(self, source_node: str, target_node: str,
|
||||
relationship_type: str,
|
||||
confidence_score: float,
|
||||
raw_data: Dict[str, Any],
|
||||
discovery_method: str) -> None:
|
||||
"""
|
||||
@@ -239,7 +238,6 @@ class BaseProvider(ABC):
|
||||
source_node: Source node identifier
|
||||
target_node: Target node identifier
|
||||
relationship_type: Type of relationship
|
||||
confidence_score: Confidence score
|
||||
raw_data: Raw data from provider
|
||||
discovery_method: Method used for discovery
|
||||
"""
|
||||
@@ -249,7 +247,6 @@ class BaseProvider(ABC):
|
||||
source_node=source_node,
|
||||
target_node=target_node,
|
||||
relationship_type=relationship_type,
|
||||
confidence_score=confidence_score,
|
||||
provider=self.name,
|
||||
raw_data=raw_data,
|
||||
discovery_method=discovery_method
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import re
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from .base_provider import BaseProvider
|
||||
from core.provider_result import ProviderResult
|
||||
@@ -10,6 +11,7 @@ from core.graph_manager import NodeType, GraphManager
|
||||
class CorrelationProvider(BaseProvider):
|
||||
"""
|
||||
A provider that finds correlations between nodes in the graph.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
"""
|
||||
|
||||
def __init__(self, name: str = "correlation", session_config=None):
|
||||
@@ -61,12 +63,14 @@ class CorrelationProvider(BaseProvider):
|
||||
def query_domain(self, domain: str) -> ProviderResult:
    """
    Look up correlations for a domain node.

    The domain is used directly as the node identifier and handed to
    ``_find_correlations``; whatever that call produces is returned
    unchanged.
    """
    correlations = self._find_correlations(domain)
    return correlations
|
||||
|
||||
def query_ip(self, ip: str) -> ProviderResult:
    """
    Look up correlations for an IP address node.

    The IP string is used directly as the node identifier and handed to
    ``_find_correlations``; whatever that call produces is returned
    unchanged.
    """
    correlations = self._find_correlations(ip)
    return correlations
|
||||
|
||||
@@ -79,8 +83,10 @@ class CorrelationProvider(BaseProvider):
|
||||
def _find_correlations(self, node_id: str) -> ProviderResult:
|
||||
"""
|
||||
Find correlations for a given node with enhanced filtering and error handling.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
"""
|
||||
result = ProviderResult()
|
||||
discovery_time = datetime.now(timezone.utc)
|
||||
|
||||
# Enhanced safety checks
|
||||
if not self.graph or not self.graph.graph.has_node(node_id):
|
||||
@@ -133,7 +139,7 @@ class CorrelationProvider(BaseProvider):
|
||||
|
||||
# Create correlation if we have multiple nodes with this value
|
||||
if len(self.correlation_index[attr_value]['nodes']) > 1:
|
||||
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result)
|
||||
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result, discovery_time)
|
||||
correlations_found += 1
|
||||
|
||||
# Log correlation results
|
||||
@@ -187,9 +193,11 @@ class CorrelationProvider(BaseProvider):
|
||||
|
||||
return False
|
||||
|
||||
def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any], result: ProviderResult):
|
||||
def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any],
|
||||
result: ProviderResult, discovery_time: datetime):
|
||||
"""
|
||||
Create correlation relationships with enhanced deduplication and validation.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
"""
|
||||
correlation_node_id = f"corr_{hash(str(value)) & 0x7FFFFFFF}"
|
||||
nodes = correlation_data['nodes']
|
||||
@@ -216,7 +224,6 @@ class CorrelationProvider(BaseProvider):
|
||||
value=value,
|
||||
attr_type=str(type(value).__name__),
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={
|
||||
'correlated_nodes': list(nodes),
|
||||
'sources': sources,
|
||||
@@ -225,7 +232,7 @@ class CorrelationProvider(BaseProvider):
|
||||
}
|
||||
)
|
||||
|
||||
# Create relationships with source validation
|
||||
# Create relationships with source validation and enhanced timestamps
|
||||
created_relationships = set()
|
||||
|
||||
for source in sources:
|
||||
@@ -240,19 +247,23 @@ class CorrelationProvider(BaseProvider):
|
||||
|
||||
relationship_label = f"corr_{provider}_{attribute}"
|
||||
|
||||
# Enhanced raw_data with discovery timestamp for time-based edge coloring
|
||||
raw_data = {
|
||||
'correlation_value': value,
|
||||
'original_attribute': attribute,
|
||||
'correlation_type': 'attribute_matching',
|
||||
'correlation_size': len(nodes),
|
||||
'discovery_timestamp': discovery_time.isoformat(),
|
||||
'relevance_timestamp': discovery_time.isoformat() # Correlation data is "fresh" when discovered
|
||||
}
|
||||
|
||||
# Add the relationship to the result
|
||||
result.add_relationship(
|
||||
source_node=node_id,
|
||||
target_node=correlation_node_id,
|
||||
relationship_type=relationship_label,
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
raw_data={
|
||||
'correlation_value': value,
|
||||
'original_attribute': attribute,
|
||||
'correlation_type': 'attribute_matching',
|
||||
'correlation_size': len(nodes)
|
||||
}
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
created_relationships.add(relationship_key)
|
||||
@@ -18,6 +18,7 @@ class CrtShProvider(BaseProvider):
|
||||
Provider for querying crt.sh certificate transparency database.
|
||||
FIXED: Improved caching logic and error handling to prevent infinite retry loops.
|
||||
Returns standardized ProviderResult objects with caching support.
|
||||
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
|
||||
"""
|
||||
|
||||
def __init__(self, name=None, session_config=None):
|
||||
@@ -131,6 +132,7 @@ class CrtShProvider(BaseProvider):
|
||||
def query_domain(self, domain: str) -> ProviderResult:
|
||||
"""
|
||||
FIXED: Simplified and more robust domain querying with better error handling.
|
||||
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
|
||||
"""
|
||||
if not _is_valid_domain(domain):
|
||||
return ProviderResult()
|
||||
@@ -245,7 +247,6 @@ class CrtShProvider(BaseProvider):
|
||||
target_node=rel_data.get("target_node", ""),
|
||||
relationship_type=rel_data.get("relationship_type", ""),
|
||||
provider=rel_data.get("provider", self.name),
|
||||
confidence=float(rel_data.get("confidence", 0.8)),
|
||||
raw_data=rel_data.get("raw_data", {})
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
@@ -265,7 +266,6 @@ class CrtShProvider(BaseProvider):
|
||||
value=attr_data.get("value"),
|
||||
attr_type=attr_data.get("type", "unknown"),
|
||||
provider=attr_data.get("provider", self.name),
|
||||
confidence=float(attr_data.get("confidence", 0.9)),
|
||||
metadata=attr_data.get("metadata", {})
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
@@ -293,7 +293,6 @@ class CrtShProvider(BaseProvider):
|
||||
"source_node": rel.source_node,
|
||||
"target_node": rel.target_node,
|
||||
"relationship_type": rel.relationship_type,
|
||||
"confidence": rel.confidence,
|
||||
"provider": rel.provider,
|
||||
"raw_data": rel.raw_data
|
||||
} for rel in result.relationships
|
||||
@@ -305,7 +304,6 @@ class CrtShProvider(BaseProvider):
|
||||
"value": attr.value,
|
||||
"type": attr.type,
|
||||
"provider": attr.provider,
|
||||
"confidence": attr.confidence,
|
||||
"metadata": attr.metadata
|
||||
} for attr in result.attributes
|
||||
]
|
||||
@@ -372,6 +370,7 @@ class CrtShProvider(BaseProvider):
|
||||
"""
|
||||
Process certificates to create proper domain and CA nodes.
|
||||
FIXED: Better error handling and progress tracking.
|
||||
UPDATED: Enhanced with certificate timestamps for time-based edge coloring.
|
||||
"""
|
||||
result = ProviderResult()
|
||||
|
||||
@@ -391,8 +390,7 @@ class CrtShProvider(BaseProvider):
|
||||
name="crtsh_data_warning",
|
||||
value=incompleteness_warning,
|
||||
attr_type='metadata',
|
||||
provider=self.name,
|
||||
confidence=1.0
|
||||
provider=self.name
|
||||
)
|
||||
|
||||
all_discovered_domains = set()
|
||||
@@ -415,16 +413,28 @@ class CrtShProvider(BaseProvider):
|
||||
if cert_domains:
|
||||
all_discovered_domains.update(cert_domains)
|
||||
|
||||
# Create CA nodes for certificate issuers
|
||||
# Create CA nodes for certificate issuers with timestamp
|
||||
issuer_name = self._parse_issuer_organization(cert_data.get('issuer_name', ''))
|
||||
if issuer_name and issuer_name not in processed_issuers:
|
||||
# Enhanced raw_data with certificate timestamp for time-based edge coloring
|
||||
issuer_raw_data = {'issuer_dn': cert_data.get('issuer_name', '')}
|
||||
|
||||
# Add certificate issue date (not_before) as relevance timestamp
|
||||
not_before = cert_data.get('not_before')
|
||||
if not_before:
|
||||
try:
|
||||
not_before_date = self._parse_certificate_date(not_before)
|
||||
issuer_raw_data['cert_not_before'] = not_before_date.isoformat()
|
||||
issuer_raw_data['relevance_timestamp'] = not_before_date.isoformat() # Standardized field
|
||||
except Exception as e:
|
||||
self.logger.logger.debug(f"Failed to parse not_before date for issuer: {e}")
|
||||
|
||||
result.add_relationship(
|
||||
source_node=query_domain,
|
||||
target_node=issuer_name,
|
||||
relationship_type='crtsh_cert_issuer',
|
||||
provider=self.name,
|
||||
confidence=0.95,
|
||||
raw_data={'issuer_dn': cert_data.get('issuer_name', '')}
|
||||
raw_data=issuer_raw_data
|
||||
)
|
||||
processed_issuers.add(issuer_name)
|
||||
|
||||
@@ -442,7 +452,6 @@ class CrtShProvider(BaseProvider):
|
||||
value=value,
|
||||
attr_type='certificate_data',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={'certificate_id': cert_data.get('id')}
|
||||
)
|
||||
|
||||
@@ -457,7 +466,7 @@ class CrtShProvider(BaseProvider):
|
||||
self.logger.logger.info(f"CrtSh query cancelled before relationship creation for domain: {query_domain}")
|
||||
return result
|
||||
|
||||
# Create selective relationships to avoid large entities
|
||||
# Create selective relationships to avoid large entities with enhanced timestamps
|
||||
relationships_created = 0
|
||||
for discovered_domain in all_discovered_domains:
|
||||
if discovered_domain == query_domain:
|
||||
@@ -467,25 +476,36 @@ class CrtShProvider(BaseProvider):
|
||||
continue
|
||||
|
||||
if self._should_create_relationship(query_domain, discovered_domain):
|
||||
confidence = self._calculate_domain_relationship_confidence(
|
||||
query_domain, discovered_domain, [], all_discovered_domains
|
||||
# Enhanced raw_data with certificate timestamp for domain relationships
|
||||
domain_raw_data = {'relationship_type': 'certificate_discovery'}
|
||||
|
||||
# Find the most recent certificate for this domain pair to use as timestamp
|
||||
most_recent_cert = self._find_most_recent_cert_for_domains(
|
||||
certificates, query_domain, discovered_domain
|
||||
)
|
||||
if most_recent_cert:
|
||||
not_before = most_recent_cert.get('not_before')
|
||||
if not_before:
|
||||
try:
|
||||
not_before_date = self._parse_certificate_date(not_before)
|
||||
domain_raw_data['cert_not_before'] = not_before_date.isoformat()
|
||||
domain_raw_data['relevance_timestamp'] = not_before_date.isoformat()
|
||||
except Exception as e:
|
||||
self.logger.logger.debug(f"Failed to parse not_before date for domain relationship: {e}")
|
||||
|
||||
result.add_relationship(
|
||||
source_node=query_domain,
|
||||
target_node=discovered_domain,
|
||||
relationship_type='crtsh_san_certificate',
|
||||
provider=self.name,
|
||||
confidence=confidence,
|
||||
raw_data={'relationship_type': 'certificate_discovery'}
|
||||
raw_data=domain_raw_data
|
||||
)
|
||||
|
||||
self.log_relationship_discovery(
|
||||
source_node=query_domain,
|
||||
target_node=discovered_domain,
|
||||
relationship_type='crtsh_san_certificate',
|
||||
confidence_score=confidence,
|
||||
raw_data={'relationship_type': 'certificate_discovery'},
|
||||
raw_data=domain_raw_data,
|
||||
discovery_method="certificate_transparency_analysis"
|
||||
)
|
||||
relationships_created += 1
|
||||
@@ -493,6 +513,31 @@ class CrtShProvider(BaseProvider):
|
||||
self.logger.logger.info(f"CrtSh processing completed for {query_domain}: processed {processed_certs}/{len(certificates)} certificates, {len(all_discovered_domains)} domains, {relationships_created} relationships")
|
||||
return result
|
||||
|
||||
def _find_most_recent_cert_for_domains(self, certificates: List[Dict[str, Any]],
                                       domain1: str, domain2: str) -> Optional[Dict[str, Any]]:
    """
    Find the most recent certificate that contains both domains.

    Used for determining the relevance timestamp for domain relationships.
    "Most recent" means the latest ``not_before`` value as parsed by
    ``_parse_certificate_date``.

    Args:
        certificates: Certificate records to search. ``None`` or an empty
            list yields ``None``.
        domain1: First domain that must appear in the certificate.
        domain2: Second domain that must appear in the certificate.

    Returns:
        The certificate dict with the latest ``not_before`` date among those
        covering both domains, or ``None`` when no certificate qualifies.
    """
    most_recent_cert = None
    most_recent_date = None

    for cert in certificates or []:
        # Only certificates covering both domains are candidates.
        cert_domains = self._extract_domains_from_certificate(cert)
        if domain1 not in cert_domains or domain2 not in cert_domains:
            continue

        not_before = cert.get('not_before')
        if not not_before:
            # Without an issue date the certificate cannot be ranked.
            continue

        try:
            cert_date = self._parse_certificate_date(not_before)
            # The comparison stays inside the try block so that a date which
            # cannot be compared with the current best is skipped as well.
            if most_recent_date is None or cert_date > most_recent_date:
                most_recent_date = cert_date
                most_recent_cert = cert
        except Exception as e:
            # Best effort: an unusable date must not abort the search, but
            # record it instead of dropping the certificate silently.
            self.logger.logger.debug(f"Failed to parse not_before date while selecting most recent certificate: {e}")

    return most_recent_cert
|
||||
|
||||
# [Rest of the methods remain the same as in the original file]
|
||||
def _should_create_relationship(self, source_domain: str, target_domain: str) -> bool:
|
||||
"""
|
||||
@@ -664,25 +709,6 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
return [d for d in final_domains if _is_valid_domain(d)]
|
||||
|
||||
def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str,
                                              shared_certificates: List[Dict[str, Any]],
                                              all_discovered_domains: Set[str]) -> float:
    """
    Score how confident we are in a relationship between two domains.

    A fixed base score of 0.9 is raised by a bonus that depends on the
    context reported by ``_determine_relationship_context(domain2, domain1)``,
    and the sum is clamped to the range [0.1, 1.0].
    ``shared_certificates`` and ``all_discovered_domains`` are accepted but
    not consulted by this calculation.
    """
    # Bonus per relationship context; any context not listed earns nothing.
    bonus_by_context = {
        'exact_match': 0.0,
        'subdomain': 0.1,
        'parent_domain': 0.05,
    }

    context = self._determine_relationship_context(domain2, domain1)
    score = 0.9 + bonus_by_context.get(context, 0.0)

    # Clamp to the allowed confidence range.
    capped = min(1.0, score)
    return max(0.1, capped)
|
||||
|
||||
def _determine_relationship_context(self, cert_domain: str, query_domain: str) -> str:
|
||||
"""Determine the context of the relationship between certificate domain and query domain."""
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
from dns import resolver, reversename
|
||||
from typing import Dict
|
||||
from datetime import datetime, timezone
|
||||
from .base_provider import BaseProvider
|
||||
from core.provider_result import ProviderResult
|
||||
from utils.helpers import _is_valid_ip, _is_valid_domain, get_ip_version
|
||||
@@ -11,6 +12,7 @@ class DNSProvider(BaseProvider):
|
||||
"""
|
||||
Provider for standard DNS resolution and reverse DNS lookups.
|
||||
Now returns standardized ProviderResult objects with IPv4 and IPv6 support.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
"""
|
||||
|
||||
def __init__(self, name=None, session_config=None):
|
||||
@@ -51,6 +53,7 @@ class DNSProvider(BaseProvider):
|
||||
"""
|
||||
Query DNS records for the domain to discover relationships and attributes.
|
||||
FIXED: Now creates separate attributes for each DNS record type.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
|
||||
Args:
|
||||
domain: Domain to investigate
|
||||
@@ -62,11 +65,12 @@ class DNSProvider(BaseProvider):
|
||||
return ProviderResult()
|
||||
|
||||
result = ProviderResult()
|
||||
discovery_time = datetime.now(timezone.utc)
|
||||
|
||||
# Query all record types - each gets its own attribute
|
||||
for record_type in ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']:
|
||||
try:
|
||||
self._query_record(domain, record_type, result)
|
||||
self._query_record(domain, record_type, result, discovery_time)
|
||||
#except resolver.NoAnswer:
|
||||
# This is not an error, just a confirmation that the record doesn't exist.
|
||||
#self.logger.logger.debug(f"No {record_type} record found for {domain}")
|
||||
@@ -79,6 +83,7 @@ class DNSProvider(BaseProvider):
|
||||
def query_ip(self, ip: str) -> ProviderResult:
|
||||
"""
|
||||
Query reverse DNS for the IP address (supports both IPv4 and IPv6).
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
|
||||
Args:
|
||||
ip: IP address to investigate (IPv4 or IPv6)
|
||||
@@ -91,6 +96,7 @@ class DNSProvider(BaseProvider):
|
||||
|
||||
result = ProviderResult()
|
||||
ip_version = get_ip_version(ip)
|
||||
discovery_time = datetime.now(timezone.utc)
|
||||
|
||||
try:
|
||||
# Perform reverse DNS lookup (works for both IPv4 and IPv6)
|
||||
@@ -112,20 +118,24 @@ class DNSProvider(BaseProvider):
|
||||
relationship_type = 'dns_a_record'
|
||||
record_prefix = 'A'
|
||||
|
||||
# Enhanced raw_data with discovery timestamp for time-based edge coloring
|
||||
raw_data = {
|
||||
'query_type': 'PTR',
|
||||
'ip_address': ip,
|
||||
'ip_version': ip_version,
|
||||
'hostname': hostname,
|
||||
'ttl': response.ttl,
|
||||
'discovery_timestamp': discovery_time.isoformat(),
|
||||
'relevance_timestamp': discovery_time.isoformat() # DNS data is "fresh" when discovered
|
||||
}
|
||||
|
||||
# Add the relationship
|
||||
result.add_relationship(
|
||||
source_node=ip,
|
||||
target_node=hostname,
|
||||
relationship_type='dns_ptr_record',
|
||||
provider=self.name,
|
||||
confidence=0.8,
|
||||
raw_data={
|
||||
'query_type': 'PTR',
|
||||
'ip_address': ip,
|
||||
'ip_version': ip_version,
|
||||
'hostname': hostname,
|
||||
'ttl': response.ttl
|
||||
}
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
# Add to PTR records list
|
||||
@@ -136,14 +146,7 @@ class DNSProvider(BaseProvider):
|
||||
source_node=ip,
|
||||
target_node=hostname,
|
||||
relationship_type='dns_ptr_record',
|
||||
confidence_score=0.8,
|
||||
raw_data={
|
||||
'query_type': 'PTR',
|
||||
'ip_address': ip,
|
||||
'ip_version': ip_version,
|
||||
'hostname': hostname,
|
||||
'ttl': response.ttl
|
||||
},
|
||||
raw_data=raw_data,
|
||||
discovery_method=f"reverse_dns_lookup_ipv{ip_version}"
|
||||
)
|
||||
|
||||
@@ -155,7 +158,6 @@ class DNSProvider(BaseProvider):
|
||||
value=ptr_records,
|
||||
attr_type='dns_record',
|
||||
provider=self.name,
|
||||
confidence=0.8,
|
||||
metadata={'ttl': response.ttl, 'ip_version': ip_version}
|
||||
)
|
||||
|
||||
@@ -170,10 +172,11 @@ class DNSProvider(BaseProvider):
|
||||
|
||||
return result
|
||||
|
||||
def _query_record(self, domain: str, record_type: str, result: ProviderResult) -> None:
|
||||
def _query_record(self, domain: str, record_type: str, result: ProviderResult, discovery_time: datetime) -> None:
|
||||
"""
|
||||
FIXED: Query DNS records with unique attribute names for each record type.
|
||||
Enhanced to better handle IPv6 AAAA records.
|
||||
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
|
||||
"""
|
||||
try:
|
||||
self.total_requests += 1
|
||||
@@ -217,18 +220,20 @@ class DNSProvider(BaseProvider):
|
||||
if record_type in ['A', 'AAAA'] and _is_valid_ip(target):
|
||||
ip_version = get_ip_version(target)
|
||||
|
||||
# Enhanced raw_data with discovery timestamp for time-based edge coloring
|
||||
raw_data = {
|
||||
'query_type': record_type,
|
||||
'domain': domain,
|
||||
'value': target,
|
||||
'ttl': response.ttl
|
||||
'ttl': response.ttl,
|
||||
'discovery_timestamp': discovery_time.isoformat(),
|
||||
'relevance_timestamp': discovery_time.isoformat() # DNS data is "fresh" when discovered
|
||||
}
|
||||
|
||||
if ip_version:
|
||||
raw_data['ip_version'] = ip_version
|
||||
|
||||
relationship_type = f"dns_{record_type.lower()}_record"
|
||||
confidence = 0.8
|
||||
|
||||
# Add relationship
|
||||
result.add_relationship(
|
||||
@@ -236,7 +241,6 @@ class DNSProvider(BaseProvider):
|
||||
target_node=target,
|
||||
relationship_type=relationship_type,
|
||||
provider=self.name,
|
||||
confidence=confidence,
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
@@ -252,7 +256,6 @@ class DNSProvider(BaseProvider):
|
||||
source_node=domain,
|
||||
target_node=target,
|
||||
relationship_type=relationship_type,
|
||||
confidence_score=confidence,
|
||||
raw_data=raw_data,
|
||||
discovery_method=discovery_method
|
||||
)
|
||||
@@ -276,7 +279,6 @@ class DNSProvider(BaseProvider):
|
||||
value=dns_records,
|
||||
attr_type='dns_record_list',
|
||||
provider=self.name,
|
||||
confidence=0.8,
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ class ShodanProvider(BaseProvider):
|
||||
"""
|
||||
Provider for querying Shodan API for IP address information.
|
||||
Now returns standardized ProviderResult objects with caching support for IPv4 and IPv6.
|
||||
UPDATED: Enhanced with last_seen timestamp for time-based edge coloring.
|
||||
"""
|
||||
|
||||
def __init__(self, name=None, session_config=None):
|
||||
@@ -145,6 +146,7 @@ class ShodanProvider(BaseProvider):
|
||||
"""
|
||||
Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data.
|
||||
FIXED: Proper 404 handling to prevent unnecessary retries.
|
||||
UPDATED: Enhanced with last_seen timestamp extraction for time-based edge coloring.
|
||||
|
||||
Args:
|
||||
ip: IP address to investigate (IPv4 or IPv6)
|
||||
@@ -304,7 +306,6 @@ class ShodanProvider(BaseProvider):
|
||||
target_node=rel_data["target_node"],
|
||||
relationship_type=rel_data["relationship_type"],
|
||||
provider=rel_data["provider"],
|
||||
confidence=rel_data["confidence"],
|
||||
raw_data=rel_data.get("raw_data", {})
|
||||
)
|
||||
|
||||
@@ -316,7 +317,6 @@ class ShodanProvider(BaseProvider):
|
||||
value=attr_data["value"],
|
||||
attr_type=attr_data["type"],
|
||||
provider=attr_data["provider"],
|
||||
confidence=attr_data["confidence"],
|
||||
metadata=attr_data.get("metadata", {})
|
||||
)
|
||||
|
||||
@@ -336,7 +336,6 @@ class ShodanProvider(BaseProvider):
|
||||
"source_node": rel.source_node,
|
||||
"target_node": rel.target_node,
|
||||
"relationship_type": rel.relationship_type,
|
||||
"confidence": rel.confidence,
|
||||
"provider": rel.provider,
|
||||
"raw_data": rel.raw_data
|
||||
} for rel in result.relationships
|
||||
@@ -348,7 +347,6 @@ class ShodanProvider(BaseProvider):
|
||||
"value": attr.value,
|
||||
"type": attr.type,
|
||||
"provider": attr.provider,
|
||||
"confidence": attr.confidence,
|
||||
"metadata": attr.metadata
|
||||
} for attr in result.attributes
|
||||
]
|
||||
@@ -362,25 +360,40 @@ class ShodanProvider(BaseProvider):
|
||||
"""
|
||||
VERIFIED: Process Shodan data creating ISP nodes with ASN attributes and proper relationships.
|
||||
Enhanced to include IP version information for IPv6 addresses.
|
||||
UPDATED: Enhanced with last_seen timestamp for time-based edge coloring.
|
||||
"""
|
||||
result = ProviderResult()
|
||||
|
||||
# Determine IP version for metadata
|
||||
ip_version = get_ip_version(ip)
|
||||
|
||||
# Extract last_seen timestamp for time-based edge coloring
|
||||
last_seen = data.get('last_seen')
|
||||
|
||||
# VERIFIED: Extract ISP information and create proper ISP node with ASN
|
||||
isp_name = data.get('org')
|
||||
asn_value = data.get('asn')
|
||||
|
||||
if isp_name and asn_value:
|
||||
# Enhanced raw_data with last_seen timestamp
|
||||
raw_data = {
|
||||
'asn': asn_value,
|
||||
'shodan_org': isp_name,
|
||||
'ip_version': ip_version
|
||||
}
|
||||
|
||||
# Add last_seen timestamp if available
|
||||
if last_seen:
|
||||
raw_data['last_seen'] = last_seen
|
||||
raw_data['relevance_timestamp'] = last_seen # Standardized field for time-based coloring
|
||||
|
||||
# Create relationship from IP to ISP
|
||||
result.add_relationship(
|
||||
source_node=ip,
|
||||
target_node=isp_name,
|
||||
relationship_type='shodan_isp',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
raw_data={'asn': asn_value, 'shodan_org': isp_name, 'ip_version': ip_version}
|
||||
raw_data=raw_data
|
||||
)
|
||||
|
||||
# Add ASN as attribute to the ISP node
|
||||
@@ -390,7 +403,6 @@ class ShodanProvider(BaseProvider):
|
||||
value=asn_value,
|
||||
attr_type='isp_info',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={'description': 'Autonomous System Number from Shodan', 'ip_version': ip_version}
|
||||
)
|
||||
|
||||
@@ -401,7 +413,6 @@ class ShodanProvider(BaseProvider):
|
||||
value=isp_name,
|
||||
attr_type='isp_info',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={'description': 'Organization name from Shodan', 'ip_version': ip_version}
|
||||
)
|
||||
|
||||
@@ -416,20 +427,24 @@ class ShodanProvider(BaseProvider):
|
||||
else:
|
||||
relationship_type = 'shodan_a_record'
|
||||
|
||||
# Enhanced raw_data with last_seen timestamp
|
||||
hostname_raw_data = {**data, 'ip_version': ip_version}
|
||||
if last_seen:
|
||||
hostname_raw_data['last_seen'] = last_seen
|
||||
hostname_raw_data['relevance_timestamp'] = last_seen
|
||||
|
||||
result.add_relationship(
|
||||
source_node=ip,
|
||||
target_node=hostname,
|
||||
relationship_type=relationship_type,
|
||||
provider=self.name,
|
||||
confidence=0.8,
|
||||
raw_data={**data, 'ip_version': ip_version}
|
||||
raw_data=hostname_raw_data
|
||||
)
|
||||
self.log_relationship_discovery(
|
||||
source_node=ip,
|
||||
target_node=hostname,
|
||||
relationship_type=relationship_type,
|
||||
confidence_score=0.8,
|
||||
raw_data={**data, 'ip_version': ip_version},
|
||||
raw_data=hostname_raw_data,
|
||||
discovery_method=f"shodan_host_lookup_ipv{ip_version}"
|
||||
)
|
||||
elif key == 'ports':
|
||||
@@ -441,7 +456,6 @@ class ShodanProvider(BaseProvider):
|
||||
value=port,
|
||||
attr_type='shodan_network_info',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={'ip_version': ip_version}
|
||||
)
|
||||
elif isinstance(value, (str, int, float, bool)) and value is not None:
|
||||
@@ -452,7 +466,6 @@ class ShodanProvider(BaseProvider):
|
||||
value=value,
|
||||
attr_type='shodan_info',
|
||||
provider=self.name,
|
||||
confidence=0.9,
|
||||
metadata={'ip_version': ip_version}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user