remove-large-entity-temporarily #3

Merged
mstoeck3 merged 5 commits from remove-large-entity-temporarily into main 2025-09-19 12:29:27 +00:00
3 changed files with 105 additions and 11 deletions
Showing only changes of commit 7472e6f416 - Show all commits

View File

@@ -3,7 +3,7 @@
import json
import re
from pathlib import Path
from typing import List, Dict, Any, Set
from typing import List, Dict, Any, Set, Optional
from urllib.parse import quote
from datetime import datetime, timezone
import requests
@@ -285,6 +285,17 @@ class CrtShProvider(BaseProvider):
if self._stop_event and self._stop_event.is_set():
self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}")
return result
incompleteness_warning = self._check_for_incomplete_data(query_domain, certificates)
if incompleteness_warning:
result.add_attribute(
target_node=query_domain,
name="crtsh_data_warning",
value=incompleteness_warning,
attr_type='metadata',
provider=self.name,
confidence=1.0
)
all_discovered_domains = set()
processed_issuers = set()
@@ -577,4 +588,30 @@ class CrtShProvider(BaseProvider):
elif query_domain.endswith(f'.{cert_domain}'):
return 'parent_domain'
else:
return 'related_domain'
return 'related_domain'
def _check_for_incomplete_data(self, domain: str, certificates: List[Dict[str, Any]]) -> Optional[str]:
"""
Analyzes the certificate list to heuristically detect if the data from crt.sh is incomplete.
"""
cert_count = len(certificates)
# Heuristic 1: Check if the number of certs hits a known hard limit.
if cert_count >= 10000:
return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."
# Heuristic 2: Check if all returned certificates are old.
if cert_count > 1000: # Only apply this for a reasonable number of certs
latest_expiry = None
for cert in certificates:
try:
not_after = self._parse_certificate_date(cert.get('not_after'))
if latest_expiry is None or not_after > latest_expiry:
latest_expiry = not_after
except (ValueError, TypeError):
continue
if latest_expiry and (datetime.now(timezone.utc) - latest_expiry).days > 365:
return f"Incomplete data suspected: The latest certificate expired more than a year ago ({latest_expiry.strftime('%Y-%m-%d')})."
return None

View File

@@ -1565,19 +1565,42 @@ class GraphManager {
}
/**
* Unhide all hidden nodes, excluding those within a large entity.
* FIXED: Unhide all hidden nodes, excluding large entity members and disconnected nodes.
* This prevents orphaned large entity members from appearing as free-floating nodes.
*/
unhideAll() {
const allHiddenNodes = this.nodes.get({
filter: (node) => {
// Condition: Node is hidden AND it is NOT part of a large entity.
return node.hidden === true && !(node.metadata && node.metadata.large_entity_id);
// Skip nodes that are part of a large entity
if (node.metadata && node.metadata.large_entity_id) {
return false;
}
// Skip nodes that are not hidden
if (node.hidden !== true) {
return false;
}
// Skip nodes that have no edges (would appear disconnected)
const nodeId = node.id;
const hasIncomingEdges = this.edges.get().some(edge => edge.to === nodeId && !edge.hidden);
const hasOutgoingEdges = this.edges.get().some(edge => edge.from === nodeId && !edge.hidden);
if (!hasIncomingEdges && !hasOutgoingEdges) {
console.log(`Skipping disconnected node ${nodeId} from unhide`);
return false;
}
return true;
}
});
if (allHiddenNodes.length > 0) {
console.log(`Unhiding ${allHiddenNodes.length} nodes with valid connections`);
const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
} else {
console.log('No eligible nodes to unhide');
}
}

View File

@@ -1397,28 +1397,62 @@ class DNSReconApp {
}
/**
* UPDATED: Generate details for standard nodes with organized attribute grouping
* UPDATED: Generate details for standard nodes with organized attribute grouping and data warnings
*/
generateStandardNodeDetails(node) {
let html = '';
// Check for and display a crt.sh data warning if it exists
const crtshWarningAttr = this.findAttributeByName(node.attributes, 'crtsh_data_warning');
if (crtshWarningAttr) {
html += `
<div class="modal-section" style="border-left: 3px solid #ff9900; background: rgba(255, 153, 0, 0.05);">
<details open>
<summary style="color: #ff9900;">
<span> Data Integrity Warning</span>
</summary>
<div class="modal-section-content">
<p class="placeholder-subtext" style="color: #e0e0e0; font-size: 0.8rem; line-height: 1.5;">
${this.escapeHtml(crtshWarningAttr.value)}
<br><br>
This can occur for very large domains (e.g., google.com) where crt.sh may return a limited subset of all available certificates. As a result, the certificate status may not be fully representative.
</p>
</div>
</details>
</div>
`;
}
// Relationships sections
html += this.generateRelationshipsSection(node);
// UPDATED: Enhanced attributes section with intelligent grouping (no formatting)
if (node.attributes && Array.isArray(node.attributes) && node.attributes.length > 0) {
html += this.generateOrganizedAttributesSection(node.attributes, node.type);
}
// Description section
html += this.generateDescriptionSection(node);
// Metadata section (collapsed by default)
html += this.generateMetadataSection(node);
return html;
}
/**
* Helper method to find an attribute by name in the standardized attributes list
* @param {Array} attributes - List of StandardAttribute objects
* @param {string} name - Attribute name to find
* @returns {Object|null} The attribute object if found, null otherwise
*/
findAttributeByName(attributes, name) {
if (!Array.isArray(attributes)) {
return null;
}
return attributes.find(attr => attr.name === name) || null;
}
generateOrganizedAttributesSection(attributes, nodeType) {
if (!Array.isArray(attributes) || attributes.length === 0) {
return '';