From 06b76809825dc62f9f6e76d87ed5848ea27bf9f6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Dec 2025 20:45:22 +0000 Subject: [PATCH] Clarify timestamp format used in hash calculations Added comprehensive documentation to make it crystal clear that: - Timestamps are Unix epoch timestamps (seconds since 1970-01-01 00:00:00 UTC) stored as floats - Hash input format is "{timestamp}:{content}" with float-to-string conversion - Example: "1702345678.123456:Suspicious process detected" - Full float precision is preserved, ensuring forensic tamper-evidence Updated documentation in: - trace/models/__init__.py: Added field comments and detailed docstring for calculate_hash() - trace/crypto.py: Added comprehensive docstring for hash_content() with examples - CLAUDE.md: Added detailed explanation in Integrity System section --- CLAUDE.md | 8 ++++++++ trace/crypto.py | 20 ++++++++++++++++++++ trace/models/__init__.py | 14 +++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 939496a..953ca13 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -145,6 +145,14 @@ The codebase is organized into focused, single-responsibility modules to make it **Integrity System**: Every note automatically gets: 1. SHA256 hash of `timestamp:content` (via `Note.calculate_hash()`) + - **Timestamp Format**: Unix epoch timestamp as float (seconds since 1970-01-01 00:00:00 UTC) + - **Hash Input Format**: `"{timestamp}:{content}"` where timestamp is converted to string using Python's default str() conversion + - **Example**: For content "Suspicious process detected" with timestamp 1702345678.123456, the hash input is: + ``` + 1702345678.123456:Suspicious process detected + ``` + - This ensures integrity of both WHAT was said (content) and WHEN it was said (timestamp) + - The exact float precision is preserved in the hash, making timestamps forensically tamper-evident 2. 
Optional GPG clearsign signature (if `pgp_enabled` in settings and GPG available) **Tag System**: Regex-based hashtag extraction (`#word`) diff --git a/trace/crypto.py b/trace/crypto.py index 18f723e..48deded 100644 --- a/trace/crypto.py +++ b/trace/crypto.py @@ -184,5 +184,25 @@ class Crypto: @staticmethod def hash_content(content: str, timestamp: float) -> str: + """Calculate SHA256 hash of timestamp:content. + + Hash input format: "{timestamp}:{content}" + - timestamp: Unix epoch timestamp as float (seconds since 1970-01-01 00:00:00 UTC) + Example: 1702345678.123456 + - The float is converted to string using Python's default str() conversion + - Colon (':') separator between timestamp and content + - Ensures integrity of both WHAT was said and WHEN it was said + + Args: + content: The note content to hash + timestamp: Unix epoch timestamp as float + + Returns: + SHA256 hash as hexadecimal string (64 characters) + + Example: + Crypto.hash_content("Suspicious process detected", 1702345678.123456) + computes SHA256 of: "1702345678.123456:Suspicious process detected" + """ data = f"{timestamp}:{content}".encode('utf-8') return hashlib.sha256(data).hexdigest() diff --git a/trace/models/__init__.py b/trace/models/__init__.py index 3502401..51cba8e 100644 --- a/trace/models/__init__.py +++ b/trace/models/__init__.py @@ -12,6 +12,9 @@ from .extractors import TagExtractor, IOCExtractor @dataclass class Note: content: str + # Unix timestamp: seconds since 1970-01-01 00:00:00 UTC as float + # Example: 1702345678.123456 + # This exact float value (with full precision) is used in hash calculation timestamp: float = field(default_factory=time.time) note_id: str = field(default_factory=lambda: str(uuid.uuid4())) content_hash: str = "" @@ -28,7 +31,16 @@ class Note: self.iocs = IOCExtractor.extract_iocs(self.content) def calculate_hash(self): - # We hash the content + timestamp to ensure integrity of 'when' it was said + """Calculate SHA256 hash of timestamp:content.
+ + Hash input format: "{timestamp}:{content}" + - timestamp: Unix epoch timestamp as float (e.g., "1702345678.123456") + - The float is converted to string using Python's default str() conversion + - Colon separator between timestamp and content + - Ensures integrity of both WHAT was said and WHEN it was said + + Example hash input: "1702345678.123456:Suspicious process detected" + """ data = f"{self.timestamp}:{self.content}".encode('utf-8') self.content_hash = hashlib.sha256(data).hexdigest()