unicode-decoder/decoder.py

71 lines
2.6 KiB
Python

#!/usr/bin/env python3
"""
Decoder for Paul Butler's emoji steganography method
Usage: python3 decoder.py "🥜[encoded-text]" [output-file]
"""
import sys
def variation_selector_to_byte(char):
    """Convert a variation selector character back to its byte value.

    Args:
        char: A single-character string (it is passed straight to ``ord``).

    Returns:
        An int in 0-15 for VS1-VS16 (U+FE00..U+FE0F), an int in 16-255 for
        VS17-VS256 (U+E0100..U+E01EF), or ``None`` when ``char`` is not a
        variation selector.
    """
    code = ord(char)
    # Original variation selectors (VS1-VS16): U+FE00 to U+FE0F
    if 0xFE00 <= code <= 0xFE0F:
        return code - 0xFE00
    # Supplementary variation selectors (VS17-VS256): U+E0100 to U+E01EF.
    # The +16 offset continues numbering after the 16 original selectors.
    if 0xE0100 <= code <= 0xE01EF:
        return code - 0xE0100 + 16
    return None
def decode_emoji_text(encoded_text):
    """Decode hidden text from emoji with variation selectors.

    Characters that precede the first variation selector (such as the
    carrier emoji) are skipped. Only the first contiguous run of variation
    selectors is decoded; scanning stops at the first non-selector character
    that follows it.

    Args:
        encoded_text: String to scan for variation selectors.

    Returns:
        The collected bytes decoded as UTF-8 (an empty string when no
        variation selector is present). If the bytes are not valid UTF-8,
        a string of the form ``"Raw bytes: [..]"`` listing the byte values.
    """
    payload = []
    for char in encoded_text:
        byte_val = variation_selector_to_byte(char)
        if byte_val is not None:
            payload.append(byte_val)
        elif payload:
            # A non-selector after at least one selector ends the hidden run.
            break
    try:
        return bytes(payload).decode('utf-8')
    except UnicodeDecodeError:
        return f"Raw bytes: {payload}"
def main():
    """Command-line entry point: decode argv[1] and write a report file.

    Expects one or two command-line arguments: the encoded text and an
    optional output path (default ``decoded_output.txt``). With any other
    argument count, prints usage and exits with status 1. Otherwise writes
    the decoded result plus a per-character breakdown of the input to the
    output file and prints a short summary.
    """
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print("Usage: python3 decoder.py '<encoded-emoji-text>' [output-file]")
        print(" If no output file specified, uses 'decoded_output.txt'")
        sys.exit(1)

    encoded = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) == 3 else "decoded_output.txt"
    decoded = decode_emoji_text(encoded)

    # Count only the bytes the decoder actually consumed: it stops at the
    # first non-selector after the first run, so selectors appearing later
    # in the input must not be reported as decoded.
    hidden_count = 0
    for char in encoded:
        if variation_selector_to_byte(char) is not None:
            hidden_count += 1
        elif hidden_count:
            break

    # Write results to file
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("=== EMOJI STEGANOGRAPHY DECODER RESULTS ===\n\n")
        f.write(f"Encoded text: {encoded}\n\n")
        f.write(f"Decoded result:\n{decoded}\n\n")

        # Unicode breakdown of every input character (not just the decoded run)
        f.write("=== CHARACTER ANALYSIS ===\n")
        for i, char in enumerate(encoded):
            code = ord(char)
            vs_byte = variation_selector_to_byte(char)
            if vs_byte is not None:
                # Show printable ASCII literally, everything else as \xNN.
                printable_char = chr(vs_byte) if 32 <= vs_byte <= 126 else f'\\x{vs_byte:02x}'
                f.write(f" {i:3d}: U+{code:04X} -> byte {vs_byte:3d} ('{printable_char}')\n")
            else:
                f.write(f" {i:3d}: U+{code:04X} '{char}' (base character)\n")

    print(f"✓ Decoding complete! Results saved to: {output_file}")
    print(f"✓ Decoded {hidden_count} hidden bytes")
    print(f"✓ Preview: {decoded[:100]}{'...' if len(decoded) > 100 else ''}")


if __name__ == "__main__":
    main()