#!/bin/bash
#
# Forensic Website Collection Script
#
# Creates a forensically sound copy of a website with all resources,
# proper documentation (metadata + report), and integrity verification
# (SHA-256 checksums, WARC capture of full HTTP transactions).

# Consistent locale and UTC timezone so timestamps and tool output are
# reproducible across machines.
export LC_ALL=C
export TZ=UTC

# Abort on use of unset variables; critical commands are checked explicitly.
set -u

# Color codes for terminal output.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

#######################################
# Print a colored message.
# Arguments: $1 - ANSI color escape, $2 - message text
# Outputs:   message to stdout
#######################################
print_message() {
  local color=$1
  local message=$2
  echo -e "${color}${message}${NC}"
}

#######################################
# Verify that all external tools this script relies on are installed.
# Exits with status 1 listing anything missing.
#######################################
check_dependencies() {
  local deps=("wget" "tar" "sha256sum" "date")
  local missing=()
  local dep

  for dep in "${deps[@]}"; do
    if ! command -v "$dep" &> /dev/null; then
      missing+=("$dep")
    fi
  done

  if [ ${#missing[@]} -gt 0 ]; then
    print_message "$RED" "Error: Missing dependencies: ${missing[*]}"
    print_message "$YELLOW" "Please install the missing tools and try again."
    exit 1
  fi
}

#######################################
# Validate that a URL starts with http:// or https://.
# Arguments: $1 - URL to check
# Returns:   0 if valid, 1 otherwise
#######################################
validate_url() {
  local url=$1
  if [[ ! $url =~ ^https?:// ]]; then
    print_message "$RED" "Error: URL must start with http:// or https://"
    return 1
  fi
  return 0
}

#######################################
# Create a timestamped working directory and print its ABSOLUTE path on
# stdout. Status messages go to stderr so that command substitution in the
# caller captures only the path.
#
# NOTE(fix): the original version ran 'cd' here, but the function is called
# via $(...), so the 'cd' only affected the subshell and the script never
# actually entered the directory. The caller must cd into the echoed path.
#######################################
create_working_directory() {
  local timestamp dir_name
  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
  dir_name="forensic_collection_${timestamp}"
  mkdir -p "$dir_name" || return 1
  print_message "$GREEN" "Created working directory: $dir_name" >&2
  printf '%s\n' "$PWD/$dir_name"
}

#######################################
# Write collection_metadata.txt (in the current directory) documenting who
# collected what, when, and with which tool versions and parameters.
# Arguments: $1 - URL, $2 - collector name, $3 - collector email,
#            $4 - case number (may be empty)
#######################################
create_metadata() {
  local url=$1
  local collector_name=$2
  local collector_email=$3
  local case_number=$4
  local timestamp
  timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')

  cat > collection_metadata.txt << EOF
=== FORENSIC WEB COLLECTION METADATA ===

Collection Date/Time: $timestamp
Source URL: $url
Collector Name: $collector_name
Collector Email: $collector_email
Case/Reference Number: ${case_number:-N/A}
Collection Method: wget with WARC output

Tool Versions:
- wget: $(wget --version | head -n1)
- bash: $BASH_VERSION
- OS: $(uname -a)

Collection Parameters:
- Recursive: Yes
- Page requisites: Yes (CSS, JS, images)
- Maximum file size per WARC: 1GB
- Rate limit: 200kb/s
- Random delays: 1-2 seconds
- User agent: Modified browser string
- SSL verification: Disabled (for self-signed certificates)

Documentation:
- WARC files: Contains full HTTP transactions
- Mirror directory: Local file system copy
- Checksums: SHA-256 for all collected files
- This metadata file: collection_metadata.txt
EOF

  print_message "$GREEN" "Created metadata file"
}

#######################################
# Run wget to mirror the site and capture a WARC archive.
# Arguments: $1 - URL to collect
# Outputs:   progress messages; WARC base name on success
# Returns:   wget's failure as 1
#######################################
perform_collection() {
  local url=$1
  local domain timestamp warc_file collector_info

  # Strip scheme, then path, then port to isolate the host name.
  domain=$(printf '%s\n' "$url" | sed -E 's|^https?://||; s|/.*$||; s|:.*$||')
  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
  warc_file="${domain}_${timestamp}"

  print_message "$YELLOW" "Starting collection of: $url"
  print_message "$YELLOW" "Domain: $domain"
  print_message "$YELLOW" "WARC file: ${warc_file}.warc"

  # Embed collector identity in the WARC headers for chain of custody.
  collector_info=$(grep "Collector Name:\|Collector Email:" collection_metadata.txt | tr '\n' ' ')

  # NOTE(fix): --no-clobber was removed. wget refuses to combine it with
  # --timestamping (which --mirror implies) and aborts with
  # "Can't timestamp and not clobber old files at the same time", so the
  # original command could never run. --recursive/--timestamping/--level
  # are implied by --mirror and were dropped as redundant.
  if wget \
    --mirror \
    --convert-links \
    --adjust-extension \
    --page-requisites \
    --no-parent \
    --continue \
    --no-check-certificate \
    --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" \
    --wait=1 \
    --random-wait \
    --limit-rate=200k \
    --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" \
    --header="Accept-Language: en-US,en;q=0.5" \
    --warc-file="$warc_file" \
    --warc-header="operator: $collector_info" \
    --warc-header="description: Forensic collection of $url" \
    --warc-max-size=1G \
    "$url"; then
    print_message "$GREEN" "Website collection completed successfully"
    echo "$warc_file.warc"
  else
    print_message "$RED" "Website collection failed"
    return 1
  fi
}

#######################################
# Generate SHA-256 checksums for everything collected, plus dedicated
# checksum files for the WARC archive(s) and the metadata file.
#######################################
generate_checksums() {
  print_message "$YELLOW" "Generating SHA-256 checksums for all collected files..."

  # Hash every collected file. NOTE(fix): exclude the manifest itself --
  # the original hashed the half-written file it was redirecting into.
  find . -type f ! -name 'all_files_checksums.sha256' -exec sha256sum {} \; \
    > all_files_checksums.sha256

  # Dedicated checksum for the WARC file(s), if any were produced.
  if compgen -G '*.warc' > /dev/null; then
    sha256sum *.warc > warc_checksum.sha256
  fi

  # Dedicated checksum for the metadata file.
  sha256sum collection_metadata.txt > metadata_checksum.sha256

  print_message "$GREEN" "Checksums generated successfully"
}

#######################################
# Bundle the working directory into a tar.gz one level up, checksum it,
# and display a summary. Leaves the shell in the parent directory on
# success (as the original did).
#######################################
create_final_package() {
  local timestamp package_name
  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
  package_name="forensic_web_evidence_${timestamp}.tar.gz"

  print_message "$YELLOW" "Creating final evidence package..."

  if tar czf "../$package_name" ./*; then
    # Checksum the final package from the parent directory.
    cd .. || return 1
    sha256sum "$package_name" > "${package_name}.sha256"

    print_message "$GREEN" "Evidence package created: $package_name"
    print_message "$GREEN" "Package checksum: ${package_name}.sha256"

    # Show at most the first 20 entries of the archive.
    print_message "$YELLOW" "\nPackage contents:"
    tar tzf "$package_name" | head -20
    if [ "$(tar tzf "$package_name" | wc -l)" -gt 20 ]; then
      echo "... (truncated, showing first 20 items)"
    fi

    print_message "$YELLOW" "\nFinal integrity checksums:"
    cat "${package_name}.sha256"
  else
    print_message "$RED" "Failed to create evidence package"
    return 1
  fi
}

#######################################
# Write ../collection_report.txt summarizing the collection.
# Arguments: $1 - URL, $2 - human-readable start time,
#            $3 - start time as Unix timestamp (for duration math)
#######################################
create_collection_report() {
  local url=$1
  local start_time=$2
  local start_timestamp=$3
  local end_time end_timestamp duration

  end_time=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  end_timestamp=$(date +%s)
  # Duration via Unix timestamps avoids locale/format parsing issues.
  duration=$((end_timestamp - start_timestamp))

  cat > ../collection_report.txt << EOF
=== FORENSIC WEB COLLECTION REPORT ===

Collection Summary:
-------------------
Target URL: $url
Start Time: $start_time
End Time: $end_time
Duration: $duration seconds

Results:
--------
WARC Files: $(find . -maxdepth 1 -name '*.warc' | wc -l)
Total Files Collected: $(find . -type f | wc -l)
Total Size: $(du -sh . | cut -f1)

Package Information:
--------------------
Final Package: $(ls ../forensic_web_evidence_*.tar.gz 2>/dev/null)
Package Size: $(du -sh ../forensic_web_evidence_*.tar.gz 2>/dev/null | cut -f1)

Verification:
-------------
All files have been hashed with SHA-256
Final package integrity verified
Chain of custody maintained

Collection performed by: $(grep "Collector Name:" collection_metadata.txt | cut -d: -f2-)
Case Reference: $(grep "Case/Reference Number:" collection_metadata.txt | cut -d: -f2-)
EOF

  print_message "$GREEN" "Collection report created"
}

#######################################
# Main entry point: gather operator input, then run the collection
# pipeline (metadata -> wget/WARC -> checksums -> report -> package).
#######################################
main() {
  clear
  print_message "$GREEN" "=== FORENSIC WEBSITE COLLECTION SCRIPT ==="
  print_message "$GREEN" "========================================\n"

  check_dependencies

  # Gather operator input. -r so backslashes in input are taken literally.
  read -rp "Enter the website URL to collect: " URL
  validate_url "$URL" || exit 1
  read -rp "Enter your name: " COLLECTOR_NAME
  read -rp "Enter your email: " COLLECTOR_EMAIL
  read -rp "Enter case/reference number (optional): " CASE_NUMBER

  # Record start time (human-readable for the report, epoch for duration).
  START_TIME=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  START_TIMESTAMP=$(date +%s)

  # Create and enter the working directory. The cd MUST happen here, in
  # the main shell -- not inside the command substitution.
  WORK_DIR=$(create_working_directory) || exit 1
  cd "$WORK_DIR" || exit 1

  create_metadata "$URL" "$COLLECTOR_NAME" "$COLLECTOR_EMAIL" "$CASE_NUMBER"

  if ! perform_collection "$URL"; then
    print_message "$RED" "Collection failed. Exiting."
    exit 1
  fi

  generate_checksums

  create_collection_report "$URL" "$START_TIME" "$START_TIMESTAMP"

  create_final_package

  print_message "$GREEN" "\n=== COLLECTION COMPLETE ==="
  print_message "$GREEN" "Working directory: $WORK_DIR"
  print_message "$GREEN" "Final package and checksums are one directory above"
  print_message "$YELLOW" "\nNext steps:"
  print_message "$YELLOW" "1. Verify the package checksum"
  print_message "$YELLOW" "2. Store the package in secure evidence storage"
  print_message "$YELLOW" "3. Document the storage location in your case management system"
  print_message "$YELLOW" "4. Consider creating a backup copy"
}

main "$@"