diff --git a/collector.sh b/collector.sh
new file mode 100644
index 0000000..0755bed
--- /dev/null
+++ b/collector.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+
+# Forensic Website Collection Script
+# This script creates a forensically sound copy of a website
+# with all resources, proper documentation, and integrity verification
+
+# Set consistent locale for date processing and UTC timezone
+export LC_ALL=C
+export TZ=UTC
+
+# Color codes for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Function to print colored messages
+print_message() {
+    local color=$1
+    local message=$2
+    echo -e "${color}${message}${NC}"
+}
+
+# Function to check if required tools are installed
+check_dependencies() {
+    local deps=("wget" "tar" "sha256sum" "date")
+    local missing=()
+
+    for dep in "${deps[@]}"; do
+        if ! command -v "$dep" &> /dev/null; then
+            missing+=("$dep")
+        fi
+    done
+
+    if [ ${#missing[@]} -gt 0 ]; then
+        print_message "$RED" "Error: Missing dependencies: ${missing[*]}"
+        print_message "$YELLOW" "Please install the missing tools and try again."
+        exit 1
+    fi
+}
+
+# Function to validate URL
+validate_url() {
+    local url=$1
+    if [[ ! $url =~ ^https?:// ]]; then
+        print_message "$RED" "Error: URL must start with http:// or https://"
+        return 1
+    fi
+    return 0
+}
+
+# Function to create the working directory
+# Status goes to stderr so the caller's command substitution captures only the path
+create_working_directory() {
+    local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
+    local dir_name="forensic_collection_${timestamp}"
+
+    mkdir -p "$dir_name" || return 1
+
+    print_message "$GREEN" "Created working directory: $dir_name" >&2
+    echo "$PWD/$dir_name"
+}
+
+# Function to create metadata file
+create_metadata() {
+    local url=$1
+    local collector_name=$2
+    local collector_email=$3
+    local case_number=$4
+    local timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
+
+    cat > collection_metadata.txt << EOF
+=== FORENSIC WEB COLLECTION METADATA ===
+Collection Date/Time: $timestamp
+Source URL: $url
+Collector Name: $collector_name
+Collector Email: $collector_email
+Case/Reference Number: ${case_number:-N/A}
+Collection Method: wget with WARC output
+Tool Versions:
+  - wget: $(wget --version | head -n1)
+  - bash: $BASH_VERSION
+  - OS: $(uname -a)
+
+Collection Parameters:
+  - Recursive: Yes
+  - Page requisites: Yes (CSS, JS, images)
+  - Maximum file size per WARC: 1GB
+  - Rate limit: 200 KB/s
+  - Random delays: 0.5-1.5 seconds (1 second base wait, randomized)
+  - User agent: Modified browser string
+  - SSL verification: Disabled (for self-signed certificates)
+
+Documentation:
+  - WARC files: Contain full HTTP transactions
+  - Mirror directory: Local file system copy
+  - Checksums: SHA-256 for all collected files
+  - This metadata file: collection_metadata.txt
+EOF
+
+    print_message "$GREEN" "Created metadata file"
+}
+
+# Function to perform the actual wget collection
+perform_collection() {
+    local url=$1
+    local domain=$(echo "$url" | sed -E 's|^https?://||' | sed -E 's|/.*$||' | sed -E 's|:.*$||')
+    local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
+    local warc_file="${domain}_${timestamp}"
+
+    print_message "$YELLOW" "Starting collection of: $url"
+    print_message "$YELLOW" "Domain: $domain"
+    print_message "$YELLOW" "WARC file: ${warc_file}.warc"
+
+    # Extract collector info for WARC headers
+    local collector_info=$(grep "Collector Name:\|Collector Email:" collection_metadata.txt | tr '\n' ' ')
+
+    wget \
+        --mirror \
+        --convert-links \
+        --adjust-extension \
+        --page-requisites \
+        --no-parent \
+        --recursive \
+        --level=0 \
+        --continue \
+        --timestamping \
+        --no-check-certificate \
+        --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" \
+        --wait=1 \
+        --random-wait \
+        --limit-rate=200k \
+        --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" \
+        --header="Accept-Language: en-US,en;q=0.5" \
+        --warc-file="$warc_file" \
+        --warc-header="operator: $collector_info" \
+        --warc-header="description: Forensic collection of $url" \
+        --warc-max-size=1G \
+        "$url"
+
+    if [ $? -eq 0 ]; then
+        print_message "$GREEN" "Website collection completed successfully"
+        echo "$warc_file.warc"
+    else
+        print_message "$RED" "Website collection failed"
+        return 1
+    fi
+}
+
+# Function to generate checksums
+generate_checksums() {
+    print_message "$YELLOW" "Generating SHA-256 checksums for all collected files..."
+
+    # Find all files and generate checksums (excluding the checksum list itself)
+    find . -type f ! -name 'all_files_checksums.sha256' -exec sha256sum {} + > all_files_checksums.sha256
+
+    # Generate checksum for the WARC file specifically
+    if ls *.warc &> /dev/null; then
+        sha256sum *.warc > warc_checksum.sha256
+    fi
+
+    # Generate checksum for the metadata file
+    sha256sum collection_metadata.txt > metadata_checksum.sha256
+
+    print_message "$GREEN" "Checksums generated successfully"
+}
+
+# Function to create final package
+create_final_package() {
+    local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
+    local package_name="forensic_web_evidence_${timestamp}.tar.gz"
+
+    print_message "$YELLOW" "Creating final evidence package..."
+
+    # Create the tar.gz package
+    tar czf "../$package_name" ./*
+
+    if [ $? -eq 0 ]; then
+        # Generate checksum for the final package
+        cd ..
+        sha256sum "$package_name" > "${package_name}.sha256"
+
+        # Record the package details in the collection report
+        cat >> collection_report.txt << EOF
+
+Package Information:
+--------------------
+Final Package: $package_name
+Package Size: $(du -sh "$package_name" | cut -f1)
+EOF
+
+        print_message "$GREEN" "Evidence package created: $package_name"
+        print_message "$GREEN" "Package checksum: ${package_name}.sha256"
+
+        # Display package contents
+        print_message "$YELLOW" "\nPackage contents:"
+        tar tzf "$package_name" | head -20
+        if [ "$(tar tzf "$package_name" | wc -l)" -gt 20 ]; then
+            echo "... (truncated, showing first 20 items)"
+        fi
+
+        # Display final checksums
+        print_message "$YELLOW" "\nFinal integrity checksums:"
+        cat "${package_name}.sha256"
+    else
+        print_message "$RED" "Failed to create evidence package"
+        return 1
+    fi
+}
+
+# Function to create collection report
+create_collection_report() {
+    local url=$1
+    local start_time=$2
+    local end_time=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
+    # Calculate duration from Unix timestamps
+    local start_timestamp=$3
+    local end_timestamp=$(date +%s)
+    local duration=$((end_timestamp - start_timestamp))
+
+    cat > ../collection_report.txt << EOF
+=== FORENSIC WEB COLLECTION REPORT ===
+
+Collection Summary:
+-------------------
+Target URL: $url
+Start Time: $start_time
+End Time: $end_time
+Duration: $duration seconds
+
+Results:
+--------
+WARC Files: $(ls *.warc 2>/dev/null | wc -l)
+Total Files Collected: $(find . -type f | wc -l)
+Total Size: $(du -sh . | cut -f1)
+
+Verification:
+-------------
+All files have been hashed with SHA-256
+Final package integrity verified
+Chain of custody maintained
+
+Collection performed by: $(grep "Collector Name:" collection_metadata.txt | cut -d: -f2-)
+Case Reference: $(grep "Case/Reference Number:" collection_metadata.txt | cut -d: -f2-)
+
+EOF
+
+    print_message "$GREEN" "Collection report created"
+}
+
+# Main script execution
+main() {
+    clear
+    print_message "$GREEN" "=== FORENSIC WEBSITE COLLECTION SCRIPT ==="
+    print_message "$GREEN" "========================================\n"
+
+    # Check dependencies
+    check_dependencies
+
+    # Get user input
+    read -p "Enter the website URL to collect: " URL
+    validate_url "$URL" || exit 1
+
+    read -p "Enter your name: " COLLECTOR_NAME
+    read -p "Enter your email: " COLLECTOR_EMAIL
+    read -p "Enter case/reference number (optional): " CASE_NUMBER
+
+    # Record start time (both human-readable and Unix timestamp)
+    START_TIME=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
+    START_TIMESTAMP=$(date +%s)
+
+    # Create working directory and move into it
+    WORK_DIR=$(create_working_directory) || exit 1
+    cd "$WORK_DIR" || exit 1
+
+    # Create metadata
+    create_metadata "$URL" "$COLLECTOR_NAME" "$COLLECTOR_EMAIL" "$CASE_NUMBER"
+
+    # Perform the collection
+    if ! perform_collection "$URL"; then
+        print_message "$RED" "Collection failed. Exiting."
+        exit 1
+    fi
+
+    # Generate checksums
+    generate_checksums
+
+    # Create collection report
+    create_collection_report "$URL" "$START_TIME" "$START_TIMESTAMP"
+
+    # Create final package
+    create_final_package
+
+    # Final summary
+    print_message "$GREEN" "\n=== COLLECTION COMPLETE ==="
+    print_message "$GREEN" "Working directory: $WORK_DIR"
+    print_message "$GREEN" "Final package, checksums, and report are one directory above"
+    print_message "$YELLOW" "\nNext steps:"
+    print_message "$YELLOW" "1. Verify the package checksum"
+    print_message "$YELLOW" "2. Store the package in secure evidence storage"
+    print_message "$YELLOW" "3. Document the storage location in your case management system"
+    print_message "$YELLOW" "4. Consider creating a backup copy"
+}
+
+# Run the main function
+main
\ No newline at end of file
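
Usage note: the "Next steps" banner asks the operator to verify the package checksum. A minimal post-run sketch, assuming the default names the script produces (the timestamp portion of each filename will differ per run, shown here as a <timestamp> placeholder):

    # Verify the evidence package against its SHA-256 sidecar file
    sha256sum -c forensic_web_evidence_<timestamp>.tar.gz.sha256

    # List the packaged contents without extracting them
    tar tzf forensic_web_evidence_<timestamp>.tar.gz

Both commands should be run from the directory that holds the package, i.e. one level above the working directory created by the script.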