#!/bin/bash
#
# Forensic Website Collection Script
#
# This script creates a forensically sound copy of a website
# with all resources, proper documentation, and integrity verification.

# Set a consistent locale for date processing and force the UTC timezone so
# every timestamp the script records is locale-independent and in UTC.
export LC_ALL=C
export TZ=UTC

# Color codes for output. The values are backslash-escaped ANSI sequences;
# they only become real escape characters when printed with escape
# interpretation enabled (see print_message).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

#######################################
# Print a message wrapped in a color sequence.
# Globals:   NC (read) - reset sequence appended after the message
# Arguments: $1 - color escape sequence (e.g. "$RED")
#            $2 - message text; backslash escapes such as \n are interpreted
# Outputs:   the colored message followed by a newline, on stdout
#######################################
print_message() {
  local color=$1
  local message=$2
  # printf '%b' interprets the backslash escapes like `echo -e` did, but does
  # not treat a message that looks like an option (e.g. "-n") specially.
  printf '%b\n' "${color}${message}${NC}"
}

#######################################
# Verify that the external tools this script relies on are installed.
# Globals:   RED, YELLOW (read)
# Arguments: none
# Outputs:   an error listing the missing tools (via print_message)
# Returns:   0 when every tool is found; otherwise EXITS the script with
#            status 1
#######################################
check_dependencies() {
  local -a deps=("wget" "tar" "sha256sum" "date")
  local -a missing=()
  local dep # keep the loop variable out of the global scope

  for dep in "${deps[@]}"; do
    if ! command -v "$dep" > /dev/null 2>&1; then
      missing+=("$dep")
    fi
  done

  if ((${#missing[@]} > 0)); then
    print_message "$RED" "Error: Missing dependencies: ${missing[*]}"
    print_message "$YELLOW" "Please install the missing tools and try again."
    exit 1
  fi
}

#######################################
# Validate that a URL uses the http or https scheme and names a host.
# Globals:   RED (read)
# Arguments: $1 - URL to validate
# Outputs:   an error message (via print_message) when the URL is rejected
# Returns:   0 if the URL is acceptable, 1 otherwise
#######################################
validate_url() {
  local url=$1
  # Require at least one host character after the scheme so that a bare
  # "http://" is rejected instead of being handed to wget.
  local -r url_pattern='^https?://[^/[:space:]]+'

  if [[ ! $url =~ $url_pattern ]]; then
    print_message "$RED" "Error: URL must start with http:// or https://"
    return 1
  fi
  return 0
}

#######################################
# Create a UTC-timestamped working directory below the current directory and
# change into it.
# Globals:   GREEN (read)
# Arguments: none
# Outputs:   stdout - absolute path of the new directory (and nothing else)
#            stderr - human-readable status message
# Returns:   0 on success; EXITS with status 1 if the directory cannot be
#            created or entered
# Note:      when called inside $(...), the cd happens in a subshell; the
#            caller must cd into the printed path itself.
#######################################
create_working_directory() {
  local timestamp
  local dir_name

  # Declaration and assignment are separate so a failing `date` is not
  # masked by the exit status of `local`.
  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC) || exit 1
  dir_name="forensic_collection_${timestamp}"

  mkdir -p -- "$dir_name" || exit 1
  cd -- "$dir_name" || exit 1

  # The status message goes to stderr so that callers capturing stdout get
  # only the path.
  print_message "$GREEN" "Created working directory: $dir_name" >&2
  printf '%s\n' "$PWD"
}

#######################################
# Write collection_metadata.txt in the current directory, recording who
# collected what, when, and with which tool versions and parameters.
# Globals:   GREEN (read), BASH_VERSION (read)
# Arguments: $1 - source URL
#            $2 - collector name
#            $3 - collector email
#            $4 - case/reference number (optional; recorded as N/A if empty)
# Outputs:   creates/overwrites ./collection_metadata.txt; status message
#            via print_message
# Returns:   0 on success, 1 if the metadata file could not be written
#######################################
create_metadata() {
  local url=$1
  local collector_name=$2
  local collector_email=$3
  local case_number=$4
  local timestamp

  timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')

  # Unquoted delimiter: variables and $(...) below are expanded when the
  # file is written. The heredoc body must stay at column 0.
  cat > collection_metadata.txt << EOF || return 1
=== FORENSIC WEB COLLECTION METADATA ===
Collection Date/Time: $timestamp
Source URL: $url
Collector Name: $collector_name
Collector Email: $collector_email
Case/Reference Number: ${case_number:-N/A}
Collection Method: wget with WARC output
Tool Versions:
- wget: $(wget --version | head -n1)
- bash: $BASH_VERSION
- OS: $(uname -a)

Collection Parameters:
- Recursive: Yes
- Page requisites: Yes (CSS, JS, images)
- Maximum file size per WARC: 1GB
- Rate limit: 200kb/s
- Random delays: 1-2 seconds
- User agent: Modified browser string
- SSL verification: Disabled (for self-signed certificates)

Documentation:
- WARC files: Contains full HTTP transactions
- Mirror directory: Local file system copy
- Checksums: SHA-256 for all collected files
- This metadata file: collection_metadata.txt
EOF

  print_message "$GREEN" "Created metadata file"
}

#######################################
# Mirror a website with wget, writing both a local mirror and WARC output
# into the current directory.
# Globals:   YELLOW, GREEN, RED (read)
# Arguments: $1 - URL to collect
# Reads:     ./collection_metadata.txt (collector lines for the WARC header)
# Outputs:   progress messages via print_message; on success the WARC base
#            name with a ".warc" suffix on stdout
# Returns:   0 if wget exits 0, 1 otherwise
#######################################
perform_collection() {
  local url=$1
  local domain
  local timestamp
  local warc_file
  local collector_info

  # Derive the host name for the WARC file name: drop the scheme, then
  # path/query/fragment, then any "user:password@" prefix, then the port.
  # Userinfo is removed before the port so credentials never end up in the
  # file name.
  domain=${url#*://}
  domain=${domain%%[/?#]*}
  domain=${domain##*@}
  if [[ $domain == \[*\]* ]]; then
    # Bracketed IPv6 literal: keep the address between the brackets.
    domain=${domain#\[}
    domain=${domain%%\]*}
  else
    domain=${domain%%:*}
  fi

  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
  warc_file="${domain}_${timestamp}"

  print_message "$YELLOW" "Starting collection of: $url"
  print_message "$YELLOW" "Domain: $domain"
  # NOTE(review): per the wget manual the files actually written are named
  # "<prefix>-NNNNN.warc.gz" when --warc-max-size is set and compression is
  # left on, so the ".warc" name shown here is nominal — confirm.
  print_message "$YELLOW" "WARC file: ${warc_file}.warc"

  # Extract collector info for WARC headers (both lines joined by a space).
  collector_info=$(grep "Collector Name:\|Collector Email:" collection_metadata.txt | tr '\n' ' ')
  collector_info=${collector_info% } # drop the trailing space left by tr

  # NOTE(review): --no-clobber, --continue and --timestamping overlap with
  # --mirror/--convert-links and with WARC output; wget appears to warn and
  # disable some of them. They are kept as-is here — confirm against the
  # wget manual before pruning.
  local -a wget_args=(
    --mirror
    --convert-links
    --adjust-extension
    --page-requisites
    --no-parent
    --recursive
    --level=0
    --no-clobber
    --continue
    --timestamping
    --no-check-certificate
    --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    --wait=1
    --random-wait
    --limit-rate=200k
    --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    --header="Accept-Language: en-US,en;q=0.5"
    --warc-file="$warc_file"
    --warc-header="operator: $collector_info"
    --warc-header="description: Forensic collection of $url"
    --warc-max-size=1G
    "$url"
  )

  # Test wget directly instead of inspecting $? afterwards, so no command
  # can slip in between and clobber the status.
  if wget "${wget_args[@]}"; then
    print_message "$GREEN" "Website collection completed successfully"
    echo "$warc_file.warc"
  else
    print_message "$RED" "Website collection failed"
    return 1
  fi
}

#######################################
# Generate SHA-256 checksum files for everything in the current directory.
# Writes:
#   all_files_checksums.sha256 - every collected file (checksum files
#                                written by this function are excluded)
#   warc_checksum.sha256       - WARC files only (*.warc and *.warc.gz)
#   metadata_checksum.sha256   - collection_metadata.txt
# Globals:   YELLOW, GREEN, RED (read)
# Arguments: none
# Returns:   0 on success, 1 if any checksum step fails
#######################################
generate_checksums() {
  local -r manifest="all_files_checksums.sha256"
  local -a warc_files=()
  local candidate

  print_message "$YELLOW" "Generating SHA-256 checksums for all collected files..."

  # The redirection creates the manifest before find starts, so it must be
  # excluded; otherwise it would record a hash of its own half-written
  # content and never verify. The other two checksum files are excluded so
  # a re-run does not hash stale copies that are rewritten below.
  if ! find . -type f \
    ! -path "./${manifest}" \
    ! -path "./warc_checksum.sha256" \
    ! -path "./metadata_checksum.sha256" \
    -exec sha256sum {} + > "$manifest"; then
    print_message "$RED" "Failed to generate checksums"
    return 1
  fi

  # Collect WARC files without parsing ls. wget compresses WARC output by
  # default, so "*.warc.gz" has to be matched as well as "*.warc".
  for candidate in *.warc *.warc.gz; do
    if [[ -f $candidate ]]; then
      warc_files+=("$candidate")
    fi
  done

  if ((${#warc_files[@]} > 0)); then
    if ! sha256sum -- "${warc_files[@]}" > warc_checksum.sha256; then
      print_message "$RED" "Failed to generate checksums"
      return 1
    fi
  fi

  # Generate checksum for the metadata file
  if ! sha256sum collection_metadata.txt > metadata_checksum.sha256; then
    print_message "$RED" "Failed to generate checksums"
    return 1
  fi

  print_message "$GREEN" "Checksums generated successfully"
}

#######################################
# Pack the current directory into a timestamped tar.gz one level up and
# write a SHA-256 checksum file next to it.
# Globals:   YELLOW, GREEN, RED (read)
# Arguments: none
# Outputs:   status messages, the first 20 archive entries and the package
#            checksum on stdout
# Side effects: creates ../forensic_web_evidence_<timestamp>.tar.gz and
#            ../<package>.sha256; on success the current directory is
#            changed to the parent directory
# Returns:   0 on success, 1 on failure
#######################################
create_final_package() {
  local timestamp
  local package_name
  local -a entries=()

  timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
  package_name="forensic_web_evidence_${timestamp}.tar.gz"

  print_message "$YELLOW" "Creating final evidence package..."

  # Archive "." rather than "./*": the glob silently skips top-level
  # dotfiles and fails outright on an empty directory. The archive itself
  # is written to the parent, so it is never part of its own contents.
  if ! tar czf "../$package_name" .; then
    rm -f -- "../$package_name" # do not leave a truncated package behind
    print_message "$RED" "Failed to create evidence package"
    return 1
  fi

  # Hash from the parent directory so the checksum file records the bare
  # file name and can be verified with `sha256sum -c` in place.
  cd .. || return 1
  if ! sha256sum "$package_name" > "${package_name}.sha256"; then
    print_message "$RED" "Failed to create evidence package"
    return 1
  fi

  print_message "$GREEN" "Evidence package created: $package_name"
  print_message "$GREEN" "Package checksum: ${package_name}.sha256"

  # Display package contents; the archive is listed once and reused for
  # both the preview and the truncation check.
  print_message "$YELLOW" "\nPackage contents:"
  mapfile -t entries < <(tar tzf "$package_name")
  if ((${#entries[@]} > 0)); then
    printf '%s\n' "${entries[@]:0:20}"
  fi
  if ((${#entries[@]} > 20)); then
    echo "... (truncated, showing first 20 items)"
  fi

  # Display final checksums
  print_message "$YELLOW" "\nFinal integrity checksums:"
  cat "${package_name}.sha256"
}

#######################################
# Write ../collection_report.txt summarising the collection. Must be run
# from inside the working directory.
# Globals:   GREEN (read)
# Arguments: $1 - target URL
#            $2 - human-readable start time
#            $3 - start time as a Unix timestamp (seconds)
# Reads:     ./collection_metadata.txt, ./*.warc, ./*.warc.gz,
#            ../forensic_web_evidence_*.tar.gz
# Outputs:   creates/overwrites ../collection_report.txt; status message via
#            print_message
# Returns:   0 on success, 1 if the report could not be written
#######################################
create_collection_report() {
  local url=$1
  local start_time=$2
  local start_timestamp=$3
  local end_time
  local end_timestamp
  local duration_text
  local candidate
  local -a warc_files=()
  local package=""
  local package_size=""
  local collector
  local case_ref

  end_time=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  end_timestamp=$(date +%s)

  # Only do arithmetic on a purely numeric start timestamp; an empty or
  # malformed value would otherwise be an arithmetic syntax error.
  if [[ $start_timestamp =~ ^[0-9]+$ ]]; then
    duration_text="$((end_timestamp - start_timestamp)) seconds"
  else
    duration_text="unknown"
  fi

  # Count WARC files via globs instead of parsing ls; wget compresses WARC
  # output by default, so "*.warc.gz" is matched as well.
  for candidate in *.warc *.warc.gz; do
    if [[ -f $candidate ]]; then
      warc_files+=("$candidate")
    fi
  done

  # Globs expand in sorted order and the package names embed a sortable UTC
  # timestamp, so the last match is the newest package. Older packages left
  # in the parent directory are ignored.
  for candidate in ../forensic_web_evidence_*.tar.gz; do
    if [[ -f $candidate ]]; then
      package=$candidate
    fi
  done
  if [[ -n $package ]]; then
    package_size=$(du -sh -- "$package" 2> /dev/null | cut -f1)
  fi

  # Pull the values without the leading space that cutting at ":" leaves.
  collector=$(sed -n 's/^Collector Name: *//p' collection_metadata.txt | head -n 1)
  case_ref=$(sed -n 's|^Case/Reference Number: *||p' collection_metadata.txt | head -n 1)

  # Unquoted delimiter: variables and $(...) below are expanded when the
  # file is written. The heredoc body must stay at column 0.
  cat > ../collection_report.txt << EOF || return 1
=== FORENSIC WEB COLLECTION REPORT ===

Collection Summary:
-------------------
Target URL: $url
Start Time: $start_time
End Time: $end_time
Duration: $duration_text

Results:
--------
WARC Files: ${#warc_files[@]}
Total Files Collected: $(find . -type f | wc -l)
Total Size: $(du -sh . | cut -f1)

Package Information:
--------------------
Final Package: ${package:-N/A}
Package Size: ${package_size:-N/A}

Verification:
-------------
All files have been hashed with SHA-256
Final package integrity verified
Chain of custody maintained

Collection performed by: $collector
Case Reference: $case_ref

EOF

  print_message "$GREEN" "Collection report created"
}

#######################################
# Main script execution: prompt for collection details, then run the
# collection, checksum, packaging and reporting steps in order.
# Globals:   URL, COLLECTOR_NAME, COLLECTOR_EMAIL, CASE_NUMBER, START_TIME,
#            START_TIMESTAMP, WORK_DIR (written); RED, GREEN, YELLOW (read)
# Arguments: none
# Inputs:    four answers read from stdin
# Returns:   0 on success; EXITS with status 1 when validation or any
#            collection step fails
#######################################
main() {
  local wd_output

  clear
  print_message "$GREEN" "=== FORENSIC WEBSITE COLLECTION SCRIPT ==="
  print_message "$GREEN" "========================================\n"

  # Check dependencies
  check_dependencies

  # Get user input (-r keeps backslashes in the answers literal)
  read -r -p "Enter the website URL to collect: " URL
  validate_url "$URL" || exit 1

  read -r -p "Enter your name: " COLLECTOR_NAME
  read -r -p "Enter your email: " COLLECTOR_EMAIL
  read -r -p "Enter case/reference number (optional): " CASE_NUMBER

  # Record start time (both human-readable and timestamp)
  START_TIME=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
  START_TIMESTAMP=$(date +%s)

  # Create working directory. The helper runs inside a command
  # substitution (a subshell), so its own cd does not affect this shell:
  # take the path it prints and change into it here. Only the last output
  # line is used, in case the helper also prints a status line on stdout.
  wd_output=$(create_working_directory) || exit 1
  WORK_DIR=${wd_output##*$'\n'}
  if ! cd -- "$WORK_DIR"; then
    print_message "$RED" "Error: cannot enter working directory: $WORK_DIR"
    exit 1
  fi

  # Create metadata
  if ! create_metadata "$URL" "$COLLECTOR_NAME" "$COLLECTOR_EMAIL" "$CASE_NUMBER"; then
    print_message "$RED" "Failed to write collection metadata. Exiting."
    exit 1
  fi

  # Perform the collection
  if ! perform_collection "$URL"; then
    print_message "$RED" "Collection failed. Exiting."
    exit 1
  fi

  # Generate checksums
  if ! generate_checksums; then
    print_message "$RED" "Checksum generation failed. Exiting."
    exit 1
  fi

  # Create final package before the report, because the report lists the
  # package name and size. Packaging changes directory, so it runs in a
  # subshell to keep this shell inside the working directory.
  if ! (create_final_package); then
    print_message "$RED" "Packaging failed. Exiting."
    exit 1
  fi

  # Create collection report
  create_collection_report "$URL" "$START_TIME" "$START_TIMESTAMP"

  # Final summary
  print_message "$GREEN" "\n=== COLLECTION COMPLETE ==="
  print_message "$GREEN" "Working directory: $WORK_DIR"
  print_message "$GREEN" "Final package and checksums are one directory above"
  print_message "$YELLOW" "\nNext steps:"
  print_message "$YELLOW" "1. Verify the package checksum"
  print_message "$YELLOW" "2. Store the package in secure evidence storage"
  print_message "$YELLOW" "3. Document the storage location in your case management system"
  print_message "$YELLOW" "4. Consider creating a backup copy"
}

# Run the main function, forwarding any command-line arguments. (The
# dangling "|" after the call was a syntax error.)
main "$@"