From afa0454f7412c6bea24ba818fd57713477fa5be9 Mon Sep 17 00:00:00 2001
From: overcuriousity
Date: Sat, 10 May 2025 21:07:26 +0200
Subject: [PATCH] commit

---
 collector.sh | 509 +++++++++++++++++++++++----------------------------
 1 file changed, 233 insertions(+), 276 deletions(-)

diff --git a/collector.sh b/collector.sh
index 0755bed..811c319 100755
--- a/collector.sh
+++ b/collector.sh
@@ -1,302 +1,259 @@
 #!/bin/bash
-# Forensic Website Collection Script
-# This script creates a forensically sound copy of a website
-# with all resources, proper documentation, and integrity verification
+# Forensic Collector
+# Version 0.1
+# Chain of custody and integrity verification

-# Set consistent locale for date processing and UTC timezone
-export LC_ALL=C
-export TZ=UTC
+#Copyright [yyyy] [name of copyright owner]
+
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+
+#    http://www.apache.org/licenses/LICENSE-2.0
+
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.

-# Color codes for output
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

-# Function to print colored messages
-print_message() {
-    local color=$1
-    local message=$2
-    echo -e "${color}${message}${NC}"
-}
+# header
+echo -e "${GREEN}=========================================="
+echo "        FORENSIC COLLECTOR"
+echo
+echo
+echo
+echo "\"Experten, die keine KI verwenden, werden aussterben."
+echo " Ja, und Experten wie meine Frau, die KI verwenden,"
+echo " werden die anderen Experten ersetzen.\""
+echo -e "${YELLOW}        - Dirk Labudde, 29.10.2024${NC}"
+echo -e "==========================================${NC}"
+echo

-# Function to check if required tools are installed
-check_dependencies() {
-    local deps=("wget" "tar" "sha256sum" "date")
-    local missing=()
-
-    for dep in "${deps[@]}"; do
-        if ! command -v "$dep" &> /dev/null; then
-            missing+=("$dep")
-        fi
-    done
-
-    if [ ${#missing[@]} -gt 0 ]; then
-        print_message "$RED" "Error: Missing dependencies: ${missing[*]}"
-        print_message "$YELLOW" "Please install the missing tools and try again."
-        exit 1
-    fi
-}
+START_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
+HOSTNAME=$(hostname)
+OS_INFO=$(uname -a)
+WGET_VERSION=$(wget --version | head -n1)
+USERNAME=$(whoami)

-# Function to validate URL
-validate_url() {
-    local url=$1
-    if [[ ! $url =~ ^https?:// ]]; then
-        print_message "$RED" "Error: URL must start with http:// or https://"
-        return 1
-    fi
-    return 0
-}
+echo -e "${YELLOW}Öffentliche IP wird abgerufen...${NC}"
+EXTERNAL_IP=$(curl -s https://api.ipify.org)
+if [ -z "$EXTERNAL_IP" ]; then
+    EXTERNAL_IP="Fehler beim Abruf der öffentlichen IP-Adresse."
+    echo -e "${RED}WARNUNG: Die öffentliche IP-Adresse konnte nicht abgerufen werden.${NC}"
+    echo -e "${RED}Prüfen Sie die Netzwerkverbindung.${NC}"
+else
+    echo -e "${GREEN}Öffentliche IP: $EXTERNAL_IP${NC}"
+    echo -e "${RED}Hinweis:${NC}"
+    echo -e "${YELLOW}Diese IP-Adresse wird bei der Sicherung der Webseite im Webserver-${NC}"
+    echo -e "${YELLOW}oder Firewall-Log des Ziels auffallen.${NC}"
+    echo -e "${NC}Die Nutzung von VPN oder TOR ist zu empfehlen.${NC}"
+fi
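
# A hardening sketch (an assumption, not part of this commit): a bounded
# timeout and a fallback resolver make the IP lookup more robust;
# "ifconfig.me" is an assumed alternative service, substitute any you trust.
#
#   EXTERNAL_IP=$(curl -s --max-time 10 https://api.ipify.org) \
#       || EXTERNAL_IP=$(curl -s --max-time 10 https://ifconfig.me)
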
+ echo -e "${RED}WARNUNG: Die öffentliche IP-Adresse konnte nicht abgerufen werden.${NC}" + echo -e "&{RED}Prüfen Sie die Netzwerkverbindung.${NC}" +else + echo -e "${GREEN}Öffentliche IP: $EXTERNAL_IP${NC}" + echo -e "${RED}Hinweis:${NC}" + echo -e "${YELLOW}Diese IP-Adresse wird bei der Sicherung der Webseite im Webserver-${NC}" + echo -e "${YELLOW}oder Firewall-Log des Ziels auffallen.${NC}" + echo -e "${NC}Die Nutzung von VPN oder TOR ist zu empfehlen.${NC}" +fi -# Function to create working directory -create_working_directory() { - local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC) - local dir_name="forensic_collection_${timestamp}" - - mkdir -p "$dir_name" - cd "$dir_name" || exit 1 - - print_message "$GREEN" "Created working directory: $dir_name" - echo "$PWD" -} +echo -e "\n${YELLOW}Eingabe der Ziel-URI:${NC}" +read -p "URI: " TARGET_URL -# Function to create metadata file -create_metadata() { - local url=$1 - local collector_name=$2 - local collector_email=$3 - local case_number=$4 - local timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC') - - cat > collection_metadata.txt << EOF -=== FORENSIC WEB COLLECTION METADATA === -Collection Date/Time: $timestamp -Source URL: $url -Collector Name: $collector_name -Collector Email: $collector_email -Case/Reference Number: ${case_number:-N/A} -Collection Method: wget with WARC output -Tool Versions: - - wget: $(wget --version | head -n1) - - bash: $BASH_VERSION - - OS: $(uname -a) +# Validation +if [[ ! $TARGET_URL =~ ^https?:// ]]; then + echo -e "${RED}FEHLER: Geben Sie eine valide URI ein, beginnend mit http:// oder https://${NC}" + exit 1 +fi -Collection Parameters: - - Recursive: Yes - - Page requisites: Yes (CSS, JS, images) - - Maximum file size per WARC: 1GB - - Rate limit: 200kb/s - - Random delays: 1-2 seconds - - User agent: Modified browser string - - SSL verification: Disabled (for self-signed certificates) +echo -e "\n${YELLOW}Angabe des Geschäftszeichens (optional):${NC}" +read -p "Geschäftszeichen: " CASE_NUMBER -Documentation: - - WARC files: Contains full HTTP transactions - - Mirror directory: Local file system copy - - Checksums: SHA-256 for all collected files - - This metadata file: collection_metadata.txt -EOF - - print_message "$GREEN" "Created metadata file" -} +echo -e "\n${YELLOW}MAximale Rekursion eingeben (Enter=default=unlimitiert):${NC}" +read -p "Rekursion (default: unlimitiert): " MAX_DEPTH -# Function to perform the actual wget collection -perform_collection() { - local url=$1 - local domain=$(echo "$url" | sed -E 's|^https?://||' | sed -E 's|/.*$||' | sed -E 's|:.*$||') - local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC) - local warc_file="${domain}_${timestamp}" - - print_message "$YELLOW" "Starting collection of: $url" - print_message "$YELLOW" "Domain: $domain" - print_message "$YELLOW" "WARC file: ${warc_file}.warc" - - # Extract collector info for WARC headers - local collector_info=$(grep "Collector Name:\|Collector Email:" collection_metadata.txt | tr '\n' ' ') - - wget \ - --mirror \ - --convert-links \ - --adjust-extension \ - --page-requisites \ - --no-parent \ - --recursive \ - --level=0 \ - --no-clobber \ - --continue \ - --timestamping \ - --no-check-certificate \ - --user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" \ - --wait=1 \ - --random-wait \ - --limit-rate=200k \ - --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" \ - --header="Accept-Language: en-US,en;q=0.5" \ - --warc-file="$warc_file" 
-# Function to generate checksums
-generate_checksums() {
-    print_message "$YELLOW" "Generating SHA-256 checksums for all collected files..."
-
-    # Find all files and generate checksums
-    find . -type f -exec sha256sum {} \; > all_files_checksums.sha256
-
-    # Generate checksum for the WARC file specifically
-    if ls *.warc &> /dev/null; then
-        sha256sum *.warc > warc_checksum.sha256
-    fi
-
-    # Generate checksum for the metadata file
-    sha256sum collection_metadata.txt > metadata_checksum.sha256
-
-    print_message "$GREEN" "Checksums generated successfully"
-}
+HOSTNAME_FROM_URL=$(echo "$TARGET_URL" | sed -E 's/^https?:\/\///' | sed 's/\/.*$//' | sed 's/:.*$//')

-# Function to create final package
-create_final_package() {
-    local timestamp=$(date -u +%Y%m%d_%H%M%S_UTC)
-    local package_name="forensic_web_evidence_${timestamp}.tar.gz"
-
-    print_message "$YELLOW" "Creating final evidence package..."
-
-    # Create the tar.gz package
-    tar czf "../$package_name" ./*
-
-    if [ $? -eq 0 ]; then
-        # Generate checksum for the final package
-        cd ..
-        sha256sum "$package_name" > "${package_name}.sha256"
-
-        print_message "$GREEN" "Evidence package created: $package_name"
-        print_message "$GREEN" "Package checksum: ${package_name}.sha256"
+DATE_STR=$(date +%Y%m%d_%H%M%S)
+if [ -z "$CASE_NUMBER" ]; then
+    OUTPUT_DIR="${DATE_STR}_${HOSTNAME_FROM_URL}"
+else
+    OUTPUT_DIR="${DATE_STR}_${CASE_NUMBER}_${HOSTNAME_FROM_URL}"
+fi
-        # Display package contents
-        print_message "$YELLOW" "\nPackage contents:"
-        tar tzf "$package_name" | head -20
-        if [ $(tar tzf "$package_name" | wc -l) -gt 20 ]; then
-            echo "... (truncated, showing first 20 items)"
-        fi
-
-        # Display final checksums
-        print_message "$YELLOW" "\nFinal integrity checksums:"
-        cat "${package_name}.sha256"
-    else
-        print_message "$RED" "Failed to create evidence package"
-        return 1
-    fi
-}
+mkdir -p "$OUTPUT_DIR"
+cd "$OUTPUT_DIR" || exit 1

-# Function to create collection report
-create_collection_report() {
-    local url=$1
-    local start_time=$2
-    local end_time=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
-    # Calculate duration more safely, using Unix timestamps
-    local start_timestamp=$3
-    local end_timestamp=$(date +%s)
-    local duration=$((end_timestamp - start_timestamp))
-
-    cat > ../collection_report.txt << EOF
-=== FORENSIC WEB COLLECTION REPORT ===
+WEBSITE_DIR="website"
+mkdir -p "$WEBSITE_DIR"
+
+# Log file for wget output
+WGET_LOG="wget.log"
+
+# Initialize report
+REPORT_FILE="forensic_report.txt"
+cat > "$REPORT_FILE" << EOF
+FORENSIC COLLECTOR
+===================================
+
+CASE INFORMATION
+-----------------
+CASE NUMBER: ${CASE_NUMBER:-N/A}
+PRESERVATION TARGET: $TARGET_URL
+PRESERVATION TIMESTAMP (UTC): $START_TIME
+OUTPUT FOLDER: $OUTPUT_DIR
+
+HOST SYSTEM INFORMATION
 -------------------
-Target URL: $url
-Start Time: $start_time
-End Time: $end_time
-Duration: $duration seconds
+USERNAME: $USERNAME
+HOSTNAME: $HOSTNAME
+OPERATING SYSTEM: $OS_INFO
+WGET VERSION: $WGET_VERSION
+PUBLIC IP ADDRESS: $EXTERNAL_IP

-Results:
---------
-WARC Files: $(ls *.warc 2>/dev/null | wc -l)
-Total Files Collected: $(find . -type f | wc -l)
-Total Size: $(du -sh . | cut -f1)
-
-Package Information:
---------------------
-Final Package: $(ls ../forensic_web_evidence_*.tar.gz 2>/dev/null)
-Package Size: $(du -sh ../forensic_web_evidence_*.tar.gz 2>/dev/null | cut -f1)
-
-Verification:
--------------
-All files have been hashed with SHA-256
-Final package integrity verified
-Chain of custody maintained
-
-Collection performed by: $(grep "Collector Name:" collection_metadata.txt | cut -d: -f2-)
-Case Reference: $(grep "Case/Reference Number:" collection_metadata.txt | cut -d: -f2-)
+PARAMETERS
+-----------------
+MAX RECURSION: $MAX_DEPTH
+EXTRACTED TARGET DOMAIN: $HOSTNAME_FROM_URL
 EOF
-
-    print_message "$GREEN" "Collection report created"
-}

-# Main script execution
-main() {
-    clear
-    print_message "$GREEN" "=== FORENSIC WEBSITE COLLECTION SCRIPT ==="
-    print_message "$GREEN" "========================================\n"
-
-    # Check dependencies
-    check_dependencies
-
-    # Get user input
-    read -p "Enter the website URL to collect: " URL
-    validate_url "$URL" || exit 1
-
-    read -p "Enter your name: " COLLECTOR_NAME
-    read -p "Enter your email: " COLLECTOR_EMAIL
-    read -p "Enter case/reference number (optional): " CASE_NUMBER
-
-    # Record start time (both human-readable and timestamp)
-    START_TIME=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
-    START_TIMESTAMP=$(date +%s)
-
-    # Create working directory
-    WORK_DIR=$(create_working_directory)
-
-    # Create metadata
-    create_metadata "$URL" "$COLLECTOR_NAME" "$COLLECTOR_EMAIL" "$CASE_NUMBER"
-
-    # Perform the collection
-    if ! perform_collection "$URL"; then
-        print_message "$RED" "Collection failed. Exiting."
-        exit 1
-    fi
-
-    # Generate checksums
-    generate_checksums
-
-    # Create collection report
-    create_collection_report "$URL" "$START_TIME" "$START_TIMESTAMP"
-
-    # Create final package
-    create_final_package
-
-    # Final summary
-    print_message "$GREEN" "\n=== COLLECTION COMPLETE ==="
-    print_message "$GREEN" "Working directory: $WORK_DIR"
-    print_message "$GREEN" "Final package and checksums are one directory above"
-    print_message "$YELLOW" "\nNext steps:"
-    print_message "$YELLOW" "1. Verify the package checksum"
-    print_message "$YELLOW" "2. Store the package in secure evidence storage"
-    print_message "$YELLOW" "3. Document the storage location in your case management system"
-    print_message "$YELLOW" "4. Consider creating a backup copy"
-}
-
-# Run the main function
-main
\ No newline at end of file
+WGET_CMD="wget --recursive --page-requisites --html-extension --convert-links \
+    --restrict-file-names=windows --domains=$HOSTNAME_FROM_URL \
+    --user-agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' \
+    --wait=1 --random-wait --timeout=30 --tries=3 \
+    --no-parent --timestamping --backup-converted \
+    $DEPTH_PARAM --directory-prefix='$WEBSITE_DIR' '$TARGET_URL' 2>&1 | tee '$WGET_LOG'"
+
+# wget-command documentation
+echo "wget command:" >> "$REPORT_FILE"
+echo "----------------------------------------" >> "$REPORT_FILE"
+echo "$WGET_CMD" >> "$REPORT_FILE"
+echo "----------------------------------------" >> "$REPORT_FILE"
+echo >> "$REPORT_FILE"
+
+echo -e "\n${YELLOW}Beginne Sicherungsmaßnahme...${NC}"
+echo -e "${GREEN}Methode:${NC} $WGET_CMD"
+echo
+
+# Execute wget (eval expands the prepared command string; pipefail makes the
+# pipeline return wget's exit status instead of tee's, which would otherwise
+# mask wget errors)
+set -o pipefail
+eval "$WGET_CMD"
+WGET_EXIT_CODE=$?
+set +o pipefail
+if [ $WGET_EXIT_CODE -ne 0 ]; then
+    echo -e "${RED}WARNUNG: wget mit Fehlerstatus beendet (Exit-Code: $WGET_EXIT_CODE). Prüfen Sie $WGET_LOG für Details.${NC}"
+    echo "ERROR NOTICE: wget execution produced runtime errors. Exit code: $WGET_EXIT_CODE" >> "$REPORT_FILE"
+    echo "wget.log contains runtime logs." >> "$REPORT_FILE"
+fi
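
# A finer-grained interpretation sketch (an assumption, not part of this
# commit), based on wget's documented exit codes: 0 = no problems,
# 4 = network failure, 8 = the server issued an error response (e.g. a
# single 404, which need not invalidate the collection); see man wget:
#
#   case $WGET_EXIT_CODE in
#       4) echo "Netzwerkfehler während der Sicherung" ;;
#       8) echo "Server meldete Fehler (z.B. 404) für einzelne Anfragen" ;;
#       *) echo "wget-Exit-Code $WGET_EXIT_CODE, siehe man wget" ;;
#   esac
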
>> "$REPORT_FILE" +fi + +echo -e "\n${YELLOW}Kalkulation der Sicherungsstatistiken...${NC}" +TOTAL_FILES=$(find "$WEBSITE_DIR" -type f | wc -l) +TOTAL_SIZE=$(du -sh "$WEBSITE_DIR" | cut -f1) +FILE_TYPES=$(find "$WEBSITE_DIR" -type f -name "*.*" | sed 's/.*\.//' | sort | uniq -c | sort -rn) + +echo "Preservation Statistics:" >> "$REPORT_FILE" +echo "-------------------" >> "$REPORT_FILE" +echo "Total File Count: $TOTAL_FILES" >> "$REPORT_FILE" +echo "Total Filesize: $TOTAL_SIZE" >> "$REPORT_FILE" +echo "Filetype-Distribution:" >> "$REPORT_FILE" +echo "$FILE_TYPES" >> "$REPORT_FILE" +echo >> "$REPORT_FILE" + +if [ -s "$WGET_LOG" ]; then + echo "wget Error Log:" >> "$REPORT_FILE" + echo "----------------" >> "$REPORT_FILE" + cat "$WGET_LOG" >> "$REPORT_FILE" + echo >> "$REPORT_FILE" +fi + +# Generate hash list +echo -e "\n${YELLOW}Generiere Hashwerte...${NC}" +HASH_FILE="file_hashes.sha256" +echo "File Hash List (SHA-256)" > "$HASH_FILE" +echo "========================" >> "$HASH_FILE" +echo "Generated on: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$HASH_FILE" +echo >> "$HASH_FILE" + +find "$WEBSITE_DIR" -type f -print0 | while IFS= read -r -d '' file; do + sha256sum "$file" >> "$HASH_FILE" +done + +# Hash the report file itself +sha256sum "$REPORT_FILE" >> "$HASH_FILE" +sha256sum "$HASH_FILE" >> "$HASH_FILE" + +echo "Hash Verification:" >> "$REPORT_FILE" +echo "------------------" >> "$REPORT_FILE" +echo "Hash Algorithm: SHA-256" >> "$REPORT_FILE" +echo "Hash File: $HASH_FILE" >> "$HASH_FILE" +echo "Report File Hash: $(sha256sum "$REPORT_FILE" | cut -d' ' -f1)" >> "$REPORT_FILE" +echo >> "$REPORT_FILE" + +# Create final archive +echo -e "\n${YELLOW}Erstelle Archiv...${NC}" +ARCHIVE_NAME="../${OUTPUT_DIR}.tar.gz" +tar -czf "$ARCHIVE_NAME" . + +ARCHIVE_HASH=$(sha256sum "$ARCHIVE_NAME" | cut -d' ' -f1) + +END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ) +echo "Preservation Completion:" >> "$REPORT_FILE" +echo "------------------------" >> "$REPORT_FILE" +echo "End Time (UTC): $END_TIME" >> "$REPORT_FILE" +echo "Archive Name: ${OUTPUT_DIR}.tar.gz" >> "$REPORT_FILE" +echo "Archive Hash (SHA-256): $ARCHIVE_HASH" >> "$REPORT_FILE" + +SUMMARY_FILE="../${OUTPUT_DIR}_summary.txt" +cat > "$SUMMARY_FILE" << EOF +FORENSISCHE SICHERUNG - ZUSAMMENFASSUNG +============================= + +Archiv: ${OUTPUT_DIR}.tar.gz +Archivhash (SHA-256): $ARCHIVE_HASH +Geschäftszeichen: ${CASE_NUMBER:-N/A} +Ziel-URL: $TARGET_URL +Beginn der Sicherung: $START_TIME +Ende der Sicherung: $END_TIME +Anzahl der Dateien: $TOTAL_FILES +Gesamtvolumen: $TOTAL_SIZE + +Das Archiv enthält: +1. Komplettsicherung im Ordner 'website/' +2. Dokumentation der technischen Sicherung (forensic_report.txt) +3. SHA-256-Hashwerte aller Dateien +4. wget-Log + +Zur Verifizierung der Integrität: sha256sum ${OUTPUT_DIR}.tar.gz +Hashwert: $ARCHIVE_HASH +EOF + +cd .. +rm -rf "$OUTPUT_DIR" + +# Display completion information +echo -e "\n${GREEN}==========================================" +echo " SICHERUNG ABGESCHLOSSEN" +echo -e "==========================================${NC}" +echo +echo -e "${GREEN}Archiv erstellt:${NC} ${OUTPUT_DIR}.tar.gz" +echo -e "${GREEN}Archivhash:${NC} $ARCHIVE_HASH" +echo -e "${GREEN}Zusammenfassung:${NC} ${OUTPUT_DIR}_summary.txt" +echo +echo -e "${YELLOW}Zugriff auf die Webseitensicherung:${NC}" +echo "1. Extraktion des Archivs: tar -xzf ${OUTPUT_DIR}.tar.gz" +echo "2. Navigieren Sie zu: ${OUTPUT_DIR}/website/" +echo "3. 
+# Display completion information
+echo -e "\n${GREEN}=========================================="
+echo "       SICHERUNG ABGESCHLOSSEN"
+echo -e "==========================================${NC}"
+echo
+echo -e "${GREEN}Archiv erstellt:${NC} ${OUTPUT_DIR}.tar.gz"
+echo -e "${GREEN}Archivhash:${NC} $ARCHIVE_HASH"
+echo -e "${GREEN}Zusammenfassung:${NC} ${OUTPUT_DIR}_summary.txt"
+echo
+echo -e "${YELLOW}Zugriff auf die Webseitensicherung:${NC}"
+echo "1. Extraktion des Archivs in einen leeren Ordner: tar -xzf ${OUTPUT_DIR}.tar.gz"
+echo "2. Navigieren Sie in den extrahierten Ordner website/"
+echo "3. Öffnen Sie index.html in einem Browser"
+echo
+echo -e "${RED}ACHTUNG!${NC}"
+echo -e "${RED}Das Öffnen der Offlinesicherung schließt nicht aus, dass JavaScript aus dem Internet nachgeladen wird!${NC}"
+echo -e "${RED}Es wird dringend empfohlen, dies nur in einem vom Internet getrennten (air-gapped) System zu tun.${NC}"
+echo
+echo -e "${YELLOW}Verifizierung der Integrität:${NC}"
+echo "sha256sum ${OUTPUT_DIR}.tar.gz"
+echo "Erwartungswert: $ARCHIVE_HASH"
+echo
\ No newline at end of file
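
# A quick-check sketch for the JavaScript warning above (an assumption, not
# part of this commit; it only finds static references, dynamic loaders
# escape it):
#
#   grep -rEl 'src="https?://' website/ | head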