Merge pull request #3241 from APriestman/3250_hashkeeper

Add support for importing hashkeeper hash sets and md5sum output text
This commit is contained in:
Richard Cordovano 2017-11-30 15:45:28 -05:00 committed by GitHub
commit 1ffeff74b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 150 additions and 26 deletions

View File

@ -85,24 +85,11 @@ final class HashDbImportDatabaseDialog extends javax.swing.JDialog {
private void initFileChooser() { private void initFileChooser() {
fileChooser.setDragEnabled(false); fileChooser.setDragEnabled(false);
fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY); fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
updateFileChooserFilter();
fileChooser.setMultiSelectionEnabled(false);
}
@NbBundle.Messages({"HashDbImportDatabaseDialog.centralRepoExtFilter.text=Hash Database File (.kdb, .idx or .hash)"})
private void updateFileChooserFilter() {
fileChooser.resetChoosableFileFilters();
if(centralRepoRadioButton.isSelected()){
String[] EXTENSION = new String[]{"kdb", "idx", "hash", "Hash"}; //NON-NLS
FileNameExtensionFilter filter = new FileNameExtensionFilter(
NbBundle.getMessage(this.getClass(), "HashDbImportDatabaseDialog.centralRepoExtFilter.text"), EXTENSION);
fileChooser.setFileFilter(filter);
} else {
String[] EXTENSION = new String[]{"txt", "kdb", "idx", "hash", "Hash", "hsh"}; //NON-NLS String[] EXTENSION = new String[]{"txt", "kdb", "idx", "hash", "Hash", "hsh"}; //NON-NLS
FileNameExtensionFilter filter = new FileNameExtensionFilter( FileNameExtensionFilter filter = new FileNameExtensionFilter(
NbBundle.getMessage(this.getClass(), "HashDbImportDatabaseDialog.fileNameExtFilter.text"), EXTENSION); NbBundle.getMessage(this.getClass(), "HashDbImportDatabaseDialog.fileNameExtFilter.text"), EXTENSION);
fileChooser.setFileFilter(filter); fileChooser.setFileFilter(filter);
} fileChooser.setMultiSelectionEnabled(false);
} }
private void display() { private void display() {
@ -421,7 +408,6 @@ final class HashDbImportDatabaseDialog extends javax.swing.JDialog {
hashDbFolder.mkdir(); hashDbFolder.mkdir();
} }
fileChooser.setCurrentDirectory(hashDbFolder); fileChooser.setCurrentDirectory(hashDbFolder);
updateFileChooserFilter();
if (fileChooser.showOpenDialog(this) == JFileChooser.APPROVE_OPTION) { if (fileChooser.showOpenDialog(this) == JFileChooser.APPROVE_OPTION) {
File databaseFile = fileChooser.getSelectedFile(); File databaseFile = fileChooser.getSelectedFile();
try { try {

View File

@ -0,0 +1,133 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011 - 2017 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.modules.hashdatabase;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.logging.Level;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.datamodel.TskCoreException;
/**
 * Parser for Hashkeeper hash sets (*.hsh). Hashkeeper files are CSV files
 * with a header row; the MD5 hashes are read from the "hash" column.
 */
public class HashkeeperHashSetParser implements HashSetParser {

    private final String filename;
    private InputStreamReader inputStreamReader;
    private CSVParser csvParser;
    private final long expectedHashCount; // Number of hashes we expect to read from the file (estimate)
    private final Iterator<CSVRecord> recordIterator;
    private final int hashColumnIndex; // The index of the hash column

    /**
     * Opens the given Hashkeeper file and prepares to iterate over its hashes.
     *
     * @param filename Path to the Hashkeeper (*.hsh) file to import
     * @throws TskCoreException if the file cannot be read or does not contain
     *                          a "hash" column
     */
    HashkeeperHashSetParser(String filename) throws TskCoreException {
        this.filename = filename;
        try {
            // Estimate the total number of hashes in the file.
            // As a rough estimate, assume 75 bytes per line; we add one to
            // prevent this from being zero.
            File importFile = new File(filename);
            long fileSize = importFile.length();
            expectedHashCount = fileSize / 75 + 1;

            // Create the parser. Specify the charset explicitly rather than
            // relying on the platform default; hash columns are ASCII and
            // UTF-8 is a safe superset for the remaining fields.
            inputStreamReader = new InputStreamReader(
                    new FileInputStream(filename), StandardCharsets.UTF_8); //NON-NLS
            csvParser = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(inputStreamReader);

            if (!csvParser.getHeaderMap().containsKey("hash")) {
                close();
                throw new TskCoreException("Hashkeeper file format invalid - does not contain 'hash' column");
            }

            // For efficiency, store the index of the hash column
            hashColumnIndex = csvParser.getHeaderMap().get("hash");

            // Make an iterator to loop over the entries.
            // NOTE(review): getRecords() reads the entire file into memory up
            // front; acceptable for typical hash-set sizes, but worth
            // confirming for very large imports.
            recordIterator = csvParser.getRecords().listIterator();

            // We're ready to use recordIterator to get each hash
        } catch (IOException ex) {
            close();
            throw new TskCoreException("Error reading " + filename, ex);
        }
    }

    /**
     * Get the next hash to import
     *
     * @return The hash as a string, or null if the end of file was reached
     *         without error
     * @throws TskCoreException if a hash value does not have the 32-character
     *                          length of an MD5 hex digest
     */
    @Override
    public String getNextHash() throws TskCoreException {
        if (recordIterator.hasNext()) {
            CSVRecord record = recordIterator.next();
            String hash = record.get(hashColumnIndex);

            // MD5 hex digests are exactly 32 characters; anything else
            // indicates a malformed file.
            if (hash.length() != 32) {
                throw new TskCoreException("Hash has incorrect length: " + hash);
            }

            return (hash);
        }
        return null;
    }

    /**
     * Check if there are more hashes to read
     *
     * @return true if we've read all expected hash values, false otherwise
     */
    @Override
    public boolean doneReading() {
        return (!recordIterator.hasNext());
    }

    /**
     * Get the expected number of hashes in the file. This number can be an
     * estimate.
     *
     * @return The expected hash count
     */
    @Override
    public long getExpectedHashCount() {
        return expectedHashCount;
    }

    /**
     * Closes the import file. Safe to call more than once; closing the reader
     * also releases the underlying file stream used by the CSV parser.
     */
    @Override
    public final void close() {
        if (inputStreamReader != null) {
            try {
                inputStreamReader.close();
            } catch (IOException ex) {
                Logger.getLogger(HashkeeperHashSetParser.class.getName()).log(Level.SEVERE, "Error closing Hashkeeper hash set " + filename, ex);
            } finally {
                // Null out so a second close() is a no-op
                inputStreamReader = null;
            }
        }
    }
}

View File

@ -28,7 +28,8 @@ import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
/** /**
* Parser for idx files (*.idx) * Parser for idx files and md5sum files (*.idx or *.txt) This parses lines
* that start with md5 hashes and ignores any others
*/ */
class IdxHashSetParser implements HashSetParser { class IdxHashSetParser implements HashSetParser {
@ -49,6 +50,7 @@ class IdxHashSetParser implements HashSetParser {
File importFile = new File(filename); File importFile = new File(filename);
long fileSize = importFile.length(); long fileSize = importFile.length();
totalHashes = fileSize / 0x33 + 1; // IDX file lines are generally 0x33 bytes long. We add one to prevent this from being zero totalHashes = fileSize / 0x33 + 1; // IDX file lines are generally 0x33 bytes long. We add one to prevent this from being zero
// MD5sum output lines should be close enough to that (0x20 byte hash + filename)
} }
/** /**
@ -65,14 +67,15 @@ class IdxHashSetParser implements HashSetParser {
try { try {
while ((line = reader.readLine()) != null) { while ((line = reader.readLine()) != null) {
String[] parts = line.split("\\|"); // idx files have a pipe after the hash, md5sum files should have a space
String[] parts = line.split("\\|| ");
// Header lines start with a 41 character dummy hash, 1 character longer than a SHA-1 hash String hashStr = parts[0].toLowerCase();
if (parts.length != 2 || parts[0].length() == 41) { if (!hashStr.matches("^[0-9a-f]{32}$")) {
continue; continue;
} }
return parts[0].toLowerCase(); return hashStr;
} }
} catch (IOException ex) { } catch (IOException ex) {
throw new TskCoreException("Error reading file " + filename, ex); throw new TskCoreException("Error reading file " + filename, ex);

View File

@ -213,12 +213,14 @@ class ImportCentralRepoDbProgressDialog extends javax.swing.JDialog implements P
// Create the hash set parser // Create the hash set parser
HashSetParser hashSetParser; HashSetParser hashSetParser;
if (importFileName.toLowerCase().endsWith(".idx")) { if (importFileName.toLowerCase().endsWith(".idx") || importFileName.toLowerCase().endsWith(".txt")) {
hashSetParser = new IdxHashSetParser(importFileName); hashSetParser = new IdxHashSetParser(importFileName);
} else if(importFileName.toLowerCase().endsWith(".hash")){ } else if (importFileName.toLowerCase().endsWith(".hash")) {
hashSetParser = new EncaseHashSetParser(importFileName); hashSetParser = new EncaseHashSetParser(importFileName);
} else if(importFileName.toLowerCase().endsWith(".kdb")){ } else if (importFileName.toLowerCase().endsWith(".kdb")) {
hashSetParser = new KdbHashSetParser(importFileName); hashSetParser = new KdbHashSetParser(importFileName);
} else if (importFileName.toLowerCase().endsWith(".hsh")) {
hashSetParser = new HashkeeperHashSetParser(importFileName);
} else { } else {
// We've gotten here with a format that can't be processed // We've gotten here with a format that can't be processed
throw new TskCoreException("Hash set to import is an unknown format : " + importFileName); throw new TskCoreException("Hash set to import is an unknown format : " + importFileName);