From c5598a1a7a5dba2b16c7a5ca83ff38a66b8d870a Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 24 May 2021 14:03:19 -0400 Subject: [PATCH 1/6] Not holding jobs lock while scanning input directories --- .../autoingest/AutoIngestManager.java | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java index e3cace62fc..98ab0979a5 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java @@ -1123,7 +1123,6 @@ final class AutoIngestManager extends Observable implements PropertyChangeListen private final List newPendingJobsList = new ArrayList<>(); private final List newCompletedJobsList = new ArrayList<>(); - private Lock currentDirLock; /** * Searches the input directories for manifest files. The search results @@ -1131,27 +1130,29 @@ final class AutoIngestManager extends Observable implements PropertyChangeListen * list. */ private void scan() { - synchronized (jobsLock) { - if (Thread.currentThread().isInterrupted()) { - return; - } - try { - newPendingJobsList.clear(); - newCompletedJobsList.clear(); - Files.walkFileTree(rootInputDirectory, EnumSet.of(FOLLOW_LINKS), Integer.MAX_VALUE, this); - Collections.sort(newPendingJobsList); + + if (Thread.currentThread().isInterrupted()) { + return; + } + try { + newPendingJobsList.clear(); + newCompletedJobsList.clear(); + Files.walkFileTree(rootInputDirectory, EnumSet.of(FOLLOW_LINKS), Integer.MAX_VALUE, this); + Collections.sort(newPendingJobsList); + synchronized (jobsLock) { AutoIngestManager.this.pendingJobs = newPendingJobsList; AutoIngestManager.this.completedJobs = newCompletedJobsList; - - } catch (Exception ex) { - /* - * NOTE: Need to catch all unhandled exceptions here. - * Otherwise uncaught exceptions will propagate up to the - * calling thread and may stop it from running. - */ - sysLogger.log(Level.SEVERE, String.format("Error scanning the input directory %s", rootInputDirectory), ex); } + + } catch (Exception ex) { + /* + * NOTE: Need to catch all unhandled exceptions here. Otherwise + * uncaught exceptions will propagate up to the calling thread + * and may stop it from running. + */ + sysLogger.log(Level.SEVERE, String.format("Error scanning the input directory %s", rootInputDirectory), ex); } + synchronized (scanMonitor) { scanMonitor.notify(); } From c9212c042098c2605e2b9a5c2a2b36f6ba13b413 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 2 Jun 2021 10:35:16 -0400 Subject: [PATCH 2/6] updates based on comments --- .../sleuthkit/autopsy/datamodel/AbstractContentNode.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/datamodel/AbstractContentNode.java b/Core/src/org/sleuthkit/autopsy/datamodel/AbstractContentNode.java index 7e0ee1a883..dabb45ede2 100644 --- a/Core/src/org/sleuthkit/autopsy/datamodel/AbstractContentNode.java +++ b/Core/src/org/sleuthkit/autopsy/datamodel/AbstractContentNode.java @@ -349,11 +349,7 @@ public abstract class AbstractContentNode extends ContentNode protected Pair getScorePropertyAndDescription(List tags) { Score score = Score.SCORE_UNKNOWN; try { - if (content instanceof AnalysisResult) { - score = ((AnalysisResult) content).getScore(); - } else { - score = this.content.getAggregateScore(); - } + score = this.content.getAggregateScore(); } catch (TskCoreException ex) { logger.log(Level.WARNING, "Unable to get aggregate score for content with id: " + this.content.getId(), ex); } From 16727d6f02a65ed7c6d849d786dca9ea728582eb Mon Sep 17 00:00:00 2001 From: apriestman Date: Mon, 7 Jun 2021 11:29:54 -0400 Subject: [PATCH 3/6] Check for null files --- .../autopsy/recentactivity/ExtractZoneIdentifier.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractZoneIdentifier.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractZoneIdentifier.java index 393ecb010f..cdbacce33e 100755 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractZoneIdentifier.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractZoneIdentifier.java @@ -211,6 +211,11 @@ final class ExtractZoneIdentifier extends Extract { * @return true if possibleDownloadFile corresponds to zoneFile, false otherwise. */ private boolean isZoneFileMatch(AbstractFile zoneFile, String expectedDownloadFileName, AbstractFile possibleDownloadFile) { + + if (zoneFile == null || possibleDownloadFile == null || expectedDownloadFileName == null) { + return false; + } + if (zoneFile.getMetaAddr() != possibleDownloadFile.getMetaAddr()) { return false; } From 944b89c34e4f69c799a1734e6f4fbb430292c97b Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 7 Jun 2021 21:37:42 -0400 Subject: [PATCH 4/6] scroll to top --- .../analysisresults/AnalysisResultsContentPanel.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Core/src/org/sleuthkit/autopsy/contentviewers/analysisresults/AnalysisResultsContentPanel.java b/Core/src/org/sleuthkit/autopsy/contentviewers/analysisresults/AnalysisResultsContentPanel.java index 72ab3c6853..bf3c45d0f3 100644 --- a/Core/src/org/sleuthkit/autopsy/contentviewers/analysisresults/AnalysisResultsContentPanel.java +++ b/Core/src/org/sleuthkit/autopsy/contentviewers/analysisresults/AnalysisResultsContentPanel.java @@ -147,6 +147,9 @@ public class AnalysisResultsContentPanel extends javax.swing.JPanel { Optional selectedResult = nodeResults.getSelectedResult(); if (selectedResult.isPresent()) { textPanel.scrollToReference(getAnchor(selectedResult.get())); + } else { + // otherwise, scroll to the beginning. + textPanel.setCaretPosition(0); } } From 4e655d79349a8c2a340b01ebb483c055b1d8881b Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Wed, 9 Jun 2021 10:16:54 -0400 Subject: [PATCH 5/6] Only detecting language on first chunk of the document --- .../autopsy/keywordsearch/Bundle.properties-MERGED | 2 +- .../org/sleuthkit/autopsy/keywordsearch/Ingester.java | 11 ++++++++--- .../LanguageSpecificContentIndexingHelper.java | 8 ++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED index 2839859347..8a30223f6b 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties-MERGED @@ -52,7 +52,7 @@ KeywordSearchResultFactory.createNodeForKey.noResultsFound.text=No results found KeywordSearchResultFactory.query.exception.msg=Could not perform the query OpenIDE-Module-Display-Category=Ingest Module -OpenIDE-Module-Long-Description=Keyword Search ingest module.\n\nThe module indexes files found in the disk image at ingest time.\nIt then periodically runs the search on the indexed files using one or more keyword lists (containing pure words and/or regular expressions) and posts results.\n\n\The module also contains additional tools integrated in the main GUI, such as keyword list configuration, keyword search bar in the top-right corner, extracted text viewer and search results viewer showing highlighted keywords found. +OpenIDE-Module-Long-Description=Keyword Search ingest module.\n\nThe module indexes files found in the disk image at ingest time.\nIt then periodically runs the search on the indexed files using one or more keyword lists (containing pure words and/or regular expressions) and posts results.\n\nThe module also contains additional tools integrated in the main GUI, such as keyword list configuration, keyword search bar in the top-right corner, extracted text viewer and search results viewer showing highlighted keywords found. OpenIDE-Module-Name=KeywordSearch OptionsCategory_Name_KeywordSearchOptions=Keyword Search OptionsCategory_Keywords_KeywordSearchOptions=Keyword Search diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 0cc52149b7..884ac24f0d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -198,6 +198,7 @@ class Ingester { int numChunks = 0; //unknown until chunking is done Map contentFields = Collections.unmodifiableMap(getContentFields(source)); + Optional language = Optional.empty(); //Get a reader for the content of the given source try (BufferedReader reader = new BufferedReader(sourceReader)) { Chunker chunker = new Chunker(reader); @@ -212,11 +213,15 @@ class Ingester { String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); fields.put(Server.Schema.ID.toString(), chunkId); fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); - Optional language = Optional.empty(); + if (doLanguageDetection) { - language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk); - language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); + int size = Math.min(chunk.getBaseChunkLength(), 4096); + language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); + + // only do language detection on the first chunk of the document + doLanguageDetection = false; } + language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); try { //add the chunk text to Solr index indexChunk(chunk.toString(), chunk.geLowerCasedChunk(), sourceName, fields); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java index 387399d7ae..27685b0f97 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java @@ -35,15 +35,15 @@ import java.util.Optional; class LanguageSpecificContentIndexingHelper { private final LanguageDetector languageDetector = new LanguageDetector(); - - Optional detectLanguageIfNeeded(Chunker.Chunk chunk) throws NoOpenCoreException { + + Optional detectLanguageIfNeeded(String text) throws NoOpenCoreException { double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion()); if (2.2 <= indexSchemaVersion) { - return languageDetector.detect(chunk.toString()); + return languageDetector.detect(text); } else { return Optional.empty(); } - } + } void updateLanguageSpecificFields(Map fields, Chunker.Chunk chunk, Language language) { List values = new ArrayList<>(); From c84e6680104bf4dd1fe929555f464fb0482c25b2 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Wed, 9 Jun 2021 13:55:57 -0400 Subject: [PATCH 6/6] Minor --- .../src/org/sleuthkit/autopsy/keywordsearch/Ingester.java | 5 +++-- .../keywordsearch/LanguageSpecificContentIndexingHelper.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 884ac24f0d..052a0b0b16 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2018 Basis Technology Corp. + * Copyright 2011-2021 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -64,6 +64,7 @@ class Ingester { private static Ingester instance; private final LanguageSpecificContentIndexingHelper languageSpecificContentIndexingHelper = new LanguageSpecificContentIndexingHelper(); + private static final int LANGUAGE_DETECTION_STRING_SIZE = 4096; private Ingester() { } @@ -215,7 +216,7 @@ class Ingester { fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); if (doLanguageDetection) { - int size = Math.min(chunk.getBaseChunkLength(), 4096); + int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE); language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size)); // only do language detection on the first chunk of the document diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java index 27685b0f97..345126b62c 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2019 Basis Technology Corp. + * Copyright 2011-2021 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License");