cleaned up method names and fixed regex to ignore case

This commit is contained in:
Kelly Kelly 2022-11-09 13:31:35 -05:00
parent 760f8e027e
commit c760a6eeee
8 changed files with 16 additions and 148 deletions

View File

@ -1,124 +0,0 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.modules.tester;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.openide.util.lookup.ServiceProvider;
import org.sleuthkit.autopsy.coreutils.ImageUtils;
import org.sleuthkit.autopsy.ingest.DataSourceIngestModule;
import org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress;
import org.sleuthkit.autopsy.ingest.IngestModuleFactory;
import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
import org.sleuthkit.datamodel.Content;
@ServiceProvider(service = IngestModuleFactory.class)
public class TestFactory implements IngestModuleFactory {

    @Override
    public String getModuleDisplayName() {
        return "Kellys Test Module";
    }

    @Override
    public String getModuleDescription() {
        return "A happy fun time.";
    }

    @Override
    public String getModuleVersionNumber() {
        return "1";
    }

    @Override
    public boolean isDataSourceIngestModuleFactory() {
        return true;
    }

    @Override
    public DataSourceIngestModule createDataSourceIngestModule(IngestModuleIngestJobSettings settings) {
        return new TestModule();
    }

    /**
     * Throwaway data-source ingest module used for local experimentation.
     * Static nested class: it reads no state from the enclosing factory, so
     * there is no need to capture an enclosing-instance reference.
     */
    static class TestModule implements DataSourceIngestModule {

        /**
         * Clears stale scratch output from a previous local test run and
         * reports success. All paths are hard-coded developer scratch
         * locations (C:\Temp\...) — this module is for manual testing only
         * and does nothing with the actual data source.
         *
         * @param dataSource  the data source being ingested (unused)
         * @param progressBar ingest progress reporter (unused)
         *
         * @return always ProcessResult.OK
         */
        @Override
        public ProcessResult process(Content dataSource, DataSourceIngestModuleProgress progressBar) {
            // Hard-coded scratch outputs used for local testing only.
            File outputFile = new File("C:\\Temp\\output\\output.avi");
            File strout = new File("C:\\Temp\\output\\out.txt");
            File strerr = new File("C:\\Temp\\output\\err.txt");

            // Remove leftovers from a previous run so each run starts clean.
            deleteIfExists(outputFile);
            deleteIfExists(strout);
            deleteIfExists(strerr);

            return ProcessResult.OK;
        }

        /**
         * Deletes the given file if it exists; best-effort, failures ignored
         * (scratch files only).
         */
        private static void deleteIfExists(File file) {
            if (file.exists()) {
                file.delete();
            }
        }
    }
}

View File

@ -15,7 +15,6 @@ ExtractAllTermsReport.error.noOpenCase=No currently open case.
ExtractAllTermsReport.export.error=Error During Unique Word Extraction
ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete
ExtractAllTermsReport.getName.text=Extract Unique Words
# {0} - Number of extracted terms
ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms...
ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>
# {0} - Keyword search commit frequency
@ -24,7 +23,6 @@ ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try aga
ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress
ExtractAllTermsReport.startExport=Starting Unique Word Extraction
ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span>
# {0} - Content name
ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0}
GlobalEditListPanel.editKeyword.title=Edit Keyword
GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,]
@ -176,7 +174,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi
KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt)
KeywordSearch.listImportFeatureTitle=Keyword List Import
KeywordSearchIngestModule.moduleName=Keyword Search
KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists.
KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists.
DropdownSearchPanel.keywordTextField.text=
KeywordSearchPanel.searchDropButton.text=Keyword Search
DropdownSearchPanel.exactRadioButton.text=Exact Match
@ -228,7 +226,6 @@ KeywordSearchSettings.propertiesNSRL.text={0}_NSRL
KeywordSearchSettings.propertiesScripts.text={0}_Scripts
NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core.
SearchRunner.query.exception.msg=Error performing query:
# {0} - collection name
Server.deleteCore.exception.msg=Failed to delete Solr collection {0}
Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection
Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection

View File

@ -146,9 +146,9 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
< T extends SleuthkitVisitableItem> boolean indexTextAndSearch(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
< T extends SleuthkitVisitableItem> boolean search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
boolean doLanguageDetection = true;
return indexTextAndSearch(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
}
/**
@ -170,16 +170,16 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
< T extends SleuthkitVisitableItem> boolean indexStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr) throws Ingester.IngesterException {
< T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr) throws Ingester.IngesterException {
// Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time.
boolean doLanguageDetection = false;
return indexTextAndSearch(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, null);
return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, null);
}
< T extends SleuthkitVisitableItem> boolean indexAndSearchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
< T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
// Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time.
boolean doLanguageDetection = false;
return indexTextAndSearch(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
}
/**
@ -201,7 +201,7 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
private < T extends SleuthkitVisitableItem> boolean indexTextAndSearch(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
private < T extends SleuthkitVisitableItem> boolean search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
int numChunks = 0; //unknown until chunking is done
Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source));

View File

@ -104,7 +104,7 @@ final class InlineSearcher {
try {
// validate the regex
Pattern pattern = Pattern.compile(regex);
Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
Matcher matcher = pattern.matcher(text);
if (matcher.find()) {
@ -184,7 +184,7 @@ final class InlineSearcher {
searchPattern = keywordString;
}
final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(searchPattern, java.util.regex.Pattern.CASE_INSENSITIVE);
final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
try {
String content = text;

View File

@ -723,7 +723,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
return true;
}
Reader extractedTextReader = KeywordSearchUtil.getReader(aFile, stringsExtractionContext);
if (Ingester.getDefault().indexAndSearchStrings(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists())) {
if (Ingester.getDefault().searchStrings(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists())) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
return true;
} else {

View File

@ -20,12 +20,10 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.io.Reader;
import java.util.logging.Level;
import org.openide.util.Lookup;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.AnalysisResultIngestModule;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestModule;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.AnalysisResult;

View File

@ -20,12 +20,9 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.io.Reader;
import java.util.logging.Level;
import org.openide.util.Exceptions;
import org.openide.util.Lookup;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.DataArtifactIngestModule;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.BlackboardArtifact;
@ -58,7 +55,7 @@ public class KwsDataArtifactIngestModule implements DataArtifactIngestModule {
public ProcessResult process(DataArtifact artifact) {
try {
if (artifact.getType().getTypeID() != TSK_ASSOCIATED_OBJECT_TYPE_ID) {
Ingester ingester = Ingester.getDefault();
Ingester ingester = Ingester.getDefault();
Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(artifact);
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
ingester.indexMetaDataOnly(artifact, sourceName);

View File

@ -96,7 +96,7 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(content);
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
ingester.indexMetaDataOnly(artifact, sourceName);
ingester.indexTextAndSearch(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null, true, null);
ingester.search(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null, true, null);
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
throw new TskCoreException("Error indexing artifact", ex);
}
@ -104,7 +104,7 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
try {
Reader reader = KeywordSearchUtil.getReader(content);
ingester.indexTextAndSearch(reader, content.getId(), content.getName(), content, null, true, null);
ingester.search(reader, content.getId(), content.getName(), content, null, true, null);
} catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.InitReaderException ex) {
throw new TskCoreException("Error indexing content", ex);
}
@ -413,10 +413,10 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
try {
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null);
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(artifact, null);
Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
ingester.indexMetaDataOnly(artifact, sourceName);
ingester.indexTextAndSearch(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null, true, null);
ingester.search(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null, true, null);
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
throw new TskCoreException(ex.getCause().getMessage(), ex);
}