From 62ad3e1eb23f7a81705b30378d2d3c024135e298 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Tue, 3 Jan 2017 16:51:57 -0500 Subject: [PATCH 01/13] First cut of index search algorithm --- .../autopsy/keywordsearch/Server.java | 185 ++++++++++++++---- 1 file changed, 142 insertions(+), 43 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 1f4edc36a5..4fc629dd53 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -5,7 +5,7 @@ * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. + * you may not use this folder except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -39,6 +39,7 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; @@ -69,6 +70,7 @@ import org.sleuthkit.autopsy.coreutils.ModuleSettings; import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.coreutils.UNCPathUtilities; import org.sleuthkit.datamodel.Content; +//ELTODO import static org.sleuthkit.autopsy.casemodule.Case.MODULE_FOLDER; /** * Handles management of a either a local or centralized Solr server and its @@ -185,6 +187,10 @@ public class Server { private UNCPathUtilities uncPathUtilities = null; private static final String SOLR = "solr"; private static final String CORE_PROPERTIES = "core.properties"; + private static final String MODULE_OUTPUT = "ModuleOutput"; // ELTODO get "ModuleOutput" somehow... 
+ private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch"; + private static final String KWS_DATA_FOLDER_NAME = "data"; + private static final String INDEX_FOLDER_NAME = "index"; public enum CORE_EVT_STATES { @@ -380,7 +386,7 @@ public class Server { ProcessBuilder solrProcessBuilder = new ProcessBuilder(commandLine); solrProcessBuilder.directory(solrFolder); - // Redirect stdout and stderr to files to prevent blocking. + // Redirect stdout and stderr to folders to prevent blocking. Path solrStdoutPath = Paths.get(Places.getUserDirectory().getAbsolutePath(), "var", "log", "solr.log.stdout"); //NON-NLS solrProcessBuilder.redirectOutput(solrStdoutPath.toFile()); @@ -724,6 +730,99 @@ public class Server { } return indexDir; } + + + /** + * Find index dir location for the case. This is done by doing a subdirectory + * search of all existing "ModuleOutput/node_name/keywordsearch/data/" folders. + * + * @param theCase the case to get index dir for + * + * @return absolute path to index dir + */ + String findIndexDataDir(Case theCase) { + String indexDir = ""; + // look for existing index folder + if (theCase.getCaseType() == CaseType.MULTI_USER_CASE) { + // multi user cases contain a subfolder for each node that participated in case ingest or review. + // Any one (but only one!) of those subfolders may contain the actual index. 
+ + // create a list of all sub-directories + List subfolders = getAllSubfoldersInFolder(theCase.getCaseDirectory()); + + // scan all topLevelOutputDir subfolders for presense of non-empty "keywordsearch/data/index" folder + for (String folderName : subfolders) { + String path = Paths.get(folderName, MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + if (containsValidIndexFolder(path)) { + indexDir = path; + // ELTODO analyze possibilities of multiple indexes + break; // there should only be one index + } + } + } else { + String path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + if (containsValidIndexFolder(path)) { + indexDir = path; + } + } + + // if we still did not find index then it is a new case + if (indexDir.isEmpty()) { + indexDir = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + } + + // ELTODO do we still need this? + if (uncPathUtilities != null) { + // if we can check for UNC paths, do so, otherwise just return the indexDir + String result = uncPathUtilities.mappedDriveToUNC(indexDir); + if (result == null) { + uncPathUtilities.rescanDrives(); + result = uncPathUtilities.mappedDriveToUNC(indexDir); + } + if (result == null) { + return indexDir; + } + return result; + } + return indexDir; + } + + /** + * Returns a list of all folder names in the folder of interest. Files + * are excluded. + * + * @param path Absolute path of the folder of interest + * + * @return List of all sub-folder names in the folder of interest + */ + private static List getAllSubfoldersInFolder(String path) { + // only returns folders, skips folders + File folder = new File(path); + String[] folders = folder.list((File current, String name) -> new File(current, name).isDirectory()); + if (folders == null) { + // null is returned when folder doesn't exist. 
need to check this condition, otherwise there is NullPointerException when converting to List + return Collections.emptyList(); + } + return new ArrayList<>(Arrays.asList(folders)); + } + + boolean containsValidIndexFolder(String path) { + // create a list of all sub-directories + List subfolders = getAllSubfoldersInFolder(path); + if (subfolders.isEmpty()) { + return false; + } + + // scan all the folder for presense of non-empty "index" folder + for (String folderName : subfolders) { + if (!folderName.equals(INDEX_FOLDER_NAME)) { + continue; + } + // ELTODO check that the folder is not empty + } + + return false; + } /** * ** end single-case specific methods *** @@ -788,11 +887,11 @@ public class Server { } /** - * Execute query that gets only number of all Solr files indexed without - * actually returning the files. The result does not include chunks, only - * number of actual files. + * Execute query that gets only number of all Solr folders indexed without + actually returning the folders. The result does not include chunks, only + number of actual folders. * - * @return int representing number of indexed files + * @return int representing number of indexed folders * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -814,8 +913,8 @@ public class Server { } /** - * Execute query that gets only number of all Solr file chunks (not logical - * files) indexed without actually returning the content. + * Execute query that gets only number of all Solr folder chunks (not logical + folders) indexed without actually returning the content. 
* * @return int representing number of indexed chunks * @@ -839,10 +938,10 @@ public class Server { } /** - * Execute query that gets only number of all Solr documents indexed (files - * and chunks) without actually returning the documents + * Execute query that gets only number of all Solr documents indexed (folders + and chunks) without actually returning the documents * - * @return int representing number of indexed files (files and chunks) + * @return int representing number of indexed folders (folders and chunks) * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -864,7 +963,7 @@ public class Server { } /** - * Return true if the file is indexed (either as a whole as a chunk) + * Return true if the folder is indexed (either as a whole as a chunk) * * @param contentID * @@ -891,12 +990,12 @@ public class Server { } /** - * Execute query that gets number of indexed file chunks for a file + * Execute query that gets number of indexed folder chunks for a folder * - * @param fileID file id of the original file broken into chunks and indexed + * @param fileID folder id of the original folder broken into chunks and indexed * - * @return int representing number of indexed file chunks, 0 if there is no - * chunks + * @return int representing number of indexed folder chunks, 0 if there is no + chunks * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -997,7 +1096,7 @@ public class Server { } /** - * Get the text contents of the given file as stored in SOLR. + * Get the text contents of the given folder as stored in SOLR. * * @param content to get the text for * @@ -1018,8 +1117,8 @@ public class Server { } /** - * Get the text contents of a single chunk for the given file as stored in - * SOLR. + * Get the text contents of a single chunk for the given folder as stored in + SOLR. 
* * @param content to get the text for * @param chunkID chunk number to query (starting at 1), or 0 if there is no @@ -1094,10 +1193,10 @@ public class Server { } /** - * Given file parent id and child chunk ID, return the ID string of the - * chunk as stored in Solr, e.g. FILEID_CHUNKID + * Given folder parent id and child chunk ID, return the ID string of the + chunk as stored in Solr, e.g. FILEID_CHUNKID * - * @param parentID the parent file id (id of the source content) + * @param parentID the parent folder id (id of the source content) * @param childID the child chunk id * * @return formatted string id @@ -1138,7 +1237,7 @@ public class Server { * exist or loaded if it already exists. */ - // In single user mode, if there is a core.properties file already, + // In single user mode, if there is a core.properties folder already, // we've hit a solr bug. Compensate by deleting it. if (caseType == CaseType.SINGLE_USER_CASE) { Path corePropertiesFile = Paths.get(solrFolder.toString(), SOLR, coreName, CORE_PROPERTIES); @@ -1202,7 +1301,7 @@ public class Server { } /** - * Determines whether or not the index files folder for a Solr core exists. + * Determines whether or not the index folders folder for a Solr core exists. * * @param coreName the name of the core. 
* @@ -1240,7 +1339,7 @@ public class Server { this.solrCore = new Builder(currentSolrServer.getBaseURL() + "/" + name).build(); //NON-NLS //TODO test these settings - //solrCore.setSoTimeout(1000 * 60); // socket read timeout, make large enough so can index larger files + //solrCore.setSoTimeout(1000 * 60); // socket read timeout, make large enough so can index larger folders //solrCore.setConnectionTimeout(1000); solrCore.setDefaultMaxConnectionsPerHost(2); solrCore.setMaxTotalConnections(5); @@ -1310,7 +1409,7 @@ public class Server { } /** - * get the text from the content field for the given file + * get the text from the content field for the given folder * * @param contentID * @param chunkID @@ -1337,8 +1436,8 @@ public class Server { if (fieldValues.size() == 1) // The indexed text field for artifacts will only have a single value. { return fieldValues.toArray(new String[0])[0]; - } else // The indexed text for files has 2 values, the file name and the file content. - // We return the file content value. + } else // The indexed text for folders has 2 values, the folder name and the folder content. + // We return the folder content value. 
{ return fieldValues.toArray(new String[0])[1]; } @@ -1370,11 +1469,11 @@ public class Server { } /** - * Execute query that gets only number of all Solr files (not chunks) - * indexed without actually returning the files + * Execute query that gets only number of all Solr folders (not chunks) + indexed without actually returning the folders * - * @return int representing number of indexed files (entire files, not - * chunks) + * @return int representing number of indexed folders (entire folders, not + chunks) * * @throws SolrServerException */ @@ -1383,8 +1482,8 @@ public class Server { } /** - * Execute query that gets only number of all chunks (not logical files, - * or all documents) indexed without actually returning the content + * Execute query that gets only number of all chunks (not logical folders, + or all documents) indexed without actually returning the content * * @return int representing number of indexed chunks * @@ -1400,10 +1499,10 @@ public class Server { /** * Execute query that gets only number of all Solr documents indexed * without actually returning the documents. Documents include entire - * indexed files as well as chunks, which are treated as documents. + indexed folders as well as chunks, which are treated as documents. 
* - * @return int representing number of indexed documents (entire files - * and chunks) + * @return int representing number of indexed documents (entire folders + and chunks) * * @throws SolrServerException */ @@ -1414,7 +1513,7 @@ public class Server { } /** - * Return true if the file is indexed (either as a whole as a chunk) + * Return true if the folder is indexed (either as a whole as a chunk) * * @param contentID * @@ -1432,13 +1531,13 @@ public class Server { } /** - * Execute query that gets number of indexed file chunks for a file + * Execute query that gets number of indexed folder chunks for a folder * - * @param contentID file id of the original file broken into chunks and - * indexed + * @param contentID folder id of the original folder broken into chunks and + indexed * - * @return int representing number of indexed file chunks, 0 if there is - * no chunks + * @return int representing number of indexed folder chunks, 0 if there is + no chunks * * @throws SolrServerException */ From 7d252864a460e6e8345ff50c326e0e46efd410de Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Thu, 5 Jan 2017 13:41:12 -0500 Subject: [PATCH 02/13] Index folder finding algorithm seems to work --- .../autopsy/keywordsearch/Server.java | 61 +++++++++++-------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 4fc629dd53..3f79b42f19 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -748,11 +748,15 @@ public class Server { // Any one (but only one!) of those subfolders may contain the actual index. 
// create a list of all sub-directories - List subfolders = getAllSubfoldersInFolder(theCase.getCaseDirectory()); + List contents = getAllContentsInFolder(theCase.getCaseDirectory()); // scan all topLevelOutputDir subfolders for presense of non-empty "keywordsearch/data/index" folder - for (String folderName : subfolders) { - String path = Paths.get(folderName, MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + for (File item : contents) { + if (!item.isDirectory()) { + continue; + } + // ELTODO is it possible that index is in a different location? what about new solr6 index? + String path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS if (containsValidIndexFolder(path)) { indexDir = path; // ELTODO analyze possibilities of multiple indexes @@ -788,39 +792,47 @@ public class Server { } /** - * Returns a list of all folder names in the folder of interest. Files - * are excluded. + * Returns a list of all contents in the folder of interest. * * @param path Absolute path of the folder of interest * - * @return List of all sub-folder names in the folder of interest + * @return List of all contents in the folder of interest */ - private static List getAllSubfoldersInFolder(String path) { - // only returns folders, skips folders - File folder = new File(path); - String[] folders = folder.list((File current, String name) -> new File(current, name).isDirectory()); - if (folders == null) { - // null is returned when folder doesn't exist. need to check this condition, otherwise there is NullPointerException when converting to List + private static List getAllContentsInFolder(String path) { + File directory = new File(path); + File[] contents = directory.listFiles(); + // the directory file is not really a directory.. 
+ if (contents == null) { return Collections.emptyList(); } - return new ArrayList<>(Arrays.asList(folders)); + // Folder is empty + else if (contents.length == 0) { + return Collections.emptyList(); + } + // Folder has contents + else { + return new ArrayList<>(Arrays.asList(contents)); + } } boolean containsValidIndexFolder(String path) { - // create a list of all sub-directories - List subfolders = getAllSubfoldersInFolder(path); - if (subfolders.isEmpty()) { - return false; - } - - // scan all the folder for presense of non-empty "index" folder - for (String folderName : subfolders) { - if (!folderName.equals(INDEX_FOLDER_NAME)) { + List contents = getAllContentsInFolder(path); + // scan the folder for presense of non-empty "index" folder + for (File item : contents) { + if (!item.isDirectory()) { continue; } - // ELTODO check that the folder is not empty + // scan all the folder for presense of non-empty "index" folder + if (!item.getName().equals(INDEX_FOLDER_NAME)) { + continue; + } + // check that the folder is not empty + if (item.listFiles().length > 0) { + // ELTODO does this cover "index" folder that contains no files but some sub-folders? + // ELTODO is there more evaluation that's needed? look for a specific file perhaps? + return true; + } } - return false; } @@ -852,6 +864,7 @@ public class Server { throw new KeywordSearchModuleException(NbBundle.getMessage(Server.class, "Server.connect.exception.msg"), ex); } + //String indexDir = findIndexDataDir(theCase); // ELTODO String dataDir = geCoreDataDirPath(theCase); String coreName = theCase.getTextIndexName(); return this.openCore(coreName.isEmpty() ? 
DEFAULT_CORE_NAME : coreName, new File(dataDir), theCase.getCaseType()); From 40cc726a11221b8cac976df853eb01b64ee7e770 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Thu, 5 Jan 2017 17:16:27 -0500 Subject: [PATCH 03/13] First cut at integrating AutopsyServiceProvider --- .../AutopsyServiceProvider.java | 4 +- .../autopsy/keywordsearch/Server.java | 45 +++++++---- .../keywordsearch/SolrSearchService.java | 76 ++++++++++++++++++- 3 files changed, 109 insertions(+), 16 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java b/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java index 4635c584a8..889ee0b21d 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java @@ -117,7 +117,7 @@ public interface AutopsyServiceProvider { * * @param message Exception message. */ - AutopsyServiceProviderException(String message) { + public AutopsyServiceProviderException(String message) { super(message); } @@ -128,7 +128,7 @@ public interface AutopsyServiceProvider { * @param message Exception message. * @param throwable Exception cause. 
*/ - AutopsyServiceProviderException(String message, Throwable throwable) { + public AutopsyServiceProviderException(String message, Throwable throwable) { super(message, throwable); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 3f79b42f19..61c3694314 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -191,6 +191,8 @@ public class Server { private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch"; private static final String KWS_DATA_FOLDER_NAME = "data"; private static final String INDEX_FOLDER_NAME = "index"; + private static final String CURRENT_SOLR_VERSION = "6"; + private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0"; public enum CORE_EVT_STATES { @@ -306,6 +308,14 @@ public class Server { int getCurrentSolrStopPort() { return currentSolrStopPort; } + + String getCurrentSolrVersion() { + return CURRENT_SOLR_VERSION; + } + + String getCurrentSchemaVersion() { + return CURRENT_SOLR_SCHEMA_VERSION; + } /** * Helper threads to handle stderr/stdout from Solr process @@ -731,17 +741,25 @@ public class Server { return indexDir; } + String findLatestIndexDataDir(Case theCase) { + String dataFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; + return findIndexDataDir(theCase, dataFolderName); + } + + String findOldIndexDataDir(Case theCase) { + return findIndexDataDir(theCase, ""); + } /** - * Find index dir location for the case. This is done by doing a subdirectory + * Find index directory location for the case. This is done via subdirectory * search of all existing "ModuleOutput/node_name/keywordsearch/data/" folders. 
* * @param theCase the case to get index dir for * * @return absolute path to index dir */ - String findIndexDataDir(Case theCase) { - String indexDir = ""; + private List findIndexDataDir(Case theCase, String dataFolderName) { + ArrayList indexDirs = new ArrayList<>(); // look for existing index folder if (theCase.getCaseType() == CaseType.MULTI_USER_CASE) { // multi user cases contain a subfolder for each node that participated in case ingest or review. @@ -756,26 +774,28 @@ public class Server { continue; } // ELTODO is it possible that index is in a different location? what about new solr6 index? - String path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + String path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME, dataFolderName).toString(); //NON-NLS if (containsValidIndexFolder(path)) { - indexDir = path; - // ELTODO analyze possibilities of multiple indexes - break; // there should only be one index + indexDirs.add(path); + // there can be multiple index folders (e.g. current version and "old" version) so keep looking } } } else { - String path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + // single user case + String path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME, dataFolderName).toString(); //NON-NLS if (containsValidIndexFolder(path)) { - indexDir = path; + indexDirs.add(path); } } - // if we still did not find index then it is a new case + // did we find an index that requires an upgrade? 
if (indexDir.isEmpty()) { - indexDir = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS + return indexDir; + // ELTODO if we still did not find index then it is a new case + // ELTODO indexDir = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS } - // ELTODO do we still need this? + // ELTODO do we need to do this when searching for old index? if (uncPathUtilities != null) { // if we can check for UNC paths, do so, otherwise just return the indexDir String result = uncPathUtilities.mappedDriveToUNC(indexDir); @@ -864,7 +884,6 @@ public class Server { throw new KeywordSearchModuleException(NbBundle.getMessage(Server.class, "Server.connect.exception.msg"), ex); } - //String indexDir = findIndexDataDir(theCase); // ELTODO String dataDir = geCoreDataDirPath(theCase); String coreName = theCase.getTextIndexName(); return this.openCore(coreName.isEmpty() ? DEFAULT_CORE_NAME : coreName, new File(dataDir), theCase.getCaseType()); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index ecbeefa98b..f1a0f3ca37 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -36,6 +36,8 @@ import org.sleuthkit.datamodel.SleuthkitCase; import org.openide.util.NbBundle; import java.net.InetAddress; import java.util.MissingResourceException; +import org.sleuthkit.autopsy.core.RuntimeProperties; +import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider; import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException; /** @@ -43,7 +45,7 @@ import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException; * text indexing and search. 
*/ @ServiceProvider(service = KeywordSearchService.class) -public class SolrSearchService implements KeywordSearchService { +public class SolrSearchService implements KeywordSearchService, AutopsyServiceProvider { private static final String BAD_IP_ADDRESS_FORMAT = "ioexception occurred when talking to server"; //NON-NLS private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS @@ -230,4 +232,76 @@ public class SolrSearchService implements KeywordSearchService { @Override public void close() throws IOException { } + + /** + * + * @param context + * + * @throws + * org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider.AutopsyServiceProviderException + */ + @Override + public void openCaseResources(Context context) throws AutopsyServiceProviderException { + /* + * Autopsy service providers may not have case-level resources. + */ + Server server = KeywordSearch.getServer(); + if (server.coreIsOpen() == false) { + throw new AutopsyServiceProviderException("ELTODO"); + } + + // do a case subdirectory search to check if latest index exists + + // do a case subdirectory search to check for the existence and upgrade status of cores + String indexDir = server.findLatestIndexDataDir(Case.getCurrentCase()); // ELTODO + + // check if index needs upgrade + boolean needsUpgrade = true; + + if (needsUpgrade && RuntimeProperties.coreComponentsAreActive()) { + //pop up a message box to indicate the restrictions on adding additional + //text and performing regex searches and give the user the option to decline the upgrade + boolean upgradeDeclined = true; + if (upgradeDeclined) { + throw new AutopsyServiceProviderException("ELTODO"); + } + } + + if (needsUpgrade) { + // ELTODO Check for cancellation at whatever points are feasible + + // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Make a “reference copy” of the configset and place it in 
ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset + + // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Open the upgraded index + + // execute a test query + boolean success = true; + + if (!success) { + // delete the new directories + + // close the upgraded index? + + throw new AutopsyServiceProviderException("ELTODO"); + } + } + } + + /** + * + * @param context + * + * @throws + * org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider.AutopsyServiceProviderException + */ + @Override + public void closeCaseResources(Context context) throws AutopsyServiceProviderException { + /* + * Autopsy service providers may not have case-level resources. + */ + } } From b05dded08a4fc46a04615a17f0f320db4d011b1b Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 15:48:44 -0500 Subject: [PATCH 04/13] Got inex folder search algorithm to work --- .../autopsy/keywordsearch/Server.java | 235 +++++++++++------- 1 file changed, 148 insertions(+), 87 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 61c3694314..e2138affea 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -43,6 +43,8 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import javax.swing.AbstractAction; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrRequest; @@ -193,6 +195,7 @@ public class Server { private static final String INDEX_FOLDER_NAME = "index"; private static final String CURRENT_SOLR_VERSION = "6"; private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0"; + private static final Pattern 
INDEX_FOLDER_NAME_PATTERN = Pattern.compile("^solr\\d{1,2}_schema_\\d{1,2}.\\d{1,2}$"); public enum CORE_EVT_STATES { @@ -308,11 +311,11 @@ public class Server { int getCurrentSolrStopPort() { return currentSolrStopPort; } - + String getCurrentSolrVersion() { return CURRENT_SOLR_VERSION; } - + String getCurrentSchemaVersion() { return CURRENT_SOLR_SCHEMA_VERSION; } @@ -385,7 +388,9 @@ public class Server { * Run a Solr command with the given arguments. * * @param solrArguments Command line arguments to pass to the Solr command. + * * @return + * * @throws IOException */ private Process runSolrCommand(List solrArguments) throws IOException { @@ -618,7 +623,7 @@ public class Server { * request. * * @return false if the request failed with a connection error, otherwise - * true + * true */ synchronized boolean isRunning() throws KeywordSearchModuleException { try { @@ -740,79 +745,97 @@ public class Server { } return indexDir; } - + String findLatestIndexDataDir(Case theCase) { - String dataFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; - return findIndexDataDir(theCase, dataFolderName); + String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; + List allIndexes = findAllIndexDirs(theCase); + for (String path : allIndexes) { + if (path.contains(indexFolderName)) { + return path; + } + } + return ""; } - - String findOldIndexDataDir(Case theCase) { - return findIndexDataDir(theCase, ""); - } - + /** * Find index directory location for the case. This is done via subdirectory - * search of all existing "ModuleOutput/node_name/keywordsearch/data/" folders. + * search of all existing "ModuleOutput/node_name/keywordsearch/data/" + * folders. 
* * @param theCase the case to get index dir for * * @return absolute path to index dir */ - private List findIndexDataDir(Case theCase, String dataFolderName) { - ArrayList indexDirs = new ArrayList<>(); - // look for existing index folder + private List findAllIndexDirs(Case theCase) { + ArrayList candidateIndexDirs = new ArrayList<>(); + // first find all existing "/ModuleOutput/keywordsearch/data/" folders if (theCase.getCaseType() == CaseType.MULTI_USER_CASE) { // multi user cases contain a subfolder for each node that participated in case ingest or review. // Any one (but only one!) of those subfolders may contain the actual index. + /* NOTE: All of the following paths are valid multi-user index paths: + X:\Case\ingest1\ModuleOutput\keywordsearch\data\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ // create a list of all sub-directories List contents = getAllContentsInFolder(theCase.getCaseDirectory()); - // scan all topLevelOutputDir subfolders for presense of non-empty "keywordsearch/data/index" folder + // scan all topLevelOutputDir subfolders for presense of non-empty "/ModuleOutput/keywordsearch/data/" folder for (File item : contents) { - if (!item.isDirectory()) { - continue; - } - // ELTODO is it possible that index is in a different location? what about new solr6 index? - String path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME, dataFolderName).toString(); //NON-NLS - if (containsValidIndexFolder(path)) { - indexDirs.add(path); - // there can be multiple index folders (e.g. 
current version and "old" version) so keep looking + File path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS + // must be a non-empty directory + if (path.exists() && path.isDirectory()) { + candidateIndexDirs.add(path.toString()); } } } else { // single user case - String path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME, dataFolderName).toString(); //NON-NLS - if (containsValidIndexFolder(path)) { - indexDirs.add(path); + /* NOTE: All of the following paths are valid single user index paths: + X:\Case\ModuleOutput\keywordsearch\data\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ + File path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS + // must be a non-empty directory + if (path.exists() && path.isDirectory()) { + candidateIndexDirs.add(path.toString()); } } - // did we find an index that requires an upgrade? - if (indexDir.isEmpty()) { - return indexDir; - // ELTODO if we still did not find index then it is a new case - // ELTODO indexDir = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toString(); //NON-NLS - } - - // ELTODO do we need to do this when searching for old index? 
- if (uncPathUtilities != null) { - // if we can check for UNC paths, do so, otherwise just return the indexDir - String result = uncPathUtilities.mappedDriveToUNC(indexDir); - if (result == null) { - uncPathUtilities.rescanDrives(); - result = uncPathUtilities.mappedDriveToUNC(indexDir); + // analyze possible index folders + ArrayList indexDirs = new ArrayList<>(); + for (String path : candidateIndexDirs) { + List validIndexPaths = containsValidIndexFolder(path); + for (String validPath : validIndexPaths) { + indexDirs.add(convertPathToUNC(validPath)); + // there can be multiple index folders (e.g. current version and "old" version) so keep looking } - if (result == null) { - return indexDir; - } - return result; } - return indexDir; + return indexDirs; } - /** - * Returns a list of all contents in the folder of interest. + String convertPathToUNC(String indexDir) { + // ELTODO do we need to do this when searching for old index? + if (uncPathUtilities == null) { + return indexDir; + } + // if we can check for UNC paths, do so, otherwise just return the indexDir + String result = uncPathUtilities.mappedDriveToUNC(indexDir); + if (result == null) { + uncPathUtilities.rescanDrives(); + result = uncPathUtilities.mappedDriveToUNC(indexDir); + } + if (result == null) { + return indexDir; + } + return result; + } + + /** + * Returns a list of all contents in the folder of interest. * * @param path Absolute path of the folder of interest * @@ -824,37 +847,69 @@ public class Server { // the directory file is not really a directory.. 
if (contents == null) { return Collections.emptyList(); - } - // Folder is empty + } // Folder is empty else if (contents.length == 0) { return Collections.emptyList(); - } - // Folder has contents + } // Folder has contents else { return new ArrayList<>(Arrays.asList(contents)); } } - - boolean containsValidIndexFolder(String path) { + + private List containsValidIndexFolder(String path) { + /* NOTE: All of the following paths are valid index paths: + X:\Case\ModuleOutput\keywordsearch\data\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ + + List indexFolders = new ArrayList<>(); List contents = getAllContentsInFolder(path); // scan the folder for presense of non-empty "index" folder for (File item : contents) { - if (!item.isDirectory()) { + // scan all subfolders for presense of non-empty "index" folder + if (isNonEmptyIndexFolder(item)) { + indexFolders.add(item.getAbsolutePath()); + // keep looking as there may be more index folders continue; } - // scan all the folder for presense of non-empty "index" folder - if (!item.getName().equals(INDEX_FOLDER_NAME)) { - continue; - } - // check that the folder is not empty - if (item.listFiles().length > 0) { - // ELTODO does this cover "index" folder that contains no files but some sub-folders? - // ELTODO is there more evaluation that's needed? look for a specific file perhaps? 
- return true; + + // check if the folder matches "solrX_schema_Y" patern + if (matchesIndexFolderNameStandard(item.getName())) { + File nextLevelIndexFolder = Paths.get(item.getAbsolutePath(), INDEX_FOLDER_NAME).toFile(); + // look for "index" sub-folder one level deeper + if (isNonEmptyIndexFolder(nextLevelIndexFolder)) { + indexFolders.add(nextLevelIndexFolder.getAbsolutePath()); + // keep looking as there may be more index folders + } } } + return indexFolders; + } + + private boolean isNonEmptyIndexFolder(File path) { + if (path.exists() && path.isDirectory() && path.getName().equals(INDEX_FOLDER_NAME) && path.listFiles().length > 0) { + return true; + } return false; } + + /** + * Checks whether a name matches index folder name standard + * + * @param inputString The string to check. + * + * @return True or false. + */ + public static boolean matchesIndexFolderNameStandard(String inputString) { + Matcher m = INDEX_FOLDER_NAME_PATTERN.matcher(inputString); + return m.find(); + } /** * ** end single-case specific methods *** @@ -870,6 +925,9 @@ public class Server { * creating/opening the core. */ private Core openCore(Case theCase) throws KeywordSearchModuleException { + + String indexDir = findLatestIndexDataDir(Case.getCurrentCase()); // ELTODO + try { if (theCase.getCaseType() == CaseType.SINGLE_USER_CASE) { currentSolrServer = this.localSolrServer; @@ -920,8 +978,8 @@ public class Server { /** * Execute query that gets only number of all Solr folders indexed without - actually returning the folders. The result does not include chunks, only - number of actual folders. + * actually returning the folders. The result does not include chunks, only + * number of actual folders. * * @return int representing number of indexed folders * @@ -945,8 +1003,8 @@ public class Server { } /** - * Execute query that gets only number of all Solr folder chunks (not logical - folders) indexed without actually returning the content. 
+ * Execute query that gets only number of all Solr folder chunks (not + * logical folders) indexed without actually returning the content. * * @return int representing number of indexed chunks * @@ -970,8 +1028,8 @@ public class Server { } /** - * Execute query that gets only number of all Solr documents indexed (folders - and chunks) without actually returning the documents + * Execute query that gets only number of all Solr documents indexed + * (folders and chunks) without actually returning the documents * * @return int representing number of indexed folders (folders and chunks) * @@ -1024,10 +1082,11 @@ public class Server { /** * Execute query that gets number of indexed folder chunks for a folder * - * @param fileID folder id of the original folder broken into chunks and indexed + * @param fileID folder id of the original folder broken into chunks and + * indexed * - * @return int representing number of indexed folder chunks, 0 if there is no - chunks + * @return int representing number of indexed folder chunks, 0 if there is + * no chunks * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -1150,7 +1209,7 @@ public class Server { /** * Get the text contents of a single chunk for the given folder as stored in - SOLR. + * SOLR. * * @param content to get the text for * @param chunkID chunk number to query (starting at 1), or 0 if there is no @@ -1226,7 +1285,7 @@ public class Server { /** * Given folder parent id and child chunk ID, return the ID string of the - chunk as stored in Solr, e.g. FILEID_CHUNKID + * chunk as stored in Solr, e.g. FILEID_CHUNKID * * @param parentID the parent folder id (id of the source content) * @param childID the child chunk id @@ -1333,7 +1392,8 @@ public class Server { } /** - * Determines whether or not the index folders folder for a Solr core exists. + * Determines whether or not the index folders folder for a Solr core + * exists. * * @param coreName the name of the core. 
* @@ -1502,10 +1562,10 @@ public class Server { /** * Execute query that gets only number of all Solr folders (not chunks) - indexed without actually returning the folders + * indexed without actually returning the folders * - * @return int representing number of indexed folders (entire folders, not - chunks) + * @return int representing number of indexed folders (entire folders, + * not chunks) * * @throws SolrServerException */ @@ -1514,8 +1574,9 @@ public class Server { } /** - * Execute query that gets only number of all chunks (not logical folders, - or all documents) indexed without actually returning the content + * Execute query that gets only number of all chunks (not logical + * folders, or all documents) indexed without actually returning the + * content * * @return int representing number of indexed chunks * @@ -1531,10 +1592,10 @@ public class Server { /** * Execute query that gets only number of all Solr documents indexed * without actually returning the documents. Documents include entire - indexed folders as well as chunks, which are treated as documents. + * indexed folders as well as chunks, which are treated as documents. 
* * @return int representing number of indexed documents (entire folders - and chunks) + * and chunks) * * @throws SolrServerException */ @@ -1565,11 +1626,11 @@ public class Server { /** * Execute query that gets number of indexed folder chunks for a folder * - * @param contentID folder id of the original folder broken into chunks and - indexed + * @param contentID folder id of the original folder broken into chunks + * and indexed * - * @return int representing number of indexed folder chunks, 0 if there is - no chunks + * @return int representing number of indexed folder chunks, 0 if there + * is no chunks * * @throws SolrServerException */ From 21f2efbdcf5e31e214d0692c5b8a92bfa6d7e5d9 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 16:05:11 -0500 Subject: [PATCH 05/13] More work --- .../autopsy/keywordsearch/Server.java | 11 ++- .../keywordsearch/SolrSearchService.java | 68 +++++++++++-------- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index e2138affea..b6eefa9d2d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -746,9 +746,8 @@ public class Server { return indexDir; } - String findLatestIndexDataDir(Case theCase) { + String findLatestVersionIndexDir(List allIndexes) { String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; - List allIndexes = findAllIndexDirs(theCase); for (String path : allIndexes) { if (path.contains(indexFolderName)) { return path; @@ -766,7 +765,7 @@ public class Server { * * @return absolute path to index dir */ - private List findAllIndexDirs(Case theCase) { + List findAllIndexDirs(Case theCase) { ArrayList candidateIndexDirs = new ArrayList<>(); // first find all existing "/ModuleOutput/keywordsearch/data/" folders if 
(theCase.getCaseType() == CaseType.MULTI_USER_CASE) { @@ -808,7 +807,7 @@ public class Server { // analyze possible index folders ArrayList indexDirs = new ArrayList<>(); for (String path : candidateIndexDirs) { - List validIndexPaths = containsValidIndexFolder(path); + List validIndexPaths = containsValidIndexFolders(path); for (String validPath : validIndexPaths) { indexDirs.add(convertPathToUNC(validPath)); // there can be multiple index folders (e.g. current version and "old" version) so keep looking @@ -856,7 +855,7 @@ public class Server { } } - private List containsValidIndexFolder(String path) { + private List containsValidIndexFolders(String path) { /* NOTE: All of the following paths are valid index paths: X:\Case\ModuleOutput\keywordsearch\data\index X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index @@ -926,7 +925,7 @@ public class Server { */ private Core openCore(Case theCase) throws KeywordSearchModuleException { - String indexDir = findLatestIndexDataDir(Case.getCurrentCase()); // ELTODO + // ELTODO String indexDir = findLatestVersionIndexDir(Case.getCurrentCase()); // ELTODO try { if (theCase.getCaseType() == CaseType.SINGLE_USER_CASE) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index f1a0f3ca37..fc6a922f2f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -35,6 +35,7 @@ import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.SleuthkitCase; import org.openide.util.NbBundle; import java.net.InetAddress; +import java.util.List; import java.util.MissingResourceException; import org.sleuthkit.autopsy.core.RuntimeProperties; import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider; @@ -253,40 +254,49 @@ public class SolrSearchService implements 
KeywordSearchService, AutopsyServicePr // do a case subdirectory search to check if latest index exists // do a case subdirectory search to check for the existence and upgrade status of cores - String indexDir = server.findLatestIndexDataDir(Case.getCurrentCase()); // ELTODO + List indexDirs = server.findAllIndexDirs(Case.getCurrentCase()); // check if index needs upgrade - boolean needsUpgrade = true; + boolean needsUpgrade = false; + String currentVersionIndexDir = server.findLatestVersionIndexDir(indexDirs); + if (currentVersionIndexDir.isEmpty()) { + needsUpgrade = true; + + // ELTODO not sure what to do when there are multiple old indexes. grab the first one? + String oldIndexDir = indexDirs.get(0); - if (needsUpgrade && RuntimeProperties.coreComponentsAreActive()) { - //pop up a message box to indicate the restrictions on adding additional - //text and performing regex searches and give the user the option to decline the upgrade - boolean upgradeDeclined = true; - if (upgradeDeclined) { - throw new AutopsyServiceProviderException("ELTODO"); + if (needsUpgrade && RuntimeProperties.coreComponentsAreActive()) { + //pop up a message box to indicate the restrictions on adding additional + //text and performing regex searches and give the user the option to decline the upgrade + boolean upgradeDeclined = true; + if (upgradeDeclined) { + throw new AutopsyServiceProviderException("ELTODO"); + } } - } - - if (needsUpgrade) { - // ELTODO Check for cancellation at whatever points are feasible - - // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index - - // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset - - // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index - - // Open the upgraded index - - // execute a test query - boolean success = true; - - if (!success) { - // delete the 
new directories + + if (needsUpgrade) { + // ELTODO Check for cancellation at whatever points are feasible + + // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset + + // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Open the upgraded index + + // execute a test query + boolean success = true; + + if (!success) { + // delete the new directories + + // close the upgraded index? + + throw new AutopsyServiceProviderException("ELTODO"); + } - // close the upgraded index? - - throw new AutopsyServiceProviderException("ELTODO"); + // currentVersionIndexDir = upgraded index dir } } } From bb0c3e55ebfc25697a109e6f3e838137eaa178d1 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 16:14:53 -0500 Subject: [PATCH 06/13] Fixing comments --- .../autopsy/keywordsearch/Server.java | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index b6eefa9d2d..a8edfcd54e 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -5,7 +5,7 @@ * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this folder except in compliance with the License. + * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -401,7 +401,7 @@ public class Server { ProcessBuilder solrProcessBuilder = new ProcessBuilder(commandLine); solrProcessBuilder.directory(solrFolder); - // Redirect stdout and stderr to folders to prevent blocking. + // Redirect stdout and stderr to files to prevent blocking. Path solrStdoutPath = Paths.get(Places.getUserDirectory().getAbsolutePath(), "var", "log", "solr.log.stdout"); //NON-NLS solrProcessBuilder.redirectOutput(solrStdoutPath.toFile()); @@ -976,11 +976,11 @@ public class Server { } /** - * Execute query that gets only number of all Solr folders indexed without - * actually returning the folders. The result does not include chunks, only - * number of actual folders. + * Execute query that gets only number of all Solr files indexed without + * actually returning the files. The result does not include chunks, only + * number of actual files. * - * @return int representing number of indexed folders + * @return int representing number of indexed files * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -1002,8 +1002,8 @@ public class Server { } /** - * Execute query that gets only number of all Solr folder chunks (not - * logical folders) indexed without actually returning the content. + * Execute query that gets only number of all Solr file chunks (not + * logical files) indexed without actually returning the content. 
* * @return int representing number of indexed chunks * @@ -1028,9 +1028,9 @@ public class Server { /** * Execute query that gets only number of all Solr documents indexed - * (folders and chunks) without actually returning the documents + * (files and chunks) without actually returning the documents * - * @return int representing number of indexed folders (folders and chunks) + * @return int representing number of indexed files (files and chunks) * * @throws KeywordSearchModuleException * @throws NoOpenCoreException @@ -1052,7 +1052,7 @@ public class Server { } /** - * Return true if the folder is indexed (either as a whole as a chunk) + * Return true if the file is indexed (either as a whole as a chunk) * * @param contentID * @@ -1079,12 +1079,12 @@ public class Server { } /** - * Execute query that gets number of indexed folder chunks for a folder + * Execute query that gets number of indexed file chunks for a file * - * @param fileID folder id of the original folder broken into chunks and + * @param fileID file id of the original file broken into chunks and * indexed * - * @return int representing number of indexed folder chunks, 0 if there is + * @return int representing number of indexed file chunks, 0 if there is * no chunks * * @throws KeywordSearchModuleException @@ -1186,7 +1186,7 @@ public class Server { } /** - * Get the text contents of the given folder as stored in SOLR. + * Get the text contents of the given file as stored in SOLR. * * @param content to get the text for * @@ -1207,7 +1207,7 @@ public class Server { } /** - * Get the text contents of a single chunk for the given folder as stored in + * Get the text contents of a single chunk for the given file as stored in * SOLR. * * @param content to get the text for @@ -1283,10 +1283,10 @@ public class Server { } /** - * Given folder parent id and child chunk ID, return the ID string of the + * Given file parent id and child chunk ID, return the ID string of the * chunk as stored in Solr, e.g. 
FILEID_CHUNKID * - * @param parentID the parent folder id (id of the source content) + * @param parentID the parent file id (id of the source content) * @param childID the child chunk id * * @return formatted string id @@ -1327,7 +1327,7 @@ public class Server { * exist or loaded if it already exists. */ - // In single user mode, if there is a core.properties folder already, + // In single user mode, if there is a core.properties file already, // we've hit a solr bug. Compensate by deleting it. if (caseType == CaseType.SINGLE_USER_CASE) { Path corePropertiesFile = Paths.get(solrFolder.toString(), SOLR, coreName, CORE_PROPERTIES); @@ -1391,7 +1391,7 @@ public class Server { } /** - * Determines whether or not the index folders folder for a Solr core + * Determines whether or not the index folder for a Solr core * exists. * * @param coreName the name of the core. From d23b78f57c1359a0a99f350161a4a0f61a1c7b77 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 16:17:42 -0500 Subject: [PATCH 07/13] Fixing comments --- .../autopsy/keywordsearch/Server.java | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index a8edfcd54e..2a5f1e3a06 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -1391,7 +1391,7 @@ public class Server { } /** - * Determines whether or not the index folder for a Solr core + * Determines whether or not the index files folder for a Solr core * exists. * * @param coreName the name of the core. 
@@ -1430,7 +1430,7 @@ public class Server { this.solrCore = new Builder(currentSolrServer.getBaseURL() + "/" + name).build(); //NON-NLS //TODO test these settings - //solrCore.setSoTimeout(1000 * 60); // socket read timeout, make large enough so can index larger folders + //solrCore.setSoTimeout(1000 * 60); // socket read timeout, make large enough so can index larger files //solrCore.setConnectionTimeout(1000); solrCore.setDefaultMaxConnectionsPerHost(2); solrCore.setMaxTotalConnections(5); @@ -1500,7 +1500,7 @@ public class Server { } /** - * get the text from the content field for the given folder + * get the text from the content field for the given file * * @param contentID * @param chunkID @@ -1527,8 +1527,8 @@ public class Server { if (fieldValues.size() == 1) // The indexed text field for artifacts will only have a single value. { return fieldValues.toArray(new String[0])[0]; - } else // The indexed text for folders has 2 values, the folder name and the folder content. - // We return the folder content value. + } else // The indexed text for files has 2 values, the file name and the file content. + // We return the file content value. { return fieldValues.toArray(new String[0])[1]; } @@ -1560,10 +1560,10 @@ public class Server { } /** - * Execute query that gets only number of all Solr folders (not chunks) - * indexed without actually returning the folders + * Execute query that gets only number of all Solr files (not chunks) + * indexed without actually returning the files * - * @return int representing number of indexed folders (entire folders, + * @return int representing number of indexed files (entire files, * not chunks) * * @throws SolrServerException @@ -1593,7 +1593,7 @@ public class Server { * without actually returning the documents. Documents include entire * indexed folders as well as chunks, which are treated as documents. 
* - * @return int representing number of indexed documents (entire folders + * @return int representing number of indexed documents (entire files * and chunks) * * @throws SolrServerException @@ -1605,7 +1605,7 @@ public class Server { } /** - * Return true if the folder is indexed (either as a whole as a chunk) + * Return true if the file is indexed (either as a whole as a chunk) * * @param contentID * @@ -1623,12 +1623,12 @@ public class Server { } /** - * Execute query that gets number of indexed folder chunks for a folder + * Execute query that gets number of indexed file chunks for a file * - * @param contentID folder id of the original folder broken into chunks + * @param contentID file id of the original file broken into chunks * and indexed * - * @return int representing number of indexed folder chunks, 0 if there + * @return int representing number of indexed file chunks, 0 if there * is no chunks * * @throws SolrServerException From b5e3639167fb8f608c7a9b98685ce6e86a6782dd Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 16:18:45 -0500 Subject: [PATCH 08/13] Fixing comments --- .../src/org/sleuthkit/autopsy/keywordsearch/Server.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 2a5f1e3a06..09643a9b04 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -1591,7 +1591,7 @@ public class Server { /** * Execute query that gets only number of all Solr documents indexed * without actually returning the documents. Documents include entire - * indexed folders as well as chunks, which are treated as documents. + * indexed files as well as chunks, which are treated as documents. 
* * @return int representing number of indexed documents (entire files * and chunks) From 4299a2326e7261f255099112a477e5df6329a711 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 6 Jan 2017 16:22:49 -0500 Subject: [PATCH 09/13] More work --- .../sleuthkit/autopsy/keywordsearch/SolrSearchService.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index fc6a922f2f..5e17cffdb4 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -247,13 +247,8 @@ public class SolrSearchService implements KeywordSearchService, AutopsyServicePr * Autopsy service providers may not have case-level resources. */ Server server = KeywordSearch.getServer(); - if (server.coreIsOpen() == false) { - throw new AutopsyServiceProviderException("ELTODO"); - } - // do a case subdirectory search to check if latest index exists - - // do a case subdirectory search to check for the existence and upgrade status of cores + // do a case subdirectory search to check for the existence and upgrade status of KWS indexes List indexDirs = server.findAllIndexDirs(Case.getCurrentCase()); // check if index needs upgrade From 75441946a11ea5551ae528a90553e83175c764dd Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 9 Jan 2017 10:23:03 -0500 Subject: [PATCH 10/13] Minor --- .../src/org/sleuthkit/autopsy/keywordsearch/Server.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 09643a9b04..99bc4dfdaa 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ 
b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -843,14 +843,16 @@ public class Server { private static List getAllContentsInFolder(String path) { File directory = new File(path); File[] contents = directory.listFiles(); - // the directory file is not really a directory.. if (contents == null) { + // the directory file is not really a directory.. return Collections.emptyList(); - } // Folder is empty + } else if (contents.length == 0) { + // Folder is empty return Collections.emptyList(); - } // Folder has contents + } else { + // Folder has contents return new ArrayList<>(Arrays.asList(contents)); } } From e3ed9dfc3432b483ab44ad683ca406c43ebaf405 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 9 Jan 2017 14:08:14 -0500 Subject: [PATCH 11/13] Resolved merge conflicts --- .../sleuthkit/autopsy/keywordsearch/SolrSearchService.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index 7250cdc5c3..08caa54763 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -19,21 +19,17 @@ package org.sleuthkit.autopsy.keywordsearch; import java.io.IOException; -import java.net.InetAddress; -import java.util.MissingResourceException; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.openide.util.NbBundle; -<<<<<<< HEAD import java.net.InetAddress; import java.util.List; import java.util.MissingResourceException; import org.sleuthkit.autopsy.core.RuntimeProperties; import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider; -======= import org.openide.util.lookup.ServiceProvider; +import org.sleuthkit.autopsy.casemodule.Case; import 
org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; ->>>>>>> be7bdced9064f55ef50e6a9643f6b8d9c4927fdc import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.TskCoreException; From 09014d34b66b81bf88155e15be94e39ee86763b5 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 9 Jan 2017 15:58:50 -0500 Subject: [PATCH 12/13] Registering as service providers --- .../keywordsearch/SolrSearchService.java | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index 08caa54763..1ca9c31ce8 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -28,6 +28,7 @@ import java.util.MissingResourceException; import org.sleuthkit.autopsy.core.RuntimeProperties; import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider; import org.openide.util.lookup.ServiceProvider; +import org.openide.util.lookup.ServiceProviders; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService; import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException; @@ -38,7 +39,10 @@ import org.sleuthkit.datamodel.TskCoreException; * An implementation of the KeywordSearchService interface that uses Solr for * text indexing and search. 
*/ -@ServiceProvider(service = KeywordSearchService.class) +@ServiceProviders(value={ + @ServiceProvider(service=KeywordSearchService.class), + @ServiceProvider(service=AutopsyServiceProvider.class)} +) public class SolrSearchService implements KeywordSearchService, AutopsyServiceProvider { private static final String BAD_IP_ADDRESS_FORMAT = "ioexception occurred when talking to server"; //NON-NLS @@ -152,47 +156,43 @@ public class SolrSearchService implements KeywordSearchService, AutopsyServicePr List indexDirs = server.findAllIndexDirs(Case.getCurrentCase()); // check if index needs upgrade - boolean needsUpgrade = false; String currentVersionIndexDir = server.findLatestVersionIndexDir(indexDirs); if (currentVersionIndexDir.isEmpty()) { - needsUpgrade = true; // ELTODO not sure what to do when there are multiple old indexes. grab the first one? String oldIndexDir = indexDirs.get(0); - - if (needsUpgrade && RuntimeProperties.coreComponentsAreActive()) { + + if (RuntimeProperties.coreComponentsAreActive()) { //pop up a message box to indicate the restrictions on adding additional //text and performing regex searches and give the user the option to decline the upgrade - boolean upgradeDeclined = true; + boolean upgradeDeclined = false; if (upgradeDeclined) { throw new AutopsyServiceProviderException("ELTODO"); } } - if (needsUpgrade) { - // ELTODO Check for cancellation at whatever points are feasible + // ELTODO Check for cancellation at whatever points are feasible + + // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset + + // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + + // Open the upgraded index + + // execute a test query + + boolean success = true; - // Copy the contents (core) of 
ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index + if (!success) { + // delete the new directories - // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset - - // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index - - // Open the upgraded index - - // execute a test query - boolean success = true; - - if (!success) { - // delete the new directories - - // close the upgraded index? - - throw new AutopsyServiceProviderException("ELTODO"); - } - - // currentVersionIndexDir = upgraded index dir + // close the upgraded index? + throw new AutopsyServiceProviderException("ELTODO"); } + + // currentVersionIndexDir = upgraded index dir } } From 42360c52ca928cf8a5d1a13ddc7742f82c8fb16a Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 9 Jan 2017 17:07:58 -0500 Subject: [PATCH 13/13] Moved all the logic into new IndexHandling class --- .../autopsy/keywordsearch/IndexHandling.java | 225 ++++++++++++++++++ .../autopsy/keywordsearch/Server.java | 188 +-------------- .../keywordsearch/SolrSearchService.java | 7 +- 3 files changed, 232 insertions(+), 188 deletions(-) create mode 100644 KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java new file mode 100644 index 0000000000..3ee8e1f118 --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java @@ -0,0 +1,225 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2011-2016 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sleuthkit.autopsy.keywordsearch; + +import java.io.File; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.sleuthkit.autopsy.casemodule.Case; +import org.sleuthkit.autopsy.coreutils.UNCPathUtilities; + +/** + * This class handles the task of finding KWS index folders and upgrading old + * indexes to the latest supported Solr version. + */ +class IndexHandling { + + private UNCPathUtilities uncPathUtilities = new UNCPathUtilities(); + private static final String MODULE_OUTPUT = "ModuleOutput"; // ELTODO get "ModuleOutput" somehow... 
+ private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch"; + private static final String KWS_DATA_FOLDER_NAME = "data"; + private static final String INDEX_FOLDER_NAME = "index"; + private static final String CURRENT_SOLR_VERSION = "6"; + private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0"; + private static final Pattern INDEX_FOLDER_NAME_PATTERN = Pattern.compile("^solr\\d{1,2}_schema_\\d{1,2}.\\d{1,2}$"); + + + static String getCurrentSolrVersion() { + return CURRENT_SOLR_VERSION; + } + + static String getCurrentSchemaVersion() { + return CURRENT_SOLR_SCHEMA_VERSION; + } + + static String findLatestVersionIndexDir(List allIndexes) { + String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; + for (String path : allIndexes) { + if (path.contains(indexFolderName)) { + return path; + } + } + return ""; + } + + /** + * Find index directory location for the case. This is done via subdirectory + * search of all existing "ModuleOutput/node_name/keywordsearch/data/" + * folders. + * + * @param theCase the case to get index dir for + * + * @return absolute path to index dir + */ + static List findAllIndexDirs(Case theCase) { + ArrayList candidateIndexDirs = new ArrayList<>(); + // first find all existing "/ModuleOutput/keywordsearch/data/" folders + if (theCase.getCaseType() == Case.CaseType.MULTI_USER_CASE) { + // multi user cases contain a subfolder for each node that participated in case ingest or review. + // Any one (but only one!) of those subfolders may contain the actual index. 
+ /* NOTE: All of the following paths are valid multi-user index paths: + X:\Case\ingest1\ModuleOutput\keywordsearch\data\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ + + // create a list of all sub-directories + List contents = getAllContentsInFolder(theCase.getCaseDirectory()); + + // ELTODO decipher "ModuleOutput" from path + + // scan all topLevelOutputDir subfolders for presence of non-empty "/ModuleOutput/keywordsearch/data/" folder + for (File item : contents) { + File path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS + // must be a non-empty directory + if (path.exists() && path.isDirectory()) { + candidateIndexDirs.add(path.toString()); + } + } + } else { + // single user case + /* NOTE: All of the following paths are valid single user index paths: + X:\Case\ModuleOutput\keywordsearch\data\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ + File path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS + // must be a non-empty directory + if (path.exists() && path.isDirectory()) { + candidateIndexDirs.add(path.toString()); + } + } + + // analyze possible index folders + ArrayList indexDirs = new ArrayList<>(); + for (String path : candidateIndexDirs) { + List validIndexPaths = containsValidIndexFolders(path); + for (String validPath : validIndexPaths) { + indexDirs.add(validPath); + // ELTODO indexDirs.add(convertPathToUNC(validPath)); + // there can be multiple index folders (e.g. 
current version and "old" version) so keep looking + } + } + return indexDirs; + } + + String convertPathToUNC(String indexDir) { + // ELTODO do we need to do this when searching for old index? + if (uncPathUtilities == null) { + return indexDir; + } + // if we can check for UNC paths, do so, otherwise just return the indexDir + String result = uncPathUtilities.mappedDriveToUNC(indexDir); + if (result == null) { + uncPathUtilities.rescanDrives(); + result = uncPathUtilities.mappedDriveToUNC(indexDir); + } + if (result == null) { + return indexDir; + } + return result; + } + + /** + * Returns a list of all contents in the folder of interest. + * + * @param path Absolute path of the folder of interest + * + * @return List of all contents in the folder of interest + */ + private static List getAllContentsInFolder(String path) { + File directory = new File(path); + File[] contents = directory.listFiles(); + if (contents == null) { + // the directory file is not really a directory.. + return Collections.emptyList(); + } + else if (contents.length == 0) { + // Folder is empty + return Collections.emptyList(); + } + else { + // Folder has contents + return new ArrayList<>(Arrays.asList(contents)); + } + } + + private static List containsValidIndexFolders(String path) { + /* NOTE: All of the following paths are valid index paths: + X:\Case\ModuleOutput\keywordsearch\data\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index + X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index + */ + + List indexFolders = new ArrayList<>(); + List contents = getAllContentsInFolder(path); + // scan the folder for presence of 
non-empty "index" folder + for (File item : contents) { + // scan all subfolders for presence of non-empty "index" folder + if (isNonEmptyIndexFolder(item)) { + indexFolders.add(item.getAbsolutePath()); + // keep looking as there may be more index folders + continue; + } + + // check if the folder matches "solrX_schema_Y" pattern + if (matchesIndexFolderNameStandard(item.getName())) { + File nextLevelIndexFolder = Paths.get(item.getAbsolutePath(), INDEX_FOLDER_NAME).toFile(); + // look for "index" sub-folder one level deeper + if (isNonEmptyIndexFolder(nextLevelIndexFolder)) { + indexFolders.add(nextLevelIndexFolder.getAbsolutePath()); + // keep looking as there may be more index folders + } + } + } + return indexFolders; + } + + private static boolean isNonEmptyIndexFolder(File path) { + if (path.exists() && path.isDirectory() && path.getName().equals(INDEX_FOLDER_NAME) && path.listFiles().length > 0) { + return true; + } + return false; + } + + /** + * Checks whether a name matches index folder name standard + * + * @param inputString The string to check. + * + * @return True or false. 
+ */ + public static boolean matchesIndexFolderNameStandard(String inputString) { + Matcher m = INDEX_FOLDER_NAME_PATTERN.matcher(inputString); + return m.find(); + } + +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 0f8ffb2c65..5d675edfa0 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -39,12 +39,9 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import javax.swing.AbstractAction; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrRequest; @@ -72,7 +69,6 @@ import org.sleuthkit.autopsy.coreutils.ModuleSettings; import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.coreutils.UNCPathUtilities; import org.sleuthkit.datamodel.Content; -//ELTODO import static org.sleuthkit.autopsy.casemodule.Case.MODULE_FOLDER; /** * Handles management of a either a local or centralized Solr server and its @@ -189,13 +185,6 @@ public class Server { private UNCPathUtilities uncPathUtilities = null; private static final String SOLR = "solr"; private static final String CORE_PROPERTIES = "core.properties"; - private static final String MODULE_OUTPUT = "ModuleOutput"; // ELTODO get "ModuleOutput" somehow... 
- private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch"; - private static final String KWS_DATA_FOLDER_NAME = "data"; - private static final String INDEX_FOLDER_NAME = "index"; - private static final String CURRENT_SOLR_VERSION = "6"; - private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0"; - private static final Pattern INDEX_FOLDER_NAME_PATTERN = Pattern.compile("^solr\\d{1,2}_schema_\\d{1,2}.\\d{1,2}$"); public enum CORE_EVT_STATES { @@ -312,14 +301,6 @@ public class Server { return currentSolrStopPort; } - String getCurrentSolrVersion() { - return CURRENT_SOLR_VERSION; - } - - String getCurrentSchemaVersion() { - return CURRENT_SOLR_SCHEMA_VERSION; - } - /** * Helper threads to handle stderr/stdout from Solr process */ @@ -730,6 +711,7 @@ public class Server { * @return absolute path to index dir */ String geCoreDataDirPath(Case theCase) { + // ELTODO this method is going to be removed String indexDir = theCase.getModuleDirectory() + File.separator + "keywordsearch" + File.separator + "data"; //NON-NLS if (uncPathUtilities != null) { // if we can check for UNC paths, do so, otherwise just return the indexDir @@ -746,171 +728,6 @@ public class Server { return indexDir; } - String findLatestVersionIndexDir(List allIndexes) { - String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION; - for (String path : allIndexes) { - if (path.contains(indexFolderName)) { - return path; - } - } - return ""; - } - - /** - * Find index directory location for the case. This is done via subdirectory - * search of all existing "ModuleOutput/node_name/keywordsearch/data/" - * folders. 
- * - * @param theCase the case to get index dir for - * - * @return absolute path to index dir - */ - List findAllIndexDirs(Case theCase) { - ArrayList candidateIndexDirs = new ArrayList<>(); - // first find all existing "/ModuleOutput/keywordsearch/data/" folders - if (theCase.getCaseType() == CaseType.MULTI_USER_CASE) { - // multi user cases contain a subfolder for each node that participated in case ingest or review. - // Any one (but only one!) of those subfolders may contain the actual index. - /* NOTE: All of the following paths are valid multi-user index paths: - X:\Case\ingest1\ModuleOutput\keywordsearch\data\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index - */ - - // create a list of all sub-directories - List contents = getAllContentsInFolder(theCase.getCaseDirectory()); - - // scan all topLevelOutputDir subfolders for presense of non-empty "/ModuleOutput/keywordsearch/data/" folder - for (File item : contents) { - File path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS - // must be a non-empty directory - if (path.exists() && path.isDirectory()) { - candidateIndexDirs.add(path.toString()); - } - } - } else { - // single user case - /* NOTE: All of the following paths are valid single user index paths: - X:\Case\ModuleOutput\keywordsearch\data\index - X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index - X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index - X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index - */ - File path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS - // must be a non-empty directory - if (path.exists() && path.isDirectory()) { - candidateIndexDirs.add(path.toString()); - } - } - - // analyze 
possible index folders - ArrayList indexDirs = new ArrayList<>(); - for (String path : candidateIndexDirs) { - List validIndexPaths = containsValidIndexFolders(path); - for (String validPath : validIndexPaths) { - indexDirs.add(convertPathToUNC(validPath)); - // there can be multiple index folders (e.g. current version and "old" version) so keep looking - } - } - return indexDirs; - } - - String convertPathToUNC(String indexDir) { - // ELTODO do we need to do this when searching for old index? - if (uncPathUtilities == null) { - return indexDir; - } - // if we can check for UNC paths, do so, otherwise just return the indexDir - String result = uncPathUtilities.mappedDriveToUNC(indexDir); - if (result == null) { - uncPathUtilities.rescanDrives(); - result = uncPathUtilities.mappedDriveToUNC(indexDir); - } - if (result == null) { - return indexDir; - } - return result; - } - - /** - * Returns a list of all contents in the folder of interest. - * - * @param path Absolute path of the folder of interest - * - * @return List of all contents in the folder of interest - */ - private static List getAllContentsInFolder(String path) { - File directory = new File(path); - File[] contents = directory.listFiles(); - if (contents == null) { - // the directory file is not really a directory.. 
- return Collections.emptyList(); - } - else if (contents.length == 0) { - // Folder is empty - return Collections.emptyList(); - } - else { - // Folder has contents - return new ArrayList<>(Arrays.asList(contents)); - } - } - - private List containsValidIndexFolders(String path) { - /* NOTE: All of the following paths are valid index paths: - X:\Case\ModuleOutput\keywordsearch\data\index - X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index - X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index - X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index - X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index - */ - - List indexFolders = new ArrayList<>(); - List contents = getAllContentsInFolder(path); - // scan the folder for presense of non-empty "index" folder - for (File item : contents) { - // scan all subfolders for presense of non-empty "index" folder - if (isNonEmptyIndexFolder(item)) { - indexFolders.add(item.getAbsolutePath()); - // keep looking as there may be more index folders - continue; - } - - // check if the folder matches "solrX_schema_Y" patern - if (matchesIndexFolderNameStandard(item.getName())) { - File nextLevelIndexFolder = Paths.get(item.getAbsolutePath(), INDEX_FOLDER_NAME).toFile(); - // look for "index" sub-folder one level deeper - if (isNonEmptyIndexFolder(nextLevelIndexFolder)) { - indexFolders.add(nextLevelIndexFolder.getAbsolutePath()); - // keep looking as there may be more index folders - } - } - } - return indexFolders; - } - - private boolean isNonEmptyIndexFolder(File path) { - if (path.exists() && path.isDirectory() && path.getName().equals(INDEX_FOLDER_NAME) && path.listFiles().length > 0) { - return true; - } - return false; - } - - /** - * Checks whether a name matches index folder 
name standard - * - * @param inputString The string to check. - * - * @return True or false. - */ - public static boolean matchesIndexFolderNameStandard(String inputString) { - Matcher m = INDEX_FOLDER_NAME_PATTERN.matcher(inputString); - return m.find(); - } /** * ** end single-case specific methods *** @@ -927,7 +744,7 @@ public class Server { */ private Core openCore(Case theCase) throws KeywordSearchModuleException { - // ELTODO String indexDir = findLatestVersionIndexDir(Case.getCurrentCase()); // ELTODO + // ELTODO REMOVE String indexDir = findLatestVersionIndexDir(Case.getCurrentCase()); // ELTODO try { if (theCase.getCaseType() == CaseType.SINGLE_USER_CASE) { @@ -1346,6 +1163,7 @@ public class Server { throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.openCore.exception.noIndexDir.msg")); } + // ELTODO set solr and schema version of the core that is being loaded. Make that available via API. return new Core(coreName, caseType); } catch (SolrServerException | SolrException | IOException ex) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java index 1ca9c31ce8..6e60eccf99 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java @@ -150,13 +150,12 @@ public class SolrSearchService implements KeywordSearchService, AutopsyServicePr /* * Autopsy service providers may not have case-level resources. 
*/ - Server server = KeywordSearch.getServer(); // do a case subdirectory search to check for the existence and upgrade status of KWS indexes - List indexDirs = server.findAllIndexDirs(Case.getCurrentCase()); + List indexDirs = IndexHandling.findAllIndexDirs(Case.getCurrentCase()); // check if index needs upgrade - String currentVersionIndexDir = server.findLatestVersionIndexDir(indexDirs); + String currentVersionIndexDir = IndexHandling.findLatestVersionIndexDir(indexDirs); if (currentVersionIndexDir.isEmpty()) { // ELTODO not sure what to do when there are multiple old indexes. grab the first one? @@ -177,6 +176,8 @@ public class SolrSearchService implements KeywordSearchService, AutopsyServicePr // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset + // convert path to UNC path + // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index // Open the upgraded index