Merge pull request #2457 from eugene7646/solr65

Solr 4 to 6 index upgrade
This commit is contained in:
Richard Cordovano 2017-01-10 09:47:50 -05:00 committed by GitHub
commit 018a360b05
4 changed files with 340 additions and 23 deletions

View File

@ -117,7 +117,7 @@ public interface AutopsyServiceProvider {
*
* @param message Exception message.
*/
AutopsyServiceProviderException(String message) {
public AutopsyServiceProviderException(String message) {
super(message);
}
@ -128,7 +128,7 @@ public interface AutopsyServiceProvider {
* @param message Exception message.
* @param throwable Exception cause.
*/
AutopsyServiceProviderException(String message, Throwable throwable) {
public AutopsyServiceProviderException(String message, Throwable throwable) {
super(message, throwable);
}
}

View File

@ -0,0 +1,225 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2016 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.File;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.coreutils.UNCPathUtilities;
/**
 * Utilities for locating keyword search (KWS) index folders that belong to a
 * case and for identifying the index that matches the Solr/schema version
 * currently produced by this module. Upgrading old indexes to the latest
 * supported Solr version is meant to be handled here as well, but no upgrade
 * logic is implemented in this class yet.
 */
class IndexHandling {

    private static final String MODULE_OUTPUT = "ModuleOutput"; // ELTODO get "ModuleOutput" somehow...
    private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch";
    private static final String KWS_DATA_FOLDER_NAME = "data";
    private static final String INDEX_FOLDER_NAME = "index";
    private static final String CURRENT_SOLR_VERSION = "6";
    private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0";
    // Matches versioned index folder names such as "solr6_schema_2.0". The dot
    // between the schema version components is escaped so that it only matches
    // a literal '.', and the pattern is applied to the whole name via matches().
    private static final Pattern INDEX_FOLDER_NAME_PATTERN = Pattern.compile("solr\\d{1,2}_schema_\\d{1,2}\\.\\d{1,2}");

    private final UNCPathUtilities uncPathUtilities = new UNCPathUtilities();

    /**
     * Gets the Solr version of indexes created by this module.
     *
     * @return The current Solr (major) version, as a string.
     */
    static String getCurrentSolrVersion() {
        return CURRENT_SOLR_VERSION;
    }

    /**
     * Gets the Solr schema version of indexes created by this module.
     *
     * @return The current Solr schema version, as a string.
     */
    static String getCurrentSchemaVersion() {
        return CURRENT_SOLR_SCHEMA_VERSION;
    }

    /**
     * Picks, from a list of index folder paths, the first one whose path
     * contains the folder name of the current Solr/schema version (i.e.
     * "solr6_schema_2.0").
     *
     * @param allIndexes Index folder paths, e.g. as returned by
     *                   findAllIndexDirs().
     *
     * @return The first path that refers to a current version index, or an
     *         empty string if there is none.
     */
    static String findLatestVersionIndexDir(List<String> allIndexes) {
        String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION;
        for (String path : allIndexes) {
            if (path.contains(indexFolderName)) {
                return path;
            }
        }
        return "";
    }

    /**
     * Finds all index directories for the case. This is done via subdirectory
     * search of all existing "ModuleOutput/node_name/keywordsearch/data/"
     * folders.
     *
     * @param theCase The case to get index directories for.
     *
     * @return Absolute paths of all non-empty "index" folders found for the
     *         case (possibly for several Solr/schema versions); empty if none
     *         were found.
     */
    static List<String> findAllIndexDirs(Case theCase) {
        List<String> candidateDataDirs = new ArrayList<>();
        // first find all existing "/ModuleOutput/keywordsearch/data/" folders
        if (theCase.getCaseType() == Case.CaseType.MULTI_USER_CASE) {
            // multi user cases contain a subfolder for each node that participated in case ingest or review.
            // Any one (but only one!) of those subfolders may contain the actual index.
            /* NOTE: All of the following paths are valid multi-user index paths:
             X:\Case\ingest1\ModuleOutput\keywordsearch\data\index
             X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
             X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
             X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
             */
            // ELTODO decipher "ModuleOutput" from path
            // scan all entries of the case directory for an existing "/ModuleOutput/keywordsearch/data/" folder
            for (File item : getAllContentsInFolder(theCase.getCaseDirectory())) {
                File dataDir = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS
                // must be an existing directory (emptiness is checked later, per index folder)
                if (dataDir.isDirectory()) {
                    candidateDataDirs.add(dataDir.toString());
                }
            }
        } else {
            // single user case
            /* NOTE: All of the following paths are valid single user index paths:
             X:\Case\ModuleOutput\keywordsearch\data\index
             X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
             X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
             X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
             */
            File dataDir = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS
            // must be an existing directory (emptiness is checked later, per index folder)
            if (dataDir.isDirectory()) {
                candidateDataDirs.add(dataDir.toString());
            }
        }

        // analyze possible index folders; there can be multiple index folders
        // (e.g. current version and "old" version) so all of them are collected
        List<String> indexDirs = new ArrayList<>();
        for (String dataDir : candidateDataDirs) {
            // ELTODO indexDirs.add(convertPathToUNC(validPath));
            indexDirs.addAll(containsValidIndexFolders(dataDir));
        }
        return indexDirs;
    }

    /**
     * Attempts to convert a path to its UNC form using UNCPathUtilities. If the
     * first attempt yields no result, the drives are rescanned and the
     * conversion is tried once more.
     *
     * @param indexDir The path to convert.
     *
     * @return The converted path, or the unchanged input path if no conversion
     *         result was obtained.
     */
    String convertPathToUNC(String indexDir) {
        // ELTODO do we need to do this when searching for old index?
        String result = uncPathUtilities.mappedDriveToUNC(indexDir);
        if (result == null) {
            uncPathUtilities.rescanDrives();
            result = uncPathUtilities.mappedDriveToUNC(indexDir);
        }
        return (result != null) ? result : indexDir;
    }

    /**
     * Returns a list of all contents in the folder of interest.
     *
     * @param path Absolute path of the folder of interest
     *
     * @return List of all contents in the folder of interest; empty if the
     *         folder is empty, is not a directory, or could not be listed.
     */
    private static List<File> getAllContentsInFolder(String path) {
        File[] contents = new File(path).listFiles();
        if (contents == null || contents.length == 0) {
            // not a directory, not listable, or simply empty
            return Collections.emptyList();
        }
        return new ArrayList<>(Arrays.asList(contents));
    }

    /**
     * Collects the non-empty index folders located in a KWS "data" folder. An
     * index folder is either a non-empty "index" folder directly inside the
     * data folder, or a non-empty "index" folder inside a "solrX_schema_Y.Z"
     * sub-folder of the data folder.
     *
     * @param path Absolute path of the KWS "data" folder to scan.
     *
     * @return Absolute paths of all index folders found; empty if none.
     */
    private static List<String> containsValidIndexFolders(String path) {
        /* NOTE: All of the following paths are valid index paths:
         X:\Case\ModuleOutput\keywordsearch\data\index
         X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
         X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
         X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
         X:\Case\ingest4\ModuleOutput\keywordsearch\data\index
         X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
         X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
         X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
         */
        List<String> indexFolders = new ArrayList<>();
        for (File item : getAllContentsInFolder(path)) {
            // un-versioned layout: a non-empty "index" folder directly in the data folder
            if (isNonEmptyIndexFolder(item)) {
                indexFolders.add(item.getAbsolutePath());
                // keep looking as there may be more index folders
                continue;
            }
            // versioned layout: folder name matches the "solrX_schema_Y.Z" pattern
            if (matchesIndexFolderNameStandard(item.getName())) {
                // look for "index" sub-folder one level deeper
                File nextLevelIndexFolder = Paths.get(item.getAbsolutePath(), INDEX_FOLDER_NAME).toFile();
                if (isNonEmptyIndexFolder(nextLevelIndexFolder)) {
                    indexFolders.add(nextLevelIndexFolder.getAbsolutePath());
                    // keep looking as there may be more index folders
                }
            }
        }
        return indexFolders;
    }

    /**
     * Checks whether a file is an existing directory named "index" that
     * contains at least one entry.
     *
     * @param path The file to check.
     *
     * @return True if the file is a non-empty "index" directory, false
     *         otherwise (including when its contents cannot be listed).
     */
    private static boolean isNonEmptyIndexFolder(File path) {
        if (!path.isDirectory() || !INDEX_FOLDER_NAME.equals(path.getName())) {
            return false;
        }
        // listFiles() returns null if the directory cannot be read; treat that
        // as "no usable index" instead of failing with a NullPointerException
        File[] contents = path.listFiles();
        return contents != null && contents.length > 0;
    }

    /**
     * Checks whether a name matches index folder name standard, i.e.
     * "solrX_schema_Y.Z" where X, Y and Z are one or two digit numbers. The
     * whole name must match.
     *
     * @param inputString The string to check.
     *
     * @return True or false.
     */
    public static boolean matchesIndexFolderNameStandard(String inputString) {
        Matcher matcher = INDEX_FOLDER_NAME_PATTERN.matcher(inputString);
        return matcher.matches();
    }
}

View File

@ -369,7 +369,9 @@ public class Server {
* Run a Solr command with the given arguments.
*
* @param solrArguments Command line arguments to pass to the Solr command.
*
* @return
*
* @throws IOException
*/
private Process runSolrCommand(List<String> solrArguments) throws IOException {
@ -602,7 +604,7 @@ public class Server {
* request.
*
* @return false if the request failed with a connection error, otherwise
* true
* true
*/
synchronized boolean isRunning() throws KeywordSearchModuleException {
try {
@ -709,6 +711,7 @@ public class Server {
* @return absolute path to index dir
*/
String geCoreDataDirPath(Case theCase) {
// ELTODO this method is going to be removed
String indexDir = theCase.getModuleDirectory() + File.separator + "keywordsearch" + File.separator + "data"; //NON-NLS
if (uncPathUtilities != null) {
// if we can check for UNC paths, do so, otherwise just return the indexDir
@ -725,6 +728,7 @@ public class Server {
return indexDir;
}
/**
* ** end single-case specific methods ***
*/
@ -739,6 +743,9 @@ public class Server {
* creating/opening the core.
*/
private Core openCore(Case theCase) throws KeywordSearchModuleException {
// ELTODO REMOVE String indexDir = findLatestVersionIndexDir(Case.getCurrentCase()); // ELTODO
try {
if (theCase.getCaseType() == CaseType.SINGLE_USER_CASE) {
currentSolrServer = this.localSolrServer;
@ -814,8 +821,8 @@ public class Server {
}
/**
* Execute query that gets only number of all Solr file chunks (not logical
* files) indexed without actually returning the content.
* Execute query that gets only number of all Solr file chunks (not
* logical files) indexed without actually returning the content.
*
* @return int representing number of indexed chunks
*
@ -839,8 +846,8 @@ public class Server {
}
/**
* Execute query that gets only number of all Solr documents indexed (files
* and chunks) without actually returning the documents
* Execute query that gets only number of all Solr documents indexed
* (files and chunks) without actually returning the documents
*
* @return int representing number of indexed files (files and chunks)
*
@ -893,10 +900,11 @@ public class Server {
/**
* Execute query that gets number of indexed file chunks for a file
*
* @param fileID file id of the original file broken into chunks and indexed
* @param fileID file id of the original file broken into chunks and
* indexed
*
* @return int representing number of indexed file chunks, 0 if there is no
* chunks
* @return int representing number of indexed file chunks, 0 if there are
* no chunks
*
* @throws KeywordSearchModuleException
* @throws NoOpenCoreException
@ -1155,6 +1163,7 @@ public class Server {
throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.openCore.exception.noIndexDir.msg"));
}
// ELTODO set solr and schema version of the core that is being loaded. Make that available via API.
return new Core(coreName, caseType);
} catch (SolrServerException | SolrException | IOException ex) {
@ -1193,7 +1202,8 @@ public class Server {
}
/**
* Determines whether or not the index files folder for a Solr core exists.
* Determines whether or not the index files folder for a Solr core
* exists.
*
* @param coreName the name of the core.
*
@ -1364,8 +1374,8 @@ public class Server {
* Execute query that gets only number of all Solr files (not chunks)
* indexed without actually returning the files
*
* @return int representing number of indexed files (entire files, not
* chunks)
* @return int representing number of indexed files (entire files,
* not chunks)
*
* @throws SolrServerException
*/
@ -1374,8 +1384,9 @@ public class Server {
}
/**
* Execute query that gets only number of all chunks (not logical files,
* or all documents) indexed without actually returning the content
* Execute query that gets only number of all chunks (not logical
* files, or all documents) indexed without actually returning the
* content
*
* @return int representing number of indexed chunks
*
@ -1425,11 +1436,11 @@ public class Server {
/**
* Execute query that gets number of indexed file chunks for a file
*
* @param contentID file id of the original file broken into chunks and
* indexed
* @param contentID file id of the original file broken into chunks
* and indexed
*
* @return int representing number of indexed file chunks, 0 if there is
* no chunks
* @return int representing number of indexed file chunks, 0 if there
* are no chunks
*
* @throws SolrServerException
*/

View File

@ -19,12 +19,17 @@
package org.sleuthkit.autopsy.keywordsearch;
import java.io.IOException;
import java.net.InetAddress;
import java.util.MissingResourceException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.openide.util.NbBundle;
import java.net.InetAddress;
import java.util.List;
import java.util.MissingResourceException;
import org.sleuthkit.autopsy.core.RuntimeProperties;
import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider;
import org.openide.util.lookup.ServiceProvider;
import org.openide.util.lookup.ServiceProviders;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
import org.sleuthkit.datamodel.BlackboardArtifact;
@ -34,8 +39,11 @@ import org.sleuthkit.datamodel.TskCoreException;
* An implementation of the KeywordSearchService interface that uses Solr for
* text indexing and search.
*/
@ServiceProvider(service = KeywordSearchService.class)
public class SolrSearchService implements KeywordSearchService {
@ServiceProviders(value={
@ServiceProvider(service=KeywordSearchService.class),
@ServiceProvider(service=AutopsyServiceProvider.class)}
)
public class SolrSearchService implements KeywordSearchService, AutopsyServiceProvider {
private static final String BAD_IP_ADDRESS_FORMAT = "ioexception occurred when talking to server"; //NON-NLS
private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS
@ -129,4 +137,77 @@ public class SolrSearchService implements KeywordSearchService {
@Override
public void close() throws IOException {
}
/**
 * Checks the keyword search indexes of the current case when the case is
 * opened. All index folders of the case are located; if none of them matches
 * the current Solr/schema version, the (not yet implemented) upgrade of an
 * old index is attempted. A case without any index needs no upgrade and is
 * left untouched.
 *
 * @param context The case-level context supplied by the caller; currently
 *                not used by this implementation.
 *
 * @throws
 * org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider.AutopsyServiceProviderException
 *                if the index upgrade is declined or fails.
 */
@Override
public void openCaseResources(Context context) throws AutopsyServiceProviderException {
    /*
     * Autopsy service providers may not have case-level resources.
     */

    // do a case subdirectory search to check for the existence and upgrade status of KWS indexes
    List<String> indexDirs = IndexHandling.findAllIndexDirs(Case.getCurrentCase());
    if (indexDirs.isEmpty()) {
        // the case has no index yet (e.g. a new case), so there is nothing to
        // upgrade; without this check indexDirs.get(0) below would throw
        return;
    }

    // check if index needs upgrade
    String currentVersionIndexDir = IndexHandling.findLatestVersionIndexDir(indexDirs);
    if (currentVersionIndexDir.isEmpty()) {

        // ELTODO not sure what to do when there are multiple old indexes. grab the first one?
        String oldIndexDir = indexDirs.get(0);

        if (RuntimeProperties.coreComponentsAreActive()) {
            //pop up a message box to indicate the restrictions on adding additional
            //text and performing regex searches and give the user the option to decline the upgrade
            boolean upgradeDeclined = false;
            if (upgradeDeclined) {
                throw new AutopsyServiceProviderException("ELTODO");
            }
        }

        // ELTODO Check for cancellation at whatever points are feasible

        // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index

        // Make a reference copy of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset

        // convert path to UNC path

        // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index

        // Open the upgraded index

        // execute a test query

        boolean success = true;

        if (!success) {
            // delete the new directories

            // close the upgraded index?
            throw new AutopsyServiceProviderException("ELTODO");
        }

        // currentVersionIndexDir = upgraded index dir
    }
}
/**
 * Called to release case-level resources of this service. This
 * implementation currently does nothing.
 *
 * @param context The case-level context supplied by the caller; not used
 *                by this implementation.
 *
 * @throws
 * org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider.AutopsyServiceProviderException
 *                declared by the interface; never thrown by this
 *                implementation.
 */
@Override
public void closeCaseResources(Context context) throws AutopsyServiceProviderException {
/*
 * Autopsy service providers may not have case-level resources. Nothing
 * is acquired in a way that requires explicit release here, so the
 * method body is intentionally empty.
 */
}
}