Merge pull request #2457 from eugene7646/solr65

Solr 4 to 6 index upgrade
2025-07-17 10:17:41 +00:00 · 2017-01-10 09:47:50 -05:00 · 2017-01-10 09:47:50 -05:00 · 018a360b05
commit 018a360b05
parent 08148dde7b f01fc8c24f
4 changed files with 340 additions and 23 deletions
--- a/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java
+++ b/Core/src/org/sleuthkit/autopsy/corecomponentinterfaces/AutopsyServiceProvider.java
@ -117,7 +117,7 @@ public interface AutopsyServiceProvider {
         *
         * @param message Exception message.
         */
-        AutopsyServiceProviderException(String message) {
+        public AutopsyServiceProviderException(String message) {
            super(message);
        }

@ -128,7 +128,7 @@ public interface AutopsyServiceProvider {
         * @param message   Exception message.
         * @param throwable Exception cause.
         */
-        AutopsyServiceProviderException(String message, Throwable throwable) {
+        public AutopsyServiceProviderException(String message, Throwable throwable) {
            super(message, throwable);
        }
    }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexHandling.java
@ -0,0 +1,225 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2011-2016 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.keywordsearch;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.sleuthkit.autopsy.casemodule.Case;
+import org.sleuthkit.autopsy.coreutils.UNCPathUtilities;
+
+/**
+ * This class handles the task of finding KWS index folders and upgrading old
+ * indexes to the latest supported Solr version. 
+ */
+class IndexHandling {
+    
+    private UNCPathUtilities uncPathUtilities = new UNCPathUtilities();
+    private static final String MODULE_OUTPUT = "ModuleOutput"; // ELTODO get "ModuleOutput" somehow...
+    private static final String KWS_OUTPUT_FOLDER_NAME = "keywordsearch";
+    private static final String KWS_DATA_FOLDER_NAME = "data";
+    private static final String INDEX_FOLDER_NAME = "index";
+    private static final String CURRENT_SOLR_VERSION = "6";
+    private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0";
+    private static final Pattern INDEX_FOLDER_NAME_PATTERN = Pattern.compile("^solr\\d{1,2}_schema_\\d{1,2}.\\d{1,2}$");    
+    
+    
+    static String getCurrentSolrVersion() {
+        return CURRENT_SOLR_VERSION;
+    }
+
+    static String getCurrentSchemaVersion() {
+        return CURRENT_SOLR_SCHEMA_VERSION;
+    }
+    
+    static String findLatestVersionIndexDir(List<String> allIndexes) {
+        String indexFolderName = "solr" + CURRENT_SOLR_VERSION + "_schema_" + CURRENT_SOLR_SCHEMA_VERSION;
+        for (String path : allIndexes) {
+            if (path.contains(indexFolderName)) {
+                return path;
+            }
+        }
+        return "";
+    }
+
+    /**
+     * Find all index directory locations for the case. This is done via
+     * subdirectory search of all existing "ModuleOutput/keywordsearch/data/"
+     * folders (one per node sub-folder of the case directory for multi-user cases).
+     *
+     * @param theCase the case to get index dirs for
+     *
+     * @return list of absolute paths to index dirs, empty if none were found
+     */
+    static List<String> findAllIndexDirs(Case theCase) {
+        ArrayList<String> candidateIndexDirs = new ArrayList<>();
+        // first find all existing "/ModuleOutput/keywordsearch/data/" folders
+        if (theCase.getCaseType() == Case.CaseType.MULTI_USER_CASE) {
+            // multi user cases contain a subfolder for each node that participated in case ingest or review.
+            // Any one (but only one!) of those subfolders may contain the actual index.
+            /* NOTE: All of the following paths are valid multi-user index paths:
+            X:\Case\ingest1\ModuleOutput\keywordsearch\data\index
+            X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
+            X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
+            X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
+             */
+
+            // create a list of all sub-directories
+            List<File> contents = getAllContentsInFolder(theCase.getCaseDirectory());
+            
+            // ELTODO decipher "ModuleOutput" from path
+
+            // scan all case directory subfolders for presence of a "/ModuleOutput/keywordsearch/data/" folder
+            for (File item : contents) {
+                File path = Paths.get(item.getAbsolutePath(), MODULE_OUTPUT, KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS
+                // must be an existing directory (emptiness is not checked here)
+                if (path.exists() && path.isDirectory()) {
+                    candidateIndexDirs.add(path.toString());
+                }
+            }
+        } else {
+            // single user case
+            /* NOTE: All of the following paths are valid single user index paths:
+            X:\Case\ModuleOutput\keywordsearch\data\index
+            X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
+            X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
+            X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
+             */
+            File path = Paths.get(theCase.getModuleDirectory(), KWS_OUTPUT_FOLDER_NAME, KWS_DATA_FOLDER_NAME).toFile(); //NON-NLS
+            // must be an existing directory (emptiness is not checked here)
+            if (path.exists() && path.isDirectory()) {
+                candidateIndexDirs.add(path.toString());
+            }
+        }
+        
+        // analyze possible index folders
+        ArrayList<String> indexDirs = new ArrayList<>();
+        for (String path : candidateIndexDirs) {
+            List<String> validIndexPaths = containsValidIndexFolders(path);
+            for (String validPath : validIndexPaths) {
+                indexDirs.add(validPath);
+                // ELTODO indexDirs.add(convertPathToUNC(validPath));
+                // there can be multiple index folders (e.g. current version and "old" version) so keep looking
+            }
+        }
+        return indexDirs;
+    }
+    
+    String convertPathToUNC(String indexDir) {
+        // ELTODO do we need to do this when searching for old index?
+        if (uncPathUtilities == null) {
+            return indexDir;
+        }
+        // if we can check for UNC paths, do so, otherwise just return the indexDir
+        String result = uncPathUtilities.mappedDriveToUNC(indexDir);
+        if (result == null) {
+            uncPathUtilities.rescanDrives();
+            result = uncPathUtilities.mappedDriveToUNC(indexDir);
+        }
+        if (result == null) {
+            return indexDir;
+        }
+        return result;
+    }
+
+    /**
+     * Returns a list of all contents in the folder of interest.
+     *
+     * @param path Absolute path of the folder of interest
+     *
+     * @return List of all contents in the folder of interest
+     */
+    private static List<File> getAllContentsInFolder(String path) {
+        File directory = new File(path);
+        File[] contents = directory.listFiles();
+        if (contents == null) {
+            // listFiles() returns null when the path is not a directory or an I/O error occurs
+            return Collections.emptyList();
+        }
+        else if (contents.length == 0) {
+            // Folder is empty
+            return Collections.emptyList();
+        }
+        else {
+            // Folder has contents
+            return new ArrayList<>(Arrays.asList(contents));
+        }
+    }
+
+    private static List<String> containsValidIndexFolders(String path) {
+        /* NOTE: All of the following paths are valid index paths:
+        X:\Case\ModuleOutput\keywordsearch\data\index
+        X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
+        X:\Case\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
+        X:\Case\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
+        X:\Case\ingest4\ModuleOutput\keywordsearch\data\index
+        X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_2.0\index
+        X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr6_schema_1.8\index
+        X:\Case\ingest4\ModuleOutput\keywordsearch\data\solr7_schema_2.0\index
+         */
+
+        List<String> indexFolders = new ArrayList<>();
+        List<File> contents = getAllContentsInFolder(path);
+        // scan the folder for presence of non-empty "index" folder
+        for (File item : contents) {
+            // scan all subfolders for presence of non-empty "index" folder
+            if (isNonEmptyIndexFolder(item)) {
+                indexFolders.add(item.getAbsolutePath());
+                // keep looking as there may be more index folders
+                continue;
+            }
+            
+            // check if the folder matches "solrX_schema_Y" pattern
+            if (matchesIndexFolderNameStandard(item.getName())) {
+                File nextLevelIndexFolder = Paths.get(item.getAbsolutePath(), INDEX_FOLDER_NAME).toFile();
+                // look for "index" sub-folder one level deeper
+                if (isNonEmptyIndexFolder(nextLevelIndexFolder)) {
+                    indexFolders.add(nextLevelIndexFolder.getAbsolutePath());
+                    // keep looking as there may be more index folders
+                }
+            }
+        }
+        return indexFolders;
+    }
+    
+    private static boolean isNonEmptyIndexFolder(File path) {
+        if (path.exists() && path.isDirectory() && path.getName().equals(INDEX_FOLDER_NAME) && path.listFiles().length > 0) {
+            return true;
+        }
+        return false;
+    }
+    
+    /**
+     * Checks whether a name matches the "solrX_schema_Y" index folder name standard.
+     *
+     * @param inputString The string to check.
+     *
+     * @return True if the name matches the standard, false otherwise.
+     */
+    public static boolean matchesIndexFolderNameStandard(String inputString) {
+        Matcher m = INDEX_FOLDER_NAME_PATTERN.matcher(inputString);
+        return m.find();
+    }    
+    
+}
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
@ -369,7 +369,9 @@ public class Server {
     * Run a Solr command with the given arguments.
     *
     * @param solrArguments Command line arguments to pass to the Solr command.
+     *
     * @return
+     *
     * @throws IOException
     */
    private Process runSolrCommand(List<String> solrArguments) throws IOException {
@ -602,7 +604,7 @@ public class Server {
     * request.
     *
     * @return false if the request failed with a connection error, otherwise
-     * true
+     *         true
     */
    synchronized boolean isRunning() throws KeywordSearchModuleException {
        try {
@ -709,6 +711,7 @@ public class Server {
     * @return absolute path to index dir
     */
    String geCoreDataDirPath(Case theCase) {
+        // ELTODO this method is going to be removed
        String indexDir = theCase.getModuleDirectory() + File.separator + "keywordsearch" + File.separator + "data"; //NON-NLS
        if (uncPathUtilities != null) {
            // if we can check for UNC paths, do so, otherwise just return the indexDir
@ -725,6 +728,7 @@ public class Server {
        return indexDir;
    }

+
    /**
     * ** end single-case specific methods ***
     */
@ -739,6 +743,9 @@ public class Server {
     *                                      creating/opening the core.
     */
    private Core openCore(Case theCase) throws KeywordSearchModuleException {
+        
+        // ELTODO REMOVE String indexDir = findLatestVersionIndexDir(Case.getCurrentCase()); // ELTODO
+        
        try {
            if (theCase.getCaseType() == CaseType.SINGLE_USER_CASE) {
                currentSolrServer = this.localSolrServer;
@ -814,8 +821,8 @@ public class Server {
    }

    /**
-     * Execute query that gets only number of all Solr file chunks (not logical
-     * files) indexed without actually returning the content.
+     * Execute query that gets only number of all Solr file chunks (not
+     * logical files) indexed without actually returning the content.
     *
     * @return int representing number of indexed chunks
     *
@ -839,8 +846,8 @@ public class Server {
    }

    /**
-     * Execute query that gets only number of all Solr documents indexed (files
-     * and chunks) without actually returning the documents
+     * Execute query that gets only number of all Solr documents indexed
+     * (files and chunks) without actually returning the documents
     *
     * @return int representing number of indexed files (files and chunks)
     *
@ -893,10 +900,11 @@ public class Server {
    /**
     * Execute query that gets number of indexed file chunks for a file
     *
-     * @param fileID file id of the original file broken into chunks and indexed
+     * @param fileID file id of the original file broken into chunks and
+     *               indexed
     *
-     * @return int representing number of indexed file chunks, 0 if there is no
-     *         chunks
+     * @return int representing number of indexed file chunks, 0 if there are
+     *         no chunks
     *
     * @throws KeywordSearchModuleException
     * @throws NoOpenCoreException
@ -1155,6 +1163,7 @@ public class Server {
                throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.openCore.exception.noIndexDir.msg"));
            }

+            // ELTODO set solr and schema version of the core that is being loaded. Make that available via API.
            return new Core(coreName, caseType);

        } catch (SolrServerException | SolrException | IOException ex) {
@ -1193,7 +1202,8 @@ public class Server {
    }

    /**
-     * Determines whether or not the index files folder for a Solr core exists.
+     * Determines whether or not the index files folder for a Solr core
+     * exists.
     *
     * @param coreName the name of the core.
     *
@ -1364,8 +1374,8 @@ public class Server {
         * Execute query that gets only number of all Solr files (not chunks)
         * indexed without actually returning the files
         *
-         * @return int representing number of indexed files (entire files, not
-         *         chunks)
+         * @return int representing number of indexed files (entire files,
+         *         not chunks)
         *
         * @throws SolrServerException
         */
@ -1374,8 +1384,9 @@ public class Server {
        }

        /**
-         * Execute query that gets only number of all chunks (not logical files,
-         * or all documents) indexed without actually returning the content
+         * Execute query that gets only number of all chunks (not logical
+         * files, or all documents) indexed without actually returning the
+         * content
         *
         * @return int representing number of indexed chunks
         *
@ -1425,11 +1436,11 @@ public class Server {
        /**
         * Execute query that gets number of indexed file chunks for a file
         *
-         * @param contentID file id of the original file broken into chunks and
-         *                  indexed
+         * @param contentID file id of the original file broken into chunks
+         *                  and indexed
         *
-         * @return int representing number of indexed file chunks, 0 if there is
-         *         no chunks
+         * @return int representing number of indexed file chunks, 0 if there
+         *         are no chunks
         *
         * @throws SolrServerException
         */
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
@ -19,12 +19,17 @@
 package org.sleuthkit.autopsy.keywordsearch;

 import java.io.IOException;
-import java.net.InetAddress;
-import java.util.MissingResourceException;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.openide.util.NbBundle;
+import java.net.InetAddress;
+import java.util.List;
+import java.util.MissingResourceException;
+import org.sleuthkit.autopsy.core.RuntimeProperties;
+import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyServiceProvider;
 import org.openide.util.lookup.ServiceProvider;
+import org.openide.util.lookup.ServiceProviders;
+import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
 import org.sleuthkit.datamodel.BlackboardArtifact;
@ -34,8 +39,11 @@ import org.sleuthkit.datamodel.TskCoreException;
 * An implementation of the KeywordSearchService interface that uses Solr for
 * text indexing and search.
 */
-@ServiceProvider(service = KeywordSearchService.class)
-public class SolrSearchService implements KeywordSearchService {
+@ServiceProviders(value={
+    @ServiceProvider(service=KeywordSearchService.class),
+    @ServiceProvider(service=AutopsyServiceProvider.class)}
+)
+public class SolrSearchService implements KeywordSearchService, AutopsyServiceProvider  {

    private static final String BAD_IP_ADDRESS_FORMAT = "ioexception occurred when talking to server"; //NON-NLS
    private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS
@ -129,4 +137,77 @@ public class SolrSearchService implements KeywordSearchService {
    @Override
    public void close() throws IOException {
    }
+    
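+    /**
+     * Finds the case's keyword search indexes; if none is current, picks an old one to upgrade.
+     *
+     * @param context The case context (currently unused; Case.getCurrentCase() is queried instead).
+     *
+     * @throws AutopsyServiceProviderException If the index upgrade is declined or fails.
+     */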
+    @Override
+    public void openCaseResources(Context context) throws AutopsyServiceProviderException {
+        /*
+         * Autopsy service providers may not have case-level resources.
+         */
+        
+        // do a case subdirectory search to check for the existence and upgrade status of KWS indexes
+        List<String> indexDirs = IndexHandling.findAllIndexDirs(Case.getCurrentCase());
+        
+        // check if index needs upgrade
+        String currentVersionIndexDir = IndexHandling.findLatestVersionIndexDir(indexDirs);
+        if (currentVersionIndexDir.isEmpty()) {
+            
+            // ELTODO not sure what to do when there are multiple old indexes. grab the first one?
+            String oldIndexDir = indexDirs.get(0);
+
+            if (RuntimeProperties.coreComponentsAreActive()) {
+                //pop up a message box to indicate the restrictions on adding additional 
+                //text and performing regex searches and give the user the option to decline the upgrade
+                boolean upgradeDeclined = false;
+                if (upgradeDeclined) {
+                    throw new AutopsyServiceProviderException("ELTODO");
+                }
+            }
+
+            // ELTODO Check for cancellation at whatever points are feasible
+            
+            // Copy the contents (core) of ModuleOutput/keywordsearch/data/index into ModuleOutput/keywordsearch/data/solr6_schema_2.0/index
+            
+            // Make a “reference copy” of the configset and place it in ModuleOutput/keywordsearch/data/solr6_schema_2.0/configset
+            
+            // convert path to UNC path
+            
+            // Run the upgrade tools on the contents (core) in ModuleOutput/keywordsearch/data/solr6_schema_2.0/index
+            
+            // Open the upgraded index
+            
+            // execute a test query
+            
+            boolean success = true;
+
+            if (!success) {
+                // delete the new directories
+
+                // close the upgraded index?
+                throw new AutopsyServiceProviderException("ELTODO");
+            }
+
+            // currentVersionIndexDir = upgraded index dir
+        }
+    }
+
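+    /**
+     * Closes case-level resources; the current implementation does nothing.
+     *
+     * @param context The case context (currently unused).
+     *
+     * @throws AutopsyServiceProviderException Declared by the interface; not thrown by this implementation.
+     */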
+    @Override
+    public void closeCaseResources(Context context) throws AutopsyServiceProviderException {
+        /*
+         * Autopsy service providers may not have case-level resources.
+         */
+    }
 }