From 76797f3c8aa7c35a431252446d5d2bea143ced05 Mon Sep 17 00:00:00 2001 From: millmanorama Date: Thu, 12 Jan 2017 15:58:35 +0100 Subject: [PATCH 1/8] move Chunker to its own file; replace BufferedReader with PushbackReader --- .../sleuthkit/autopsy/casemodule/Case.java | 8 +- .../CoordinationService.java | 255 +++++++++--------- .../CoordinationServiceNamespace.java | 4 +- .../autoingest/AutoIngestJobLogger.java | 2 +- .../autoingest/AutoIngestManager.java | 2 +- .../configuration/SharedConfiguration.java | 4 +- .../autopsy/keywordsearch/Chunker.java | 244 +++++++++++++++++ .../autopsy/keywordsearch/Ingester.java | 213 --------------- 8 files changed, 385 insertions(+), 347 deletions(-) create mode 100644 KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java diff --git a/Core/src/org/sleuthkit/autopsy/casemodule/Case.java b/Core/src/org/sleuthkit/autopsy/casemodule/Case.java index d681e81b48..3beac08648 100644 --- a/Core/src/org/sleuthkit/autopsy/casemodule/Case.java +++ b/Core/src/org/sleuthkit/autopsy/casemodule/Case.java @@ -1008,7 +1008,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The shared lock needs to be created on a special thread so it can be released // from the same thread. Future future = getCurrentCaseExecutor().submit(() -> { - currentCaseLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, caseDir); + currentCaseLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, caseDir); if (null == currentCaseLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); } @@ -1019,7 +1019,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The exclusive lock uses the unique case name. // This lock does not need to be on a special thread since it will be released before // leaving this method - exclusiveResourceLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, + exclusiveResourceLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, dbName, 12, TimeUnit.HOURS); if (null == exclusiveResourceLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); @@ -1269,7 +1269,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The shared lock needs to be created on a special thread so it can be released // from the same thread. 
Future future = getCurrentCaseExecutor().submit(() -> { - currentCaseLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, metadata.getCaseDirectory()); + currentCaseLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, metadata.getCaseDirectory()); if (null == currentCaseLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); } @@ -1280,7 +1280,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The exclusive lock uses the unique case name // This lock does not need to be on a special thread since it will be released before // leaving this method - exclusiveResourceLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, + exclusiveResourceLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, metadata.getCaseDatabaseName(), 12, TimeUnit.HOURS); if (null == exclusiveResourceLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); diff --git a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java index 39c5c25ba6..adc89d4afa 100644 --- a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java +++ b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2015 Basis Technology Corp. + * Copyright 2011-2017 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,129 +18,89 @@ */ package org.sleuthkit.autopsy.coordinationservice; +import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.ZooDefs; import org.apache.curator.RetryPolicy; -import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.recipes.locks.InterProcessMutex; import org.apache.curator.framework.recipes.locks.InterProcessReadWriteLock; +import org.apache.curator.retry.ExponentialBackoffRetry; +import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; -import org.sleuthkit.autopsy.core.UserPreferences; -import java.io.IOException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.KeeperException.NoNodeException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.ZooKeeper; +import org.sleuthkit.autopsy.core.UserPreferences; /** - * A centralized service for maintaining configuration information and providing - * distributed synchronization using a shared hierarchical namespace of nodes. + * A coordination service for maintaining configuration information and + * providing distributed synchronization using a shared hierarchical namespace + * of nodes. + * + * TODO (JIRA 2205): Simple refactoring for general use. 
*/ public final class CoordinationService { - /** - * Category nodes are the immediate children of the root node of a shared - * hierarchical namespace managed by the coordination service. - */ - public enum CategoryNode { // RJCTODO: Move this to CoordinationServiceNamespace - - CASES("cases"), - MANIFESTS("manifests"), - CONFIG("config"), - RESOURCE("resource"); - - private final String displayName; - - private CategoryNode(String displayName) { - this.displayName = displayName; - } - - public String getDisplayName() { - return displayName; - } - } - - /** - * Exception type thrown by the coordination service. - */ - public final static class CoordinationServiceException extends Exception { - - private static final long serialVersionUID = 1L; - - private CoordinationServiceException(String message) { - super(message); - } - - private CoordinationServiceException(String message, Throwable cause) { - super(message, cause); - } - } - - /** - * An opaque encapsulation of a lock for use in distributed synchronization. - * Instances are obtained by calling a get lock method and must be passed to - * a release lock method. - */ - public static class Lock implements AutoCloseable { - - /** - * This implementation uses the Curator read/write lock. see - * http://curator.apache.org/curator-recipes/shared-reentrant-read-write-lock.html - */ - private final InterProcessMutex interProcessLock; - private final String nodePath; - - private Lock(String nodePath, InterProcessMutex lock) { - this.nodePath = nodePath; - this.interProcessLock = lock; - } - - public String getNodePath() { - return nodePath; - } - - public void release() throws CoordinationServiceException { - try { - this.interProcessLock.release(); - } catch (Exception ex) { - throw new CoordinationServiceException(String.format("Failed to release the lock on %s", nodePath), ex); - } - } - - @Override - public void close() throws CoordinationServiceException { - release(); - } - } - private static CuratorFramework curator = null; private static final Map rootNodesToServices = new HashMap<>(); - private final Map categoryNodeToPath = new HashMap<>(); private static final int SESSION_TIMEOUT_MILLISECONDS = 300000; private static final int CONNECTION_TIMEOUT_MILLISECONDS = 300000; private static final int ZOOKEEPER_SESSION_TIMEOUT_MILLIS = 3000; private static final int ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS = 15000; - private static final int PORT_OFFSET = 1000; + private static final int PORT_OFFSET = 1000; // When run in Solr, ZooKeeper defaults to Solr port + 1000 + private final Map categoryNodeToPath = new HashMap<>(); /** - * Gets an instance of the centralized coordination service for a specific - * namespace. + * Determines if ZooKeeper is accessible with the current settings. Closes + * the connection prior to returning. 
+ * + * @return true if a connection was achieved, false otherwise + * + * @throws InterruptedException + * @throws IOException + */ + private static boolean isZooKeeperAccessible() throws InterruptedException, IOException { + boolean result = false; + Object workerThreadWaitNotifyLock = new Object(); + int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; + String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; + ZooKeeper zooKeeper = new ZooKeeper(connectString, ZOOKEEPER_SESSION_TIMEOUT_MILLIS, + (WatchedEvent event) -> { + synchronized (workerThreadWaitNotifyLock) { + workerThreadWaitNotifyLock.notify(); + } + }); + synchronized (workerThreadWaitNotifyLock) { + workerThreadWaitNotifyLock.wait(ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS); + } + ZooKeeper.States state = zooKeeper.getState(); + if (state == ZooKeeper.States.CONNECTED || state == ZooKeeper.States.CONNECTEDREADONLY) { + result = true; + } + zooKeeper.close(); + return result; + } + + /** + * Gets a coordination service for a specific namespace. * * @param rootNode The name of the root node that defines the namespace. * - * @return The service for the namespace defined by the root node name. + * @return The coordination service. * - * @throws CoordinationServiceException If an instaNce of the coordination + * @throws CoordinationServiceException If an instance of the coordination * service cannot be created. */ - public static synchronized CoordinationService getInstance(String rootNode) throws CoordinationServiceException { + public static synchronized CoordinationService getServiceForNamespace(String rootNode) throws CoordinationServiceException { + /* + * Connect to ZooKeeper via Curator. + */ if (null == curator) { RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); - // When run in Solr, ZooKeeper defaults to Solr port + 1000 int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; curator = CuratorFrameworkFactory.newClient(connectString, SESSION_TIMEOUT_MILLISECONDS, CONNECTION_TIMEOUT_MILLISECONDS, retryPolicy); @@ -157,7 +117,7 @@ public final class CoordinationService { CoordinationService service; try { service = new CoordinationService(rootNode); - } catch (Exception ex) { + } catch (IOException | InterruptedException | KeeperException | CoordinationServiceException ex) { curator = null; throw new CoordinationServiceException("Failed to create coordination service", ex); } @@ -167,15 +127,18 @@ public final class CoordinationService { } /** - * Constructs an instance of the centralized coordination service for a - * specific namespace. + * Constructs an instance of the coordination service for a specific + * namespace. * * @param rootNodeName The name of the root node that defines the namespace. 
+ * + * @throws Exception (calls Curator methods that throw Exception instead of + * more specific exceptions) */ - private CoordinationService(String rootNodeName) throws Exception { + private CoordinationService(String rootNodeName) throws InterruptedException, IOException, KeeperException, CoordinationServiceException { if (false == isZooKeeperAccessible()) { - throw new Exception("Unable to access ZooKeeper"); + throw new CoordinationServiceException("Unable to access ZooKeeper"); } String rootNode = rootNodeName; @@ -191,6 +154,8 @@ public final class CoordinationService { if (ex.code() != KeeperException.Code.NODEEXISTS) { throw ex; } + } catch (Exception ex) { + throw new CoordinationServiceException("Curator experienced an error", ex); } categoryNodeToPath.put(node.getDisplayName(), nodePath); } @@ -385,35 +350,77 @@ public final class CoordinationService { } /** - * Determines if ZooKeeper is accessible with the current settings. Closes - * the connection prior to returning. - * - * @return true if a connection was achieved, false otherwise + * Exception type thrown by the coordination service. */ - private static boolean isZooKeeperAccessible() { - boolean result = false; - Object workerThreadWaitNotifyLock = new Object(); - int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; - String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; + public final static class CoordinationServiceException extends Exception { - try { - ZooKeeper zooKeeper = new ZooKeeper(connectString, ZOOKEEPER_SESSION_TIMEOUT_MILLIS, - (WatchedEvent event) -> { + private static final long serialVersionUID = 1L; - synchronized (workerThreadWaitNotifyLock) { - workerThreadWaitNotifyLock.notify(); - } - }); - synchronized (workerThreadWaitNotifyLock) { - workerThreadWaitNotifyLock.wait(ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS); - } - ZooKeeper.States state = zooKeeper.getState(); - if (state == ZooKeeper.States.CONNECTED || state == ZooKeeper.States.CONNECTEDREADONLY) { - result = true; - } - zooKeeper.close(); - } catch (InterruptedException | IOException ignored) { + private CoordinationServiceException(String message) { + super(message); + } + + private CoordinationServiceException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * An opaque encapsulation of a lock for use in distributed synchronization. + * Instances are obtained by calling a get lock method and must be passed to + * a release lock method. + */ + public static class Lock implements AutoCloseable { + + /** + * This implementation uses the Curator read/write lock. see + * http://curator.apache.org/curator-recipes/shared-reentrant-read-write-lock.html + */ + private final InterProcessMutex interProcessLock; + private final String nodePath; + + private Lock(String nodePath, InterProcessMutex lock) { + this.nodePath = nodePath; + this.interProcessLock = lock; + } + + public String getNodePath() { + return nodePath; + } + + public void release() throws CoordinationServiceException { + try { + this.interProcessLock.release(); + } catch (Exception ex) { + throw new CoordinationServiceException(String.format("Failed to release the lock on %s", nodePath), ex); + } + } + + @Override + public void close() throws CoordinationServiceException { + release(); + } + } + + /** + * Category nodes are the immediate children of the root node of a shared + * hierarchical namespace managed by a coordination service. 
+ */ + public enum CategoryNode { + + CASES("cases"), + MANIFESTS("manifests"), + CONFIG("config"), + RESOURCE("resource"); + + private final String displayName; + + private CategoryNode(String displayName) { + this.displayName = displayName; + } + + public String getDisplayName() { + return displayName; } - return result; } } diff --git a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java index e1f2a3df42..567dd38bc6 100644 --- a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java +++ b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2015 Basis Technology Corp. + * Copyright 2016-2017 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,7 @@ package org.sleuthkit.autopsy.coordinationservice; /** - * Namespace elements for auto ingest coordination service nodes. + * Root node for Autopsy coordination service namespace. */ public final class CoordinationServiceNamespace { private static final String ROOT = "autopsy"; diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java index 3165ad23c9..e9406950fd 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java @@ -431,7 +431,7 @@ final class AutoIngestJobLogger { * log file. */ private void log(MessageCategory category, String message) throws AutoIngestJobLoggerException, InterruptedException { - try (Lock lock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.CASES, getLogPath(caseDirectoryPath).toString(), LOCK_TIME_OUT, LOCK_TIME_OUT_UNIT)) { + try (Lock lock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.CASES, getLogPath(caseDirectoryPath).toString(), LOCK_TIME_OUT, LOCK_TIME_OUT_UNIT)) { if (null != lock) { File logFile = getLogPath(caseDirectoryPath).toFile(); try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(logFile, logFile.exists())), true)) { diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java index 867ae631e8..bb55281227 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java @@ -232,7 +232,7 @@ public final class AutoIngestManager extends Observable implements PropertyChang void startUp() throws AutoIngestManagerStartupException { SYS_LOGGER.log(Level.INFO, "Auto ingest starting"); try { - coordinationService = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()); + coordinationService = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()); } catch (CoordinationServiceException ex) { throw new AutoIngestManagerStartupException("Failed to get coordination service", ex); } diff --git 
a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java index 7599ed4648..653fc65807 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java @@ -160,7 +160,7 @@ public class SharedConfiguration { File remoteFolder = getSharedFolder(); - try (Lock writeLock = CoordinationService.getInstance(LOCK_ROOT).tryGetExclusiveLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { + try (Lock writeLock = CoordinationService.getServiceForNamespace(LOCK_ROOT).tryGetExclusiveLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { if (writeLock == null) { logger.log(Level.INFO, String.format("Failed to lock %s - another node is currently uploading or downloading configuration", remoteFolder.getAbsolutePath())); return SharedConfigResult.LOCKED; @@ -230,7 +230,7 @@ public class SharedConfiguration { File remoteFolder = getSharedFolder(); - try (Lock readLock = CoordinationService.getInstance(LOCK_ROOT).tryGetSharedLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { + try (Lock readLock = CoordinationService.getServiceForNamespace(LOCK_ROOT).tryGetSharedLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { if (readLock == null) { return SharedConfigResult.LOCKED; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java new file mode 100644 index 0000000000..ffe0ed4b0b --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -0,0 +1,244 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.sleuthkit.autopsy.keywordsearch; + +import com.google.common.base.Utf8; +import java.io.IOException; +import java.io.PushbackReader; +import java.io.Reader; +import java.util.Iterator; +import java.util.NoSuchElementException; +import javax.annotation.concurrent.NotThreadSafe; +import org.sleuthkit.autopsy.coreutils.TextUtil; + +/** + * Encapsulates the content chunking algorithm in an implementation of the + * Iterator interface. Also implements Iterable so it can be used directly in a + * for loop. The base chunk is the part of the chunk before the overlapping + * window. The window will be included at the end of the current chunk as well + * as at the beginning of the next chunk. + */ +@NotThreadSafe +class Chunker implements Iterator, Iterable { + + //Chunking algorithm paramaters-------------------------------------// + /** the maximum size of a chunk, including the window. */ + private static final int MAX_TOTAL_CHUNK_SIZE = 32766; //bytes + /** the minimum to read before we start the process of looking for + * whitespace to break at and creating an overlapping window. */ + private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes + /** The maximum size of the chunk, before the overlapping window, even if we + * couldn't find whitespace to break at. 
*/ + private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes + /** The amount of text we will read through before we give up on finding + * whitespace to break the chunk/window at. */ + private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes + /** The number of characters to read in one go from the Reader. */ + private static final int READ_CHARS_BUFFER_SIZE = 512; //chars + + ////chunker state--------------------------------------------/// + /** The Reader that this chunk reads from, and divides into chunks. It must + * be a buffered reader to ensure that mark/reset are supported. */ + private final PushbackReader reader; + /** The local buffer of characters read from the Reader. */ + private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE]; + /** number of chars read in the most recent read operation. */ + private int charsRead = 0; + + /** The text of the current chunk (so far). */ + private StringBuilder currentChunk; + private StringBuilder currentWindow; + /** the size in bytes of the chunk (so far). */ + private int chunkSizeBytes = 0; + /** the size in chars of the (base) chunk (so far). */ + private int baseChunkSizeChars; + + /** has the chunker found whitespace to break on? */ + private boolean whitespaceFound = false; + /** has the chunker reached the end of the Reader? If so, there are no more + * chunks, and the current chunk does not need a window. */ + private boolean endOfReaderReached = false; + + /** + * Create a Chunker that will chunk the content of the given Reader. + * + * @param reader The content to chunk. + */ + Chunker(Reader reader) { + this.reader = new PushbackReader(reader, 2048); + } + + @Override + public Iterator iterator() { + return this; + } + + @Override + public boolean hasNext() { + return endOfReaderReached == false; + } + + /** + * Sanitize the given StringBuilder by replacing non-UTF-8 characters with + * caret '^' + * + * @param sb the StringBuilder to sanitize + * + * //JMTODO: use Charsequence.chars() or codePoints() and then a mapping + * function? + */ + private static StringBuilder sanitizeToUTF8(StringBuilder sb) { + final int length = sb.length(); + for (int i = 0; i < length; i++) { + if (TextUtil.isValidSolrUTF8(sb.charAt(i)) == false) { + sb.replace(i, i + 1, "^"); + } + } + return sb; + } + + @Override + public Chunk next() { + if (endOfReaderReached) { + throw new NoSuchElementException("There are no more chunks."); + } + //reset state for the next chunk + currentChunk = new StringBuilder(); + currentWindow = new StringBuilder(); + chunkSizeBytes = 0; + baseChunkSizeChars = 0; + + try { + readBaseChunk(); + baseChunkSizeChars = currentChunk.length(); +// reader.mark(2048); //mark the reader so we can rewind the reader here to begin the next chunk + readWindow(); + + } catch (IOException ioEx) { + throw new RuntimeException("IOException while reading chunk.", ioEx); + } + try { + reader.unread(currentWindow.toString().toCharArray()); +// reader.reset(); //reset the reader the so the next chunk can begin at the position marked above + } catch (IOException ex) { + throw new RuntimeException("IOException while resetting chunk reader.", ex); + } + + if (endOfReaderReached) { + /* if we have reached the end of the content,we won't make another + * overlapping chunk, so the base chunk can be extended to the end. */ + baseChunkSizeChars = currentChunk.length(); + } + //sanitize the text and return a Chunk object, that includes the base chunk length. 
+ return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars); + } + + /** + * Read the base chunk from the reader, and attempt to break at whitespace. + * + * @throws IOException if there is a problem reading from the reader. + */ + private void readBaseChunk() throws IOException { + //read the chunk until the minimum base chunk size + readHelper(MINIMUM_BASE_CHUNK_SIZE - 1024, false, currentChunk); + //keep reading until the maximum base chunk size or white space is reached. + whitespaceFound = false; + readHelper(MAXIMUM_BASE_CHUNK_SIZE - 1024, true, currentChunk); + + } + + /** + * Read the window from the reader, and attempt to break at whitespace. + * + * @throws IOException if there is a problem reading from the reader. + */ + private void readWindow() throws IOException { + //read the window, leaving some room to look for white space to break at. + int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE - 1024, chunkSizeBytes + 1024); + readHelper(windowEnd, false, currentWindow); + whitespaceFound = false; + //keep reading until the max chunk size, or until whitespace is reached. + windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - 1024, chunkSizeBytes + 1024); + readHelper(windowEnd, true, currentWindow); + } + + /** Helper method that implements reading in a loop. + * + * @param maxBytes The max cummulative length of the content,in + * bytes, to read from the Reader. That is, when + * chunkSizeBytes >= maxBytes stop reading. + * @param inWhiteSpaceBuffer Should the current read stop once whitespace is + * found? + * + * @throws IOException If there is a problem reading from the Reader. + */ + private void readHelper(int maxBytes, boolean inWhiteSpaceBuffer, StringBuilder currentSegment) throws IOException { + //only read one character at a time if we are looking for whitespace. + final int readSize = inWhiteSpaceBuffer ? 1 : READ_CHARS_BUFFER_SIZE; + + //read chars up to maxBytes, whitespaceFound if also inWhiteSpaceBuffer, or we reach the end of the reader. + while ((chunkSizeBytes < maxBytes) + && (false == (inWhiteSpaceBuffer && whitespaceFound)) + && (endOfReaderReached == false)) { + charsRead = reader.read(tempChunkBuf, 0, readSize); + if (-1 == charsRead) { + //this is the last chunk + endOfReaderReached = true; + } else { + + //add read chars to the chunk and update the length. + String chunkSegment = new String(tempChunkBuf, 0, charsRead); + final int segmentSize = Utf8.encodedLength(chunkSegment); + + if (chunkSizeBytes + segmentSize < maxBytes) { + chunkSizeBytes += segmentSize; + currentSegment.append(chunkSegment); + } else { + for (int i = 0; i < charsRead; i++) { + final Character character = tempChunkBuf[i]; + int charSize = Utf8.encodedLength(character.toString()); + if (chunkSizeBytes + charSize < maxBytes + && (false == (inWhiteSpaceBuffer && whitespaceFound))) { + currentSegment.append(character); + chunkSizeBytes += charSize; + if (inWhiteSpaceBuffer) { + //check for whitespace. + whitespaceFound = Character.isWhitespace(character); + } + } else { + reader.unread(tempChunkBuf, i, charsRead - i); + break; + } + } + } + } + } + } +} + +/** + * Represents one chunk as the text in it and the length of the base chunk, in + * chars. 
+ */ +class Chunk { + + private final StringBuilder sb; + private final int chunksize; + + Chunk(StringBuilder sb, int baseChunkLength) { + this.sb = sb; + this.chunksize = baseChunkLength; + } + + @Override + public String toString() { + return sb.toString(); + } + + int getBaseChunkLength() { + return chunksize; + } +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 2e1c6feb2d..9b9ffa689f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -18,20 +18,15 @@ */ package org.sleuthkit.autopsy.keywordsearch; -import com.google.common.base.Utf8; import java.io.BufferedReader; import java.io.IOException; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; -import java.util.NoSuchElementException; import java.util.logging.Level; -import javax.annotation.concurrent.NotThreadSafe; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.coreutils.TextUtil; import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.datamodel.AbstractFile; @@ -370,211 +365,3 @@ class Ingester { } } } - -/** - * Encapsulates the content chunking algorithm in an implementation of the - * Iterator interface. Also implements Iterable so it can be used directly in a - * for loop. The base chunk is the part of the chunk before the overlapping - * window. The window will be included at the end of the current chunk as well - * as at the beginning of the next chunk. - */ -@NotThreadSafe -class Chunker implements Iterator, Iterable { - - //Chunking algorithm paramaters-------------------------------------// - /** the maximum size of a chunk, including the window. */ - private static final int MAX_TOTAL_CHUNK_SIZE = 32766; //bytes - /** the minimum to read before we start the process of looking for - * whitespace to break at and creating an overlapping window. */ - private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes - /** The maximum size of the chunk, before the overlapping window, even if we - * couldn't find whitespace to break at. */ - private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes - /** The amount of text we will read through before we give up on finding - * whitespace to break the chunk/window at. */ - private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes - /** The number of characters to read in one go from the Reader. */ - private static final int READ_CHARS_BUFFER_SIZE = 512; //chars - - ////chunker state--------------------------------------------/// - /** The Reader that this chunk reads from, and divides into chunks. It must - * be a buffered reader to ensure that mark/reset are supported. */ - private final BufferedReader reader; - /** The local buffer of characters read from the Reader. */ - private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE]; - /** number of chars read in the most recent read operation. */ - private int charsRead = 0; - - /** The text of the current chunk (so far). */ - private StringBuilder currentChunk; - /** the size in bytes of the chunk (so far). */ - private int chunkSizeBytes = 0; - /** the size in chars of the (base) chunk (so far). 
*/ - private int baseChunkSizeChars; - - /** has the chunker found whitespace to break on? */ - private boolean whitespaceFound = false; - /** has the chunker reached the end of the Reader? If so, there are no more - * chunks, and the current chunk does not need a window. */ - private boolean endOfReaderReached = false; - - /** - * Create a Chunker that will chunk the content of the given Reader. - * - * @param reader The content to chunk. - */ - Chunker(BufferedReader reader) { - this.reader = reader; - } - - @Override - public Iterator iterator() { - return this; - } - - @Override - public boolean hasNext() { - return endOfReaderReached == false; - } - - /** - * Sanitize the given StringBuilder by replacing non-UTF-8 characters with - * caret '^' - * - * @param sb the StringBuilder to sanitize - * - * //JMTODO: use Charsequence.chars() or codePoints() and then a mapping - * function? - */ - private static StringBuilder sanitizeToUTF8(StringBuilder sb) { - final int length = sb.length(); - for (int i = 0; i < length; i++) { - if (TextUtil.isValidSolrUTF8(sb.charAt(i)) == false) { - sb.replace(i, i + 1, "^"); - } - } - return sb; - } - - @Override - public Chunk next() { - if (endOfReaderReached) { - throw new NoSuchElementException("There are no more chunks."); - } - //reset state for the next chunk - currentChunk = new StringBuilder(); - chunkSizeBytes = 0; - baseChunkSizeChars = 0; - - try { - readBaseChunk(); - baseChunkSizeChars = currentChunk.length(); - reader.mark(2048); //mark the reader so we can rewind the reader here to begin the next chunk - readWindow(); - } catch (IOException ioEx) { - throw new RuntimeException("IOException while reading chunk.", ioEx); - } - try { - reader.reset(); //reset the reader the so the next chunk can begin at the position marked above - } catch (IOException ex) { - throw new RuntimeException("IOException while resetting chunk reader.", ex); - } - - if (endOfReaderReached) { - /* if we have reached the end of the content,we won't make another - * overlapping chunk, so the base chunk can be extended to the end. */ - baseChunkSizeChars = currentChunk.length(); - } - //sanitize the text and return a Chunk object, that includes the base chunk length. - return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars); - } - - /** - * Read the base chunk from the reader, and attempt to break at whitespace. - * - * @throws IOException if there is a problem reading from the reader. - */ - private void readBaseChunk() throws IOException { - //read the chunk until the minimum base chunk size - readHelper(MINIMUM_BASE_CHUNK_SIZE, false); - //keep reading until the maximum base chunk size or white space is reached. - whitespaceFound = false; - readHelper(MAXIMUM_BASE_CHUNK_SIZE, true); - - } - - /** - * Read the window from the reader, and attempt to break at whitespace. - * - * @throws IOException if there is a problem reading from the reader. - */ - private void readWindow() throws IOException { - //read the window, leaving some room to look for white space to break at. - int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, chunkSizeBytes + 1024); - readHelper(windowEnd, false); - whitespaceFound = false; - //keep reading until the max chunk size, or until whitespace is reached. - windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE, chunkSizeBytes + 1024); - readHelper(windowEnd, true); - } - - /** Helper method that implements reading in a loop. 
- * - * @param maxBytes The max cummulative length of the content,in - * bytes, to read from the Reader. That is, when - * chunkSizeBytes >= maxBytes stop reading. - * @param inWhiteSpaceBuffer Should the current read stop once whitespace is - * found? - * - * @throws IOException If there is a problem reading from the Reader. - */ - private void readHelper(int maxBytes, boolean inWhiteSpaceBuffer) throws IOException { - //only read one character at a time if we are looking for whitespace. - final int readSize = inWhiteSpaceBuffer ? 1 : READ_CHARS_BUFFER_SIZE; - - //read chars up to maxBytes, whitespaceFound if also inWhiteSpaceBuffer, or we reach the end of the reader. - while ((chunkSizeBytes < maxBytes) - && (false == (inWhiteSpaceBuffer && whitespaceFound)) - && (endOfReaderReached == false)) { - charsRead = reader.read(tempChunkBuf, 0, readSize); - if (-1 == charsRead) { - //this is the last chunk - endOfReaderReached = true; - } else { - if (inWhiteSpaceBuffer) { - //chec for whitespace. - whitespaceFound = Character.isWhitespace(tempChunkBuf[0]); - } - - //add read chars to the chunk and update the length. - String chunkSegment = new String(tempChunkBuf, 0, charsRead); - chunkSizeBytes += Utf8.encodedLength(chunkSegment); - currentChunk.append(chunkSegment); - } - } - } -} - -/** - * Represents one chunk as the text in it and the length of the base chunk, in - * chars. - */ -class Chunk { - - private final StringBuilder sb; - private final int chunksize; - - Chunk(StringBuilder sb, int baseChunkLength) { - this.sb = sb; - this.chunksize = baseChunkLength; - } - - @Override - public String toString() { - return sb.toString(); - } - - int getBaseChunkLength() { - return chunksize; - } -} From 0a7af485f26ab0de035cf3f9d6e2ff273b352a31 Mon Sep 17 00:00:00 2001 From: millmanorama Date: Fri, 13 Jan 2017 11:27:39 +0100 Subject: [PATCH 2/8] return instead of just break to stop infinite loop --- .../src/org/sleuthkit/autopsy/keywordsearch/Chunker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index ffe0ed4b0b..4b108558d0 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -210,7 +210,7 @@ class Chunker implements Iterator, Iterable { } } else { reader.unread(tempChunkBuf, i, charsRead - i); - break; + return; } } } From 104cbd4dc48606b18a43df992f969bb96e0f7fa4 Mon Sep 17 00:00:00 2001 From: jmillman Date: Fri, 13 Jan 2017 06:55:42 -0500 Subject: [PATCH 3/8] restore space in "Keyword Search" --- .../src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties index 43ba62c3c6..8db9e3167d 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties @@ -199,7 +199,7 @@ KeywordSearchOptionsPanelController.moduleErr.msg1=A module caused an error list KeywordSearchOptionsPanelController.moduleErr.msg2=A module caused an error listening to KeywordSearchOptionsPanelController updates. See log to determine which module. Some data could be incomplete. 
KeywordSearchQueryManager.pathText.text=Keyword search KeywordSearchResultFactory.progress.saving=Saving results\: {0} -KeywordSearchSettings.moduleName.text=KeywordSearch +KeywordSearchSettings.moduleName.text=Keyword Search KeywordSearchSettings.properties_options.text={0}_Options KeywordSearchSettings.propertiesNSRL.text={0}_NSRL KeywordSearchSettings.propertiesScripts.text={0}_Scripts From 506a3037a42d78079bfbca73170e4cbc34674bd6 Mon Sep 17 00:00:00 2001 From: millmanorama Date: Mon, 16 Jan 2017 15:51:04 +0100 Subject: [PATCH 4/8] finish PushbackReader implementation of Chunker, introduce TextExtractorException --- .../keywordsearch/ArtifactTextExtractor.java | 35 ++-- .../autopsy/keywordsearch/Chunker.java | 163 ++++++++++-------- .../keywordsearch/FileTextExtractor.java | 2 +- .../keywordsearch/HtmlTextExtractor.java | 4 +- .../autopsy/keywordsearch/Ingester.java | 8 +- .../keywordsearch/StringsTextExtractor.java | 2 +- .../autopsy/keywordsearch/TextExtractor.java | 15 +- .../keywordsearch/TikaTextExtractor.java | 7 +- 8 files changed, 135 insertions(+), 101 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java index 07657f9646..962e5ba245 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java @@ -39,6 +39,7 @@ import org.sleuthkit.datamodel.TskCoreException; * artifact's attributes. */ class ArtifactTextExtractor implements TextExtractor { + static final private Logger logger = Logger.getLogger(ArtifactTextExtractor.class.getName()); /** @@ -82,26 +83,31 @@ class ArtifactTextExtractor implements TextExtractor { } @Override - public boolean isDisabled() { + public boolean isDisabled() { return false; - } + } - @Override - public void logWarning(final String msg, Exception ex) { + @Override + public void logWarning(final String msg, Exception ex) { logger.log(Level.WARNING, msg, ex); //NON-NLS } } - private InputStream getInputStream(BlackboardArtifact artifact) { + private InputStream getInputStream(BlackboardArtifact artifact) throws TextExtractorException { // Concatenate the string values of all attributes into a single // "content" string to be indexed. 
StringBuilder artifactContents = new StringBuilder(); + Content dataSource = null; try { - Content dataSource = getDataSource(artifact); - if (dataSource == null) { - return null; - } + dataSource = getDataSource(artifact); + } catch (TskCoreException tskCoreException) { + throw new TextExtractorException("Unable to get datasource for artifact: " + artifact.toString(), tskCoreException); + } + if (dataSource == null) { + throw new TextExtractorException("Datasource was null for artifact: " + artifact.toString()); + } + try { for (BlackboardAttribute attribute : artifact.getAttributes()) { artifactContents.append(attribute.getAttributeType().getDisplayName()); artifactContents.append(" : "); @@ -119,18 +125,15 @@ class ArtifactTextExtractor implements TextExtractor { } artifactContents.append(System.lineSeparator()); } - } catch (TskCoreException ex) { - logger.log(Level.SEVERE, "There was a problem getting the atributes for artifact " + artifact.getArtifactID(), ex); - return null; - } - if (artifactContents.length() == 0) { - return null; + } catch (TskCoreException tskCoreException) { + throw new TextExtractorException("Unable to get attributes for artifact: " + artifact.toString(), tskCoreException); } + return IOUtils.toInputStream(artifactContents, StandardCharsets.UTF_8); } @Override - public Reader getReader(BlackboardArtifact source) throws Ingester.IngesterException { + public Reader getReader(BlackboardArtifact source) throws TextExtractorException { return new InputStreamReader(getInputStream(source), StandardCharsets.UTF_8); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 4b108558d0..799c238b26 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -5,14 +5,15 @@ */ package org.sleuthkit.autopsy.keywordsearch; -import com.google.common.base.Utf8; import java.io.IOException; import java.io.PushbackReader; import java.io.Reader; +import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.NoSuchElementException; import javax.annotation.concurrent.NotThreadSafe; import org.sleuthkit.autopsy.coreutils.TextUtil; +import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk; /** * Encapsulates the content chunking algorithm in an implementation of the @@ -56,19 +57,18 @@ class Chunker implements Iterator, Iterable { /** the size in chars of the (base) chunk (so far). */ private int baseChunkSizeChars; - /** has the chunker found whitespace to break on? */ - private boolean whitespaceFound = false; /** has the chunker reached the end of the Reader? If so, there are no more * chunks, and the current chunk does not need a window. */ private boolean endOfReaderReached = false; + /** * Create a Chunker that will chunk the content of the given Reader. * * @param reader The content to chunk. 
*/ Chunker(Reader reader) { - this.reader = new PushbackReader(reader, 2048); + this.reader = new PushbackReader(reader, MAX_TOTAL_CHUNK_SIZE); } @Override @@ -114,7 +114,6 @@ class Chunker implements Iterator, Iterable { try { readBaseChunk(); baseChunkSizeChars = currentChunk.length(); -// reader.mark(2048); //mark the reader so we can rewind the reader here to begin the next chunk readWindow(); } catch (IOException ioEx) { @@ -122,7 +121,6 @@ class Chunker implements Iterator, Iterable { } try { reader.unread(currentWindow.toString().toCharArray()); -// reader.reset(); //reset the reader the so the next chunk can begin at the position marked above } catch (IOException ex) { throw new RuntimeException("IOException while resetting chunk reader.", ex); } @@ -143,10 +141,10 @@ class Chunker implements Iterator, Iterable { */ private void readBaseChunk() throws IOException { //read the chunk until the minimum base chunk size - readHelper(MINIMUM_BASE_CHUNK_SIZE - 1024, false, currentChunk); + readHelper(MINIMUM_BASE_CHUNK_SIZE - 1024, currentChunk); //keep reading until the maximum base chunk size or white space is reached. - whitespaceFound = false; - readHelper(MAXIMUM_BASE_CHUNK_SIZE - 1024, true, currentChunk); + + readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE - 1024, currentChunk); } @@ -158,87 +156,110 @@ class Chunker implements Iterator, Iterable { private void readWindow() throws IOException { //read the window, leaving some room to look for white space to break at. int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE - 1024, chunkSizeBytes + 1024); - readHelper(windowEnd, false, currentWindow); - whitespaceFound = false; + readHelper(windowEnd, currentWindow); //keep reading until the max chunk size, or until whitespace is reached. windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - 1024, chunkSizeBytes + 1024); - readHelper(windowEnd, true, currentWindow); + readToWhiteSpaceHelper(windowEnd, currentWindow); } - /** Helper method that implements reading in a loop. - * - * @param maxBytes The max cummulative length of the content,in - * bytes, to read from the Reader. That is, when - * chunkSizeBytes >= maxBytes stop reading. - * @param inWhiteSpaceBuffer Should the current read stop once whitespace is - * found? - * - * @throws IOException If there is a problem reading from the Reader. - */ - private void readHelper(int maxBytes, boolean inWhiteSpaceBuffer, StringBuilder currentSegment) throws IOException { - //only read one character at a time if we are looking for whitespace. - final int readSize = inWhiteSpaceBuffer ? 1 : READ_CHARS_BUFFER_SIZE; + private void readHelper(int maxBytes, StringBuilder currentSegment) throws IOException { - //read chars up to maxBytes, whitespaceFound if also inWhiteSpaceBuffer, or we reach the end of the reader. + //read chars up to maxBytes, or we reach the end of the reader. while ((chunkSizeBytes < maxBytes) - && (false == (inWhiteSpaceBuffer && whitespaceFound)) && (endOfReaderReached == false)) { - charsRead = reader.read(tempChunkBuf, 0, readSize); + charsRead = reader.read(tempChunkBuf, 0, READ_CHARS_BUFFER_SIZE); if (-1 == charsRead) { //this is the last chunk endOfReaderReached = true; + return; } else { + //if the last charcter might be part of a surroate pair, unread it. 
+ final char lastChar = tempChunkBuf[charsRead - 1]; + String chunkSegment; + if (Character.isHighSurrogate(lastChar)) { + charsRead--; + chunkSegment = new String(tempChunkBuf, 0, charsRead); + reader.unread(lastChar); + } else { + chunkSegment = new String(tempChunkBuf, 0, charsRead); + } //add read chars to the chunk and update the length. - String chunkSegment = new String(tempChunkBuf, 0, charsRead); - final int segmentSize = Utf8.encodedLength(chunkSegment); + int segmentSize = chunkSegment.getBytes(StandardCharsets.UTF_8).length; if (chunkSizeBytes + segmentSize < maxBytes) { - chunkSizeBytes += segmentSize; currentSegment.append(chunkSegment); + chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; } else { - for (int i = 0; i < charsRead; i++) { - final Character character = tempChunkBuf[i]; - int charSize = Utf8.encodedLength(character.toString()); - if (chunkSizeBytes + charSize < maxBytes - && (false == (inWhiteSpaceBuffer && whitespaceFound))) { - currentSegment.append(character); - chunkSizeBytes += charSize; - if (inWhiteSpaceBuffer) { - //check for whitespace. - whitespaceFound = Character.isWhitespace(character); - } - } else { - reader.unread(tempChunkBuf, i, charsRead - i); - return; - } - } + reader.unread(tempChunkBuf, 0, charsRead); + return; } } } } -} - -/** - * Represents one chunk as the text in it and the length of the base chunk, in - * chars. - */ -class Chunk { - - private final StringBuilder sb; - private final int chunksize; - - Chunk(StringBuilder sb, int baseChunkLength) { - this.sb = sb; - this.chunksize = baseChunkLength; - } - - @Override - public String toString() { - return sb.toString(); - } - - int getBaseChunkLength() { - return chunksize; - } + + private void readToWhiteSpaceHelper(int maxBytes, StringBuilder currentSegment) throws IOException { + boolean whitespaceFound = false; + //read 1 char at a time up to maxBytes, whitespaceFound, or we reach the end of the reader. + while ((chunkSizeBytes < maxBytes) + && (whitespaceFound == false) + && (endOfReaderReached == false)) { + charsRead = reader.read(tempChunkBuf, 0, 1); + if (-1 == charsRead) { + //this is the last chunk + endOfReaderReached = true; + return; + } else { + //if the last charcter might be part of a surroate pair, read another char + final char ch = tempChunkBuf[0]; + String chunkSegment; + if (Character.isHighSurrogate(ch)) { + charsRead = reader.read(tempChunkBuf, 0, 1); + if (charsRead == -1) { + currentSegment.append(ch); + chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; + //this is the last chunk + endOfReaderReached = true; + return; + } else { + chunkSegment = new String(new char[]{ch, tempChunkBuf[0]}); + } + } else { + chunkSegment = new String(tempChunkBuf, 0, 1); + } + //check for whitespace. + whitespaceFound = Character.isWhitespace(chunkSegment.codePointAt(0)); + //add read chars to the chunk and update the length. + currentSegment.append(chunkSegment); + /* this is wrong once we are in the white space but should have + * no negative effect */ + chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; + } + } + } + + /** + * Represents one chunk as the text in it and the length of the base chunk, + * in chars. 
+ */ + static class Chunk { + + private final StringBuilder sb; + private final int chunksize; + + Chunk(StringBuilder sb, int baseChunkLength) { + this.sb = sb; + this.chunksize = baseChunkLength; + } + + @Override + public String toString() { + return sb.toString(); + } + + int getBaseChunkLength() { + return chunksize; + } + } + } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java index 55838f4e7f..689f42591b 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java @@ -95,7 +95,7 @@ abstract class FileTextExtractor implements TextExtractor< AbstractFile> { abstract boolean isSupported(AbstractFile file, String detectedFormat); @Override - public abstract Reader getReader(AbstractFile source) throws Ingester.IngesterException; + public abstract Reader getReader(AbstractFile source) throws TextExtractorException; @Override public long getID(AbstractFile source) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java index e758ef86a0..f72b02d1eb 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java @@ -65,7 +65,7 @@ class HtmlTextExtractor extends FileTextExtractor { } @Override - public Reader getReader(AbstractFile sourceFile) throws Ingester.IngesterException { + public Reader getReader(AbstractFile sourceFile) throws TextExtractorException { ReadContentInputStream stream = new ReadContentInputStream(sourceFile); //Parse the stream with Jericho and put the results in a Reader @@ -159,7 +159,7 @@ class HtmlTextExtractor extends FileTextExtractor { // All done, now make it a reader return new StringReader(stringBuilder.toString()); } catch (IOException ex) { - throw new Ingester.IngesterException("Error extracting HTML from content.", ex); + throw new TextExtractorException("Error extracting HTML from content.", ex); } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 9b9ffa689f..a4b36ef651 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -19,7 +19,6 @@ package org.sleuthkit.autopsy.keywordsearch; import java.io.BufferedReader; -import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.logging.Level; @@ -29,6 +28,8 @@ import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.autopsy.ingest.IngestJobContext; +import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk; +import org.sleuthkit.autopsy.keywordsearch.TextExtractor.TextExtractorException; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.DerivedFile; @@ -170,10 +171,11 @@ class Ingester { throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex); } } - } catch (IOException ex) { + } catch (TextExtractorException ex) { extractor.logWarning("Unable to read content stream from " + sourceID + ": " + sourceName, 
ex);//NON-NLS return false; - } catch (Exception ex) { + } //NON-NLS + catch (Exception ex) { extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS return false; } finally { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java index 23c49c255f..919510332b 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java @@ -107,7 +107,7 @@ class StringsTextExtractor extends FileTextExtractor { } @Override - public InputStreamReader getReader(AbstractFile sourceFile) throws Ingester.IngesterException { + public InputStreamReader getReader(AbstractFile sourceFile) throws TextExtractorException { InputStream stringStream = getInputStream(sourceFile); return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java index 6ea27e733b..94abb940eb 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java @@ -30,7 +30,6 @@ import org.sleuthkit.datamodel.SleuthkitVisitableItem; */ interface TextExtractor< TextSource extends SleuthkitVisitableItem> { - /** * Is this extractor configured such that no extraction will/should be done? * @@ -46,7 +45,6 @@ interface TextExtractor< TextSource extends SleuthkitVisitableItem> { */ abstract void logWarning(String msg, Exception ex); - /** * Get a reader that over the text extracted from the given source. * @@ -57,7 +55,7 @@ interface TextExtractor< TextSource extends SleuthkitVisitableItem> { * * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ - abstract Reader getReader(TextSource source) throws Ingester.IngesterException; + abstract Reader getReader(TextSource source) throws TextExtractorException; /** * Get the 'object' id of the given source. 
@@ -76,4 +74,15 @@ interface TextExtractor< TextSource extends SleuthkitVisitableItem> { * @return */ abstract String getName(TextSource source); + + class TextExtractorException extends Exception { + + public TextExtractorException(String message) { + super(message); + } + + public TextExtractorException(String message, Throwable cause) { + super(message, cause); + } + } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java index 7ac5392016..3a494be9c6 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java @@ -36,7 +36,6 @@ import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.ReadContentInputStream; @@ -67,7 +66,7 @@ class TikaTextExtractor extends FileTextExtractor { } @Override - public Reader getReader(AbstractFile sourceFile) throws IngesterException, MissingResourceException { + public Reader getReader(AbstractFile sourceFile) throws TextExtractorException, MissingResourceException { ReadContentInputStream stream = new ReadContentInputStream(sourceFile); Metadata metadata = new Metadata(); @@ -81,12 +80,12 @@ class TikaTextExtractor extends FileTextExtractor { } catch (TimeoutException te) { final String msg = NbBundle.getMessage(this.getClass(), "AbstractFileTikaTextExtract.index.tikaParseTimeout.text", sourceFile.getId(), sourceFile.getName()); logWarning(msg, te); - throw new IngesterException(msg); + throw new TextExtractorException(msg, te); } catch (Exception ex) { KeywordSearch.getTikaLogger().log(Level.WARNING, "Exception: Unable to Tika parse the content" + sourceFile.getId() + ": " + sourceFile.getName(), ex.getCause()); //NON-NLS final String msg = NbBundle.getMessage(this.getClass(), "AbstractFileTikaTextExtract.index.exception.tikaParse.msg", sourceFile.getId(), sourceFile.getName()); logWarning(msg, ex); - throw new IngesterException(msg, ex); + throw new TextExtractorException(msg, ex); } } From 23afb4515e7b75135483691cc859a8586ce81c9b Mon Sep 17 00:00:00 2001 From: millmanorama Date: Wed, 18 Jan 2017 13:04:50 +0100 Subject: [PATCH 5/8] refactor exception handling of Chunker --- .../autopsy/keywordsearch/Chunker.java | 24 +++++++++++-------- .../autopsy/keywordsearch/Ingester.java | 21 +++++++--------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 799c238b26..2583a4e4c0 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -60,7 +60,7 @@ class Chunker implements Iterator, Iterable { /** has the chunker reached the end of the Reader? If so, there are no more * chunks, and the current chunk does not need a window. */ private boolean endOfReaderReached = false; - + private Exception ex; /** * Create a Chunker that will chunk the content of the given Reader. 
@@ -76,9 +76,18 @@ class Chunker implements Iterator, Iterable { return this; } + boolean hasException() { + return ex != null; + } + + public Exception getException() { + return ex; + } + @Override public boolean hasNext() { - return endOfReaderReached == false; + return (ex == null) + && (endOfReaderReached == false); } /** @@ -102,7 +111,7 @@ class Chunker implements Iterator, Iterable { @Override public Chunk next() { - if (endOfReaderReached) { + if (hasNext() == false) { throw new NoSuchElementException("There are no more chunks."); } //reset state for the next chunk @@ -115,14 +124,9 @@ class Chunker implements Iterator, Iterable { readBaseChunk(); baseChunkSizeChars = currentChunk.length(); readWindow(); - - } catch (IOException ioEx) { - throw new RuntimeException("IOException while reading chunk.", ioEx); - } - try { reader.unread(currentWindow.toString().toCharArray()); - } catch (IOException ex) { - throw new RuntimeException("IOException while resetting chunk reader.", ex); + } catch (IOException ioEx) { + ex = ioEx; } if (endOfReaderReached) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index a4b36ef651..0dc221356c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -29,7 +29,6 @@ import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk; -import org.sleuthkit.autopsy.keywordsearch.TextExtractor.TextExtractorException; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.DerivedFile; @@ -145,8 +144,8 @@ class Ingester { int numChunks = 0; //unknown until chunking is done if (extractor.isDisabled()) { - /* some Extrctors, notable the strings extractor, have options which - * can be configured such that no extraction should be done */ + /* some Extractors, notable the strings extractor, have options + * which can be configured such that no extraction should be done */ return true; } @@ -167,15 +166,13 @@ class Ingester { + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS throw ingEx; //need to rethrow to signal error and move on - } catch (Exception ex) { - throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex); } } - } catch (TextExtractorException ex) { - extractor.logWarning("Unable to read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS - return false; - } //NON-NLS - catch (Exception ex) { + if (chunker.hasException()) { + extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException()); + return false; + } + } catch (Exception ex) { extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS return false; } finally { @@ -189,7 +186,7 @@ class Ingester { } /** - * Add one chunk as to the Solr index as a seperate sold document. + * Add one chunk as to the Solr index as a separate Solr document. * * TODO see if can use a byte or string streaming way to add content to * /update handler e.g. 
with XMLUpdateRequestHandler (deprecated in SOlr @@ -229,7 +226,7 @@ class Ingester { uncommitedIngests = true; } catch (KeywordSearchModuleException ex) { - //JMTODO: does this need to ne internationalized? + //JMTODO: does this need to be internationalized? throw new IngesterException( NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex); } From 851c0721eddbababf6f3b3a62e556d9a89f1ea5d Mon Sep 17 00:00:00 2001 From: millmanorama Date: Thu, 19 Jan 2017 12:30:03 +0100 Subject: [PATCH 6/8] correct size calculations --- .../autopsy/keywordsearch/Chunker.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 2583a4e4c0..ef1d06d98f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -133,6 +133,11 @@ class Chunker implements Iterator, Iterable { /* if we have reached the end of the content,we won't make another * overlapping chunk, so the base chunk can be extended to the end. */ baseChunkSizeChars = currentChunk.length(); + System.out.println("chunksize: " + baseChunkSizeChars); + System.out.println("no window"); + } else { + System.out.println("chunksize: " + baseChunkSizeChars); + System.out.println("window size: " + (currentChunk.length() - baseChunkSizeChars)); } //sanitize the text and return a Chunk object, that includes the base chunk length. return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars); @@ -145,10 +150,12 @@ class Chunker implements Iterator, Iterable { */ private void readBaseChunk() throws IOException { //read the chunk until the minimum base chunk size - readHelper(MINIMUM_BASE_CHUNK_SIZE - 1024, currentChunk); - //keep reading until the maximum base chunk size or white space is reached. + readHelper(MINIMUM_BASE_CHUNK_SIZE, currentChunk); + System.out.println("base chunk 1: " + chunkSizeBytes); - readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE - 1024, currentChunk); + //keep reading until the maximum base chunk size or white space is reached. + readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE, currentChunk); + System.out.println("base chunk 2: " + chunkSizeBytes); } @@ -159,11 +166,15 @@ class Chunker implements Iterator, Iterable { */ private void readWindow() throws IOException { //read the window, leaving some room to look for white space to break at. - int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE - 1024, chunkSizeBytes + 1024); - readHelper(windowEnd, currentWindow); +// int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, chunkSizeBytes + 1024); + readHelper(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, currentWindow); + System.out.println("window chunk 1: " + chunkSizeBytes); + //keep reading until the max chunk size, or until whitespace is reached. 
- windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - 1024, chunkSizeBytes + 1024); - readToWhiteSpaceHelper(windowEnd, currentWindow); +// windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE, chunkSizeBytes + 1024); + readToWhiteSpaceHelper(MAX_TOTAL_CHUNK_SIZE, currentWindow); + System.out.println("window chunk 2: " + chunkSizeBytes); + } private void readHelper(int maxBytes, StringBuilder currentSegment) throws IOException { @@ -193,7 +204,7 @@ class Chunker implements Iterator, Iterable { if (chunkSizeBytes + segmentSize < maxBytes) { currentSegment.append(chunkSegment); - chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; + chunkSizeBytes += segmentSize; } else { reader.unread(tempChunkBuf, 0, charsRead); return; @@ -221,7 +232,7 @@ class Chunker implements Iterator, Iterable { charsRead = reader.read(tempChunkBuf, 0, 1); if (charsRead == -1) { currentSegment.append(ch); - chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; + chunkSizeBytes += new Character(ch).toString().getBytes(StandardCharsets.UTF_8).length; //this is the last chunk endOfReaderReached = true; return; @@ -237,7 +248,7 @@ class Chunker implements Iterator, Iterable { currentSegment.append(chunkSegment); /* this is wrong once we are in the white space but should have * no negative effect */ - chunkSizeBytes = currentSegment.toString().getBytes(StandardCharsets.UTF_8).length; + chunkSizeBytes += chunkSegment.getBytes(StandardCharsets.UTF_8).length; } } } From d490dfc44bd62bd811c81531a4f99928b65c72ea Mon Sep 17 00:00:00 2001 From: millmanorama Date: Thu, 19 Jan 2017 16:11:37 +0100 Subject: [PATCH 7/8] fix window overlapping --- .../autopsy/keywordsearch/Chunker.java | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index ef1d06d98f..9f3a99d5e5 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -124,21 +124,19 @@ class Chunker implements Iterator, Iterable { readBaseChunk(); baseChunkSizeChars = currentChunk.length(); readWindow(); - reader.unread(currentWindow.toString().toCharArray()); + if (endOfReaderReached) { + /* if we have reached the end of the content,we won't make + * another overlapping chunk, so the length of the base chunk + * can be extended * to the end. */ + baseChunkSizeChars = currentChunk.length(); + } else { + reader.unread(currentWindow.toString().toCharArray()); + } } catch (IOException ioEx) { ex = ioEx; } - - if (endOfReaderReached) { - /* if we have reached the end of the content,we won't make another - * overlapping chunk, so the base chunk can be extended to the end. */ - baseChunkSizeChars = currentChunk.length(); - System.out.println("chunksize: " + baseChunkSizeChars); - System.out.println("no window"); - } else { - System.out.println("chunksize: " + baseChunkSizeChars); - System.out.println("window size: " + (currentChunk.length() - baseChunkSizeChars)); - } + //add the window text to the current chunk. + currentChunk.append(currentWindow); //sanitize the text and return a Chunk object, that includes the base chunk length. 
return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars); } @@ -151,12 +149,9 @@ class Chunker implements Iterator, Iterable { private void readBaseChunk() throws IOException { //read the chunk until the minimum base chunk size readHelper(MINIMUM_BASE_CHUNK_SIZE, currentChunk); - System.out.println("base chunk 1: " + chunkSizeBytes); //keep reading until the maximum base chunk size or white space is reached. readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE, currentChunk); - System.out.println("base chunk 2: " + chunkSizeBytes); - } /** @@ -166,15 +161,10 @@ class Chunker implements Iterator, Iterable { */ private void readWindow() throws IOException { //read the window, leaving some room to look for white space to break at. -// int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, chunkSizeBytes + 1024); readHelper(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, currentWindow); - System.out.println("window chunk 1: " + chunkSizeBytes); //keep reading until the max chunk size, or until whitespace is reached. -// windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE, chunkSizeBytes + 1024); readToWhiteSpaceHelper(MAX_TOTAL_CHUNK_SIZE, currentWindow); - System.out.println("window chunk 2: " + chunkSizeBytes); - } private void readHelper(int maxBytes, StringBuilder currentSegment) throws IOException { @@ -229,7 +219,7 @@ class Chunker implements Iterator, Iterable { final char ch = tempChunkBuf[0]; String chunkSegment; if (Character.isHighSurrogate(ch)) { - charsRead = reader.read(tempChunkBuf, 0, 1); + charsRead = reader.read(tempChunkBuf, 1, 1); if (charsRead == -1) { currentSegment.append(ch); chunkSizeBytes += new Character(ch).toString().getBytes(StandardCharsets.UTF_8).length; @@ -237,7 +227,7 @@ class Chunker implements Iterator, Iterable { endOfReaderReached = true; return; } else { - chunkSegment = new String(new char[]{ch, tempChunkBuf[0]}); + chunkSegment = new String(tempChunkBuf, 0, 2); } } else { chunkSegment = new String(tempChunkBuf, 0, 1); @@ -246,8 +236,7 @@ class Chunker implements Iterator, Iterable { whitespaceFound = Character.isWhitespace(chunkSegment.codePointAt(0)); //add read chars to the chunk and update the length. currentSegment.append(chunkSegment); - /* this is wrong once we are in the white space but should have - * no negative effect */ + chunkSizeBytes += chunkSegment.getBytes(StandardCharsets.UTF_8).length; } } From 363b5c2cce4f96a4943de26169ee3a704e1a747d Mon Sep 17 00:00:00 2001 From: millmanorama Date: Thu, 19 Jan 2017 16:56:43 +0100 Subject: [PATCH 8/8] comments and some minor refactoring --- .../sleuthkit/autopsy/casemodule/Case.java | 8 +- .../CoordinationService.java | 253 +++++++++--------- .../CoordinationServiceNamespace.java | 4 +- .../autoingest/AutoIngestJobLogger.java | 2 +- .../autoingest/AutoIngestManager.java | 2 +- .../configuration/SharedConfiguration.java | 4 +- .../autopsy/keywordsearch/Chunker.java | 111 +++++--- 7 files changed, 209 insertions(+), 175 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/casemodule/Case.java b/Core/src/org/sleuthkit/autopsy/casemodule/Case.java index 3beac08648..d681e81b48 100644 --- a/Core/src/org/sleuthkit/autopsy/casemodule/Case.java +++ b/Core/src/org/sleuthkit/autopsy/casemodule/Case.java @@ -1008,7 +1008,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The shared lock needs to be created on a special thread so it can be released // from the same thread. 
Future future = getCurrentCaseExecutor().submit(() -> { - currentCaseLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, caseDir); + currentCaseLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, caseDir); if (null == currentCaseLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); } @@ -1019,7 +1019,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The exclusive lock uses the unique case name. // This lock does not need to be on a special thread since it will be released before // leaving this method - exclusiveResourceLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, + exclusiveResourceLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, dbName, 12, TimeUnit.HOURS); if (null == exclusiveResourceLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); @@ -1269,7 +1269,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The shared lock needs to be created on a special thread so it can be released // from the same thread. Future future = getCurrentCaseExecutor().submit(() -> { - currentCaseLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, metadata.getCaseDirectory()); + currentCaseLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetSharedLock(CoordinationService.CategoryNode.CASES, metadata.getCaseDirectory()); if (null == currentCaseLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); } @@ -1280,7 +1280,7 @@ public class Case implements SleuthkitCase.ErrorObserver { // The exclusive lock uses the unique case name // This lock does not need to be on a special thread since it will be released before // leaving this method - exclusiveResourceLock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, + exclusiveResourceLock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.RESOURCE, metadata.getCaseDatabaseName(), 12, TimeUnit.HOURS); if (null == exclusiveResourceLock) { throw new CaseActionException(NbBundle.getMessage(Case.class, "Case.exception.errorLocking", CaseMetadata.getFileExtension())); diff --git a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java index adc89d4afa..39c5c25ba6 100644 --- a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java +++ b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationService.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2017 Basis Technology Corp. + * Copyright 2015 Basis Technology Corp. 
* Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,89 +18,129 @@ */ package org.sleuthkit.autopsy.coordinationservice; -import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs; import org.apache.curator.RetryPolicy; +import org.apache.curator.retry.ExponentialBackoffRetry; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.recipes.locks.InterProcessMutex; import org.apache.curator.framework.recipes.locks.InterProcessReadWriteLock; -import org.apache.curator.retry.ExponentialBackoffRetry; -import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.KeeperException.NoNodeException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.ZooDefs; -import org.apache.zookeeper.ZooKeeper; import org.sleuthkit.autopsy.core.UserPreferences; +import java.io.IOException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.KeeperException.NoNodeException; /** - * A coordination service for maintaining configuration information and - * providing distributed synchronization using a shared hierarchical namespace - * of nodes. - * - * TODO (JIRA 2205): Simple refactoring for general use. + * A centralized service for maintaining configuration information and providing + * distributed synchronization using a shared hierarchical namespace of nodes. */ public final class CoordinationService { + /** + * Category nodes are the immediate children of the root node of a shared + * hierarchical namespace managed by the coordination service. + */ + public enum CategoryNode { // RJCTODO: Move this to CoordinationServiceNamespace + + CASES("cases"), + MANIFESTS("manifests"), + CONFIG("config"), + RESOURCE("resource"); + + private final String displayName; + + private CategoryNode(String displayName) { + this.displayName = displayName; + } + + public String getDisplayName() { + return displayName; + } + } + + /** + * Exception type thrown by the coordination service. + */ + public final static class CoordinationServiceException extends Exception { + + private static final long serialVersionUID = 1L; + + private CoordinationServiceException(String message) { + super(message); + } + + private CoordinationServiceException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * An opaque encapsulation of a lock for use in distributed synchronization. + * Instances are obtained by calling a get lock method and must be passed to + * a release lock method. + */ + public static class Lock implements AutoCloseable { + + /** + * This implementation uses the Curator read/write lock. 
see + * http://curator.apache.org/curator-recipes/shared-reentrant-read-write-lock.html + */ + private final InterProcessMutex interProcessLock; + private final String nodePath; + + private Lock(String nodePath, InterProcessMutex lock) { + this.nodePath = nodePath; + this.interProcessLock = lock; + } + + public String getNodePath() { + return nodePath; + } + + public void release() throws CoordinationServiceException { + try { + this.interProcessLock.release(); + } catch (Exception ex) { + throw new CoordinationServiceException(String.format("Failed to release the lock on %s", nodePath), ex); + } + } + + @Override + public void close() throws CoordinationServiceException { + release(); + } + } + private static CuratorFramework curator = null; private static final Map rootNodesToServices = new HashMap<>(); + private final Map categoryNodeToPath = new HashMap<>(); private static final int SESSION_TIMEOUT_MILLISECONDS = 300000; private static final int CONNECTION_TIMEOUT_MILLISECONDS = 300000; private static final int ZOOKEEPER_SESSION_TIMEOUT_MILLIS = 3000; private static final int ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS = 15000; - private static final int PORT_OFFSET = 1000; // When run in Solr, ZooKeeper defaults to Solr port + 1000 - private final Map categoryNodeToPath = new HashMap<>(); + private static final int PORT_OFFSET = 1000; /** - * Determines if ZooKeeper is accessible with the current settings. Closes - * the connection prior to returning. - * - * @return true if a connection was achieved, false otherwise - * - * @throws InterruptedException - * @throws IOException - */ - private static boolean isZooKeeperAccessible() throws InterruptedException, IOException { - boolean result = false; - Object workerThreadWaitNotifyLock = new Object(); - int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; - String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; - ZooKeeper zooKeeper = new ZooKeeper(connectString, ZOOKEEPER_SESSION_TIMEOUT_MILLIS, - (WatchedEvent event) -> { - synchronized (workerThreadWaitNotifyLock) { - workerThreadWaitNotifyLock.notify(); - } - }); - synchronized (workerThreadWaitNotifyLock) { - workerThreadWaitNotifyLock.wait(ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS); - } - ZooKeeper.States state = zooKeeper.getState(); - if (state == ZooKeeper.States.CONNECTED || state == ZooKeeper.States.CONNECTEDREADONLY) { - result = true; - } - zooKeeper.close(); - return result; - } - - /** - * Gets a coordination service for a specific namespace. + * Gets an instance of the centralized coordination service for a specific + * namespace. * * @param rootNode The name of the root node that defines the namespace. * - * @return The coordination service. + * @return The service for the namespace defined by the root node name. * - * @throws CoordinationServiceException If an instance of the coordination + * @throws CoordinationServiceException If an instaNce of the coordination * service cannot be created. */ - public static synchronized CoordinationService getServiceForNamespace(String rootNode) throws CoordinationServiceException { - /* - * Connect to ZooKeeper via Curator. 
- */ + public static synchronized CoordinationService getInstance(String rootNode) throws CoordinationServiceException { if (null == curator) { RetryPolicy retryPolicy = new ExponentialBackoffRetry(1000, 3); + // When run in Solr, ZooKeeper defaults to Solr port + 1000 int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; curator = CuratorFrameworkFactory.newClient(connectString, SESSION_TIMEOUT_MILLISECONDS, CONNECTION_TIMEOUT_MILLISECONDS, retryPolicy); @@ -117,7 +157,7 @@ public final class CoordinationService { CoordinationService service; try { service = new CoordinationService(rootNode); - } catch (IOException | InterruptedException | KeeperException | CoordinationServiceException ex) { + } catch (Exception ex) { curator = null; throw new CoordinationServiceException("Failed to create coordination service", ex); } @@ -127,18 +167,15 @@ public final class CoordinationService { } /** - * Constructs an instance of the coordination service for a specific - * namespace. + * Constructs an instance of the centralized coordination service for a + * specific namespace. * * @param rootNodeName The name of the root node that defines the namespace. - * - * @throws Exception (calls Curator methods that throw Exception instead of - * more specific exceptions) */ - private CoordinationService(String rootNodeName) throws InterruptedException, IOException, KeeperException, CoordinationServiceException { + private CoordinationService(String rootNodeName) throws Exception { if (false == isZooKeeperAccessible()) { - throw new CoordinationServiceException("Unable to access ZooKeeper"); + throw new Exception("Unable to access ZooKeeper"); } String rootNode = rootNodeName; @@ -154,8 +191,6 @@ public final class CoordinationService { if (ex.code() != KeeperException.Code.NODEEXISTS) { throw ex; } - } catch (Exception ex) { - throw new CoordinationServiceException("Curator experienced an error", ex); } categoryNodeToPath.put(node.getDisplayName(), nodePath); } @@ -350,77 +385,35 @@ public final class CoordinationService { } /** - * Exception type thrown by the coordination service. + * Determines if ZooKeeper is accessible with the current settings. Closes + * the connection prior to returning. + * + * @return true if a connection was achieved, false otherwise */ - public final static class CoordinationServiceException extends Exception { + private static boolean isZooKeeperAccessible() { + boolean result = false; + Object workerThreadWaitNotifyLock = new Object(); + int zooKeeperServerPort = Integer.valueOf(UserPreferences.getIndexingServerPort()) + PORT_OFFSET; + String connectString = UserPreferences.getIndexingServerHost() + ":" + zooKeeperServerPort; - private static final long serialVersionUID = 1L; + try { + ZooKeeper zooKeeper = new ZooKeeper(connectString, ZOOKEEPER_SESSION_TIMEOUT_MILLIS, + (WatchedEvent event) -> { - private CoordinationServiceException(String message) { - super(message); - } - - private CoordinationServiceException(String message, Throwable cause) { - super(message, cause); - } - } - - /** - * An opaque encapsulation of a lock for use in distributed synchronization. - * Instances are obtained by calling a get lock method and must be passed to - * a release lock method. - */ - public static class Lock implements AutoCloseable { - - /** - * This implementation uses the Curator read/write lock. 
see - * http://curator.apache.org/curator-recipes/shared-reentrant-read-write-lock.html - */ - private final InterProcessMutex interProcessLock; - private final String nodePath; - - private Lock(String nodePath, InterProcessMutex lock) { - this.nodePath = nodePath; - this.interProcessLock = lock; - } - - public String getNodePath() { - return nodePath; - } - - public void release() throws CoordinationServiceException { - try { - this.interProcessLock.release(); - } catch (Exception ex) { - throw new CoordinationServiceException(String.format("Failed to release the lock on %s", nodePath), ex); + synchronized (workerThreadWaitNotifyLock) { + workerThreadWaitNotifyLock.notify(); + } + }); + synchronized (workerThreadWaitNotifyLock) { + workerThreadWaitNotifyLock.wait(ZOOKEEPER_CONNECTION_TIMEOUT_MILLIS); } + ZooKeeper.States state = zooKeeper.getState(); + if (state == ZooKeeper.States.CONNECTED || state == ZooKeeper.States.CONNECTEDREADONLY) { + result = true; + } + zooKeeper.close(); + } catch (InterruptedException | IOException ignored) { } - - @Override - public void close() throws CoordinationServiceException { - release(); - } - } - - /** - * Category nodes are the immediate children of the root node of a shared - * hierarchical namespace managed by a coordination service. - */ - public enum CategoryNode { - - CASES("cases"), - MANIFESTS("manifests"), - CONFIG("config"), - RESOURCE("resource"); - - private final String displayName; - - private CategoryNode(String displayName) { - this.displayName = displayName; - } - - public String getDisplayName() { - return displayName; - } + return result; } } diff --git a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java index 567dd38bc6..e1f2a3df42 100644 --- a/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java +++ b/Core/src/org/sleuthkit/autopsy/coordinationservice/CoordinationServiceNamespace.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2016-2017 Basis Technology Corp. + * Copyright 2015 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,7 +19,7 @@ package org.sleuthkit.autopsy.coordinationservice; /** - * Root node for Autopsy coordination service namespace. + * Namespace elements for auto ingest coordination service nodes. */ public final class CoordinationServiceNamespace { private static final String ROOT = "autopsy"; diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java index e9406950fd..3165ad23c9 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestJobLogger.java @@ -431,7 +431,7 @@ final class AutoIngestJobLogger { * log file. 
*/ private void log(MessageCategory category, String message) throws AutoIngestJobLoggerException, InterruptedException { - try (Lock lock = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.CASES, getLogPath(caseDirectoryPath).toString(), LOCK_TIME_OUT, LOCK_TIME_OUT_UNIT)) { + try (Lock lock = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()).tryGetExclusiveLock(CoordinationService.CategoryNode.CASES, getLogPath(caseDirectoryPath).toString(), LOCK_TIME_OUT, LOCK_TIME_OUT_UNIT)) { if (null != lock) { File logFile = getLogPath(caseDirectoryPath).toFile(); try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(logFile, logFile.exists())), true)) { diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java index bb55281227..867ae631e8 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutoIngestManager.java @@ -232,7 +232,7 @@ public final class AutoIngestManager extends Observable implements PropertyChang void startUp() throws AutoIngestManagerStartupException { SYS_LOGGER.log(Level.INFO, "Auto ingest starting"); try { - coordinationService = CoordinationService.getServiceForNamespace(CoordinationServiceNamespace.getRoot()); + coordinationService = CoordinationService.getInstance(CoordinationServiceNamespace.getRoot()); } catch (CoordinationServiceException ex) { throw new AutoIngestManagerStartupException("Failed to get coordination service", ex); } diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java index 653fc65807..7599ed4648 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/SharedConfiguration.java @@ -160,7 +160,7 @@ public class SharedConfiguration { File remoteFolder = getSharedFolder(); - try (Lock writeLock = CoordinationService.getServiceForNamespace(LOCK_ROOT).tryGetExclusiveLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { + try (Lock writeLock = CoordinationService.getInstance(LOCK_ROOT).tryGetExclusiveLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { if (writeLock == null) { logger.log(Level.INFO, String.format("Failed to lock %s - another node is currently uploading or downloading configuration", remoteFolder.getAbsolutePath())); return SharedConfigResult.LOCKED; @@ -230,7 +230,7 @@ public class SharedConfiguration { File remoteFolder = getSharedFolder(); - try (Lock readLock = CoordinationService.getServiceForNamespace(LOCK_ROOT).tryGetSharedLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { + try (Lock readLock = CoordinationService.getInstance(LOCK_ROOT).tryGetSharedLock(CoordinationService.CategoryNode.CONFIG, remoteFolder.getAbsolutePath(), 30, TimeUnit.MINUTES)) { if (readLock == null) { return SharedConfigResult.LOCKED; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 
9f3a99d5e5..3386472d03 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -1,7 +1,20 @@ /* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. + * Autopsy Forensic Browser + * + * Copyright 2011-2016 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.sleuthkit.autopsy.keywordsearch; @@ -46,20 +59,13 @@ class Chunker implements Iterator, Iterable { private final PushbackReader reader; /** The local buffer of characters read from the Reader. */ private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE]; - /** number of chars read in the most recent read operation. */ - private int charsRead = 0; - /** The text of the current chunk (so far). */ - private StringBuilder currentChunk; - private StringBuilder currentWindow; /** the size in bytes of the chunk (so far). */ private int chunkSizeBytes = 0; - /** the size in chars of the (base) chunk (so far). */ - private int baseChunkSizeChars; - - /** has the chunker reached the end of the Reader? If so, there are no more + /** Has the chunker reached the end of the Reader? If so, there are no more * chunks, and the current chunk does not need a window. */ private boolean endOfReaderReached = false; + /** Store any exception encountered reading from the Reader. */ private Exception ex; /** @@ -68,6 +74,7 @@ class Chunker implements Iterator, Iterable { * @param reader The content to chunk. */ Chunker(Reader reader) { + //Using MAX_TOTAL_CHUNK_SIZE is safe but probably overkill. this.reader = new PushbackReader(reader, MAX_TOTAL_CHUNK_SIZE); } @@ -76,10 +83,18 @@ class Chunker implements Iterator, Iterable { return this; } + /** + * Has this Chunker encountered an exception reading from the Reader. + */ boolean hasException() { return ex != null; } + /** + * Get the exception encountered reading from the Reader. + * + * @return The exception, or null if no exception was encountered. 
+ */ public Exception getException() { return ex; } @@ -115,24 +130,28 @@ class Chunker implements Iterator, Iterable { throw new NoSuchElementException("There are no more chunks."); } //reset state for the next chunk - currentChunk = new StringBuilder(); - currentWindow = new StringBuilder(); + chunkSizeBytes = 0; - baseChunkSizeChars = 0; + int baseChunkSizeChars = 0; + StringBuilder currentChunk = new StringBuilder(); + StringBuilder currentWindow = new StringBuilder(); try { - readBaseChunk(); - baseChunkSizeChars = currentChunk.length(); - readWindow(); + currentChunk.append(readBaseChunk()); + baseChunkSizeChars = currentChunk.length(); //save the base chunk length + currentWindow.append(readWindow()); if (endOfReaderReached) { /* if we have reached the end of the content,we won't make * another overlapping chunk, so the length of the base chunk - * can be extended * to the end. */ + * can be extended to the end. */ baseChunkSizeChars = currentChunk.length(); } else { + /* otherwise we will make another chunk, so unread the window */ reader.unread(currentWindow.toString().toCharArray()); } - } catch (IOException ioEx) { + } catch (Exception ioEx) { + /* Save the exception, which will cause hasNext() to return false, + * and break any chunking loop in client code. */ ex = ioEx; } //add the window text to the current chunk. @@ -142,34 +161,46 @@ class Chunker implements Iterator, Iterable { } /** - * Read the base chunk from the reader, and attempt to break at whitespace. + * Read the base chunk from the reader, attempting to break at whitespace. * * @throws IOException if there is a problem reading from the reader. */ - private void readBaseChunk() throws IOException { + private StringBuilder readBaseChunk() throws IOException { + StringBuilder currentChunk = new StringBuilder(); //read the chunk until the minimum base chunk size readHelper(MINIMUM_BASE_CHUNK_SIZE, currentChunk); //keep reading until the maximum base chunk size or white space is reached. readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE, currentChunk); + return currentChunk; } /** - * Read the window from the reader, and attempt to break at whitespace. + * Read the window from the reader, attempting to break at whitespace. * * @throws IOException if there is a problem reading from the reader. */ - private void readWindow() throws IOException { + private StringBuilder readWindow() throws IOException { + StringBuilder currentWindow = new StringBuilder(); //read the window, leaving some room to look for white space to break at. readHelper(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, currentWindow); //keep reading until the max chunk size, or until whitespace is reached. readToWhiteSpaceHelper(MAX_TOTAL_CHUNK_SIZE, currentWindow); + return currentWindow; } + /** + * Read until the maxBytes reached, or end of reader. + * + * @param maxBytes + * @param currentSegment + * + * @throws IOException + */ private void readHelper(int maxBytes, StringBuilder currentSegment) throws IOException { - - //read chars up to maxBytes, or we reach the end of the reader. + int charsRead = 0; + //read chars up to maxBytes, or the end of the reader. while ((chunkSizeBytes < maxBytes) && (endOfReaderReached == false)) { charsRead = reader.read(tempChunkBuf, 0, READ_CHARS_BUFFER_SIZE); @@ -178,24 +209,25 @@ class Chunker implements Iterator, Iterable { endOfReaderReached = true; return; } else { - //if the last charcter might be part of a surroate pair, unread it. + //if the last char might be part of a surroate pair, unread it. 
final char lastChar = tempChunkBuf[charsRead - 1]; - String chunkSegment; if (Character.isHighSurrogate(lastChar)) { charsRead--; - chunkSegment = new String(tempChunkBuf, 0, charsRead); reader.unread(lastChar); - } else { - chunkSegment = new String(tempChunkBuf, 0, charsRead); } - //add read chars to the chunk and update the length. + String chunkSegment = new String(tempChunkBuf, 0, charsRead); + + //get the length in bytes of the read chars int segmentSize = chunkSegment.getBytes(StandardCharsets.UTF_8).length; + //if it will not put us past maxBytes if (chunkSizeBytes + segmentSize < maxBytes) { + //add it to the chunk currentSegment.append(chunkSegment); chunkSizeBytes += segmentSize; } else { + //unread it, and break out of read loop. reader.unread(tempChunkBuf, 0, charsRead); return; } @@ -203,7 +235,16 @@ class Chunker implements Iterator, Iterable { } } + /** + * Read until the maxBytes reached, whitespace, or end of reader. + * + * @param maxBytes + * @param currentSegment + * + * @throws IOException + */ private void readToWhiteSpaceHelper(int maxBytes, StringBuilder currentSegment) throws IOException { + int charsRead = 0; boolean whitespaceFound = false; //read 1 char at a time up to maxBytes, whitespaceFound, or we reach the end of the reader. while ((chunkSizeBytes < maxBytes) @@ -221,22 +262,23 @@ class Chunker implements Iterator, Iterable { if (Character.isHighSurrogate(ch)) { charsRead = reader.read(tempChunkBuf, 1, 1); if (charsRead == -1) { + //this is the last chunk, so include the unpaired surrogate currentSegment.append(ch); chunkSizeBytes += new Character(ch).toString().getBytes(StandardCharsets.UTF_8).length; - //this is the last chunk endOfReaderReached = true; return; } else { + //use the surrogate pair in place of the unpaired surrogate. chunkSegment = new String(tempChunkBuf, 0, 2); } } else { + //one char chunkSegment = new String(tempChunkBuf, 0, 1); } //check for whitespace. whitespaceFound = Character.isWhitespace(chunkSegment.codePointAt(0)); //add read chars to the chunk and update the length. currentSegment.append(chunkSegment); - chunkSizeBytes += chunkSegment.getBytes(StandardCharsets.UTF_8).length; } } @@ -265,5 +307,4 @@ class Chunker implements Iterator, Iterable { return chunksize; } } - }
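
For context, a minimal sketch of how the Chunker that emerges from this series is meant to be driven by indexing code such as Ingester.indexText(): iterate the overlapping chunks produced from a Reader, then consult hasException() once the loop ends, since patch 5 makes read errors end the iteration rather than throw. The class name ChunkerUsageSketch, the sample StringReader input, and the console output are illustrative stand-ins, not code from these patches; a real caller would hand each chunk to Solr instead of printing it.

    package org.sleuthkit.autopsy.keywordsearch;

    import java.io.Reader;
    import java.io.StringReader;

    /**
     * Usage sketch only (not part of the patch series): shows the chunking
     * loop pattern that Ingester uses after patch 5 -- iterate the chunks,
     * then check for a stored read error instead of catching IOException
     * around next().
     */
    class ChunkerUsageSketch {

        static boolean chunkAndReport(Reader extractedText) {
            Chunker chunker = new Chunker(extractedText);
            int chunkId = 0;
            for (Chunker.Chunk chunk : chunker) {
                // Placeholder for the real Solr add: report the total chunk
                // length and the base (non-overlapping) length kept for
                // highlighting offsets.
                System.out.printf("chunk %d: %d chars, base length %d%n",
                        chunkId++, chunk.toString().length(), chunk.getBaseChunkLength());
            }
            if (chunker.hasException()) {
                // A read error makes hasNext() return false, so the loop above
                // simply stops; the caller inspects the stored exception here.
                System.err.println("chunking stopped early: " + chunker.getException());
                return false;
            }
            return true;
        }

        public static void main(String[] args) {
            chunkAndReport(new StringReader("some extracted text to be chunked ..."));
        }
    }

The design choice this illustrates is that the iterator captures any IOException internally and ends iteration, so client code needs only one error check after the loop, while the PushbackReader lets each chunk's trailing window be unread and re-consumed as the start of the next base chunk.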