mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-16 09:47:42 +00:00
Merge in develop with overlapping chunks
This commit is contained in:
commit
c6adff9c59
@ -19,6 +19,8 @@
|
|||||||
package org.sleuthkit.autopsy.datamodel;
|
package org.sleuthkit.autopsy.datamodel;
|
||||||
|
|
||||||
import java.awt.event.ActionEvent;
|
import java.awt.event.ActionEvent;
|
||||||
|
import java.beans.PropertyChangeEvent;
|
||||||
|
import java.beans.PropertyChangeListener;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -27,6 +29,7 @@ import java.util.List;
|
|||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import javax.swing.AbstractAction;
|
import javax.swing.AbstractAction;
|
||||||
import javax.swing.Action;
|
import javax.swing.Action;
|
||||||
|
import org.openide.nodes.Children;
|
||||||
import org.openide.nodes.Sheet;
|
import org.openide.nodes.Sheet;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
import org.openide.util.NbBundle.Messages;
|
import org.openide.util.NbBundle.Messages;
|
||||||
@ -35,11 +38,14 @@ import org.sleuthkit.autopsy.coreutils.Logger;
|
|||||||
import org.sleuthkit.autopsy.directorytree.ExplorerNodeActionVisitor;
|
import org.sleuthkit.autopsy.directorytree.ExplorerNodeActionVisitor;
|
||||||
import org.sleuthkit.autopsy.directorytree.FileSearchAction;
|
import org.sleuthkit.autopsy.directorytree.FileSearchAction;
|
||||||
import org.sleuthkit.autopsy.directorytree.NewWindowViewAction;
|
import org.sleuthkit.autopsy.directorytree.NewWindowViewAction;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestManager;
|
||||||
|
import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
|
||||||
import org.sleuthkit.autopsy.ingest.RunIngestModulesDialog;
|
import org.sleuthkit.autopsy.ingest.RunIngestModulesDialog;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.Image;
|
import org.sleuthkit.datamodel.Image;
|
||||||
import org.sleuthkit.datamodel.SleuthkitCase.CaseDbQuery;
|
import org.sleuthkit.datamodel.SleuthkitCase.CaseDbQuery;
|
||||||
import org.sleuthkit.datamodel.TskCoreException;
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
|
import org.sleuthkit.datamodel.VirtualDirectory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is used to represent the "Node" for the image. The children of
|
* This class is used to represent the "Node" for the image. The children of
|
||||||
@ -71,6 +77,16 @@ public class ImageNode extends AbstractContentNode<Image> {
|
|||||||
String imgName = nameForImage(img);
|
String imgName = nameForImage(img);
|
||||||
this.setDisplayName(imgName);
|
this.setDisplayName(imgName);
|
||||||
this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/hard-drive-icon.jpg"); //NON-NLS
|
this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/hard-drive-icon.jpg"); //NON-NLS
|
||||||
|
|
||||||
|
// Listen for ingest events so that we can detect new added files (e.g. carved)
|
||||||
|
IngestManager.getInstance().addIngestModuleEventListener(pcl);
|
||||||
|
// Listen for case events so that we can detect when case is closed
|
||||||
|
Case.addPropertyChangeListener(pcl);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeListeners() {
|
||||||
|
IngestManager.getInstance().removeIngestModuleEventListener(pcl);
|
||||||
|
Case.removePropertyChangeListener(pcl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -199,4 +215,46 @@ public class ImageNode extends AbstractContentNode<Image> {
|
|||||||
public String getItemType() {
|
public String getItemType() {
|
||||||
return getClass().getName();
|
return getClass().getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final PropertyChangeListener pcl = (PropertyChangeEvent evt) -> {
|
||||||
|
String eventType = evt.getPropertyName();
|
||||||
|
|
||||||
|
// See if the new file is a child of ours
|
||||||
|
if (eventType.equals(IngestManager.IngestModuleEvent.CONTENT_CHANGED.toString())) {
|
||||||
|
if ((evt.getOldValue() instanceof ModuleContentEvent) == false) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ModuleContentEvent moduleContentEvent = (ModuleContentEvent) evt.getOldValue();
|
||||||
|
if ((moduleContentEvent.getSource() instanceof Content) == false) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Content newContent = (Content) moduleContentEvent.getSource();
|
||||||
|
|
||||||
|
try {
|
||||||
|
Content parent = newContent.getParent();
|
||||||
|
if (parent != null) {
|
||||||
|
// Is this a new carved file?
|
||||||
|
if (parent.getName().equals(VirtualDirectory.NAME_CARVED)) {
|
||||||
|
// Was this new carved file produced from this image?
|
||||||
|
if (parent.getParent().getId() == getContent().getId()) {
|
||||||
|
Children children = getChildren();
|
||||||
|
if (children != null) {
|
||||||
|
((ContentChildren) children).refreshChildren();
|
||||||
|
children.getNodesCount();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (TskCoreException ex) {
|
||||||
|
// Do nothing.
|
||||||
|
}
|
||||||
|
} else if (eventType.equals(Case.Events.CURRENT_CASE.toString())) {
|
||||||
|
if (evt.getNewValue() == null) {
|
||||||
|
// case was closed. Remove listeners so that we don't get called with a stale case handle
|
||||||
|
removeListeners();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -82,7 +82,13 @@ ConfirmationDialog.Exit=Exit
|
|||||||
ConfirmationDialog.DoNotExit=Do Not Exit
|
ConfirmationDialog.DoNotExit=Do Not Exit
|
||||||
ConfirmationDialog.ConfirmExit=All incomplete copy jobs will be cancelled. Are you sure?
|
ConfirmationDialog.ConfirmExit=All incomplete copy jobs will be cancelled. Are you sure?
|
||||||
ConfirmationDialog.ConfirmExitHeader=Confirm Exit
|
ConfirmationDialog.ConfirmExitHeader=Confirm Exit
|
||||||
|
OpenIDE-Module-Long-Description=\
|
||||||
|
This module contains features that are being developed by Basis Technology and are not part of the default Autopsy distribution. \
|
||||||
|
You can enable this module to use the new features. \
|
||||||
|
The features should be stable, but their exact behavior and API are subject to change. \n\n\
|
||||||
|
We make no guarantee that the API of this module will not change, so developers should be careful when relying on it.
|
||||||
OpenIDE-Module-Name=Experimental
|
OpenIDE-Module-Name=Experimental
|
||||||
|
OpenIDE-Module-Short-Description=This module contains features that are being developed by Basis Technology and are not part of the default Autopsy distribution.
|
||||||
ReviewModeCasePanel.bnRefresh.text=&Refresh
|
ReviewModeCasePanel.bnRefresh.text=&Refresh
|
||||||
ReviewModeCasePanel.bnOpen.text=&Open
|
ReviewModeCasePanel.bnOpen.text=&Open
|
||||||
ReviewModeCasePanel.rbGroupLabel.text=Show Last 10:
|
ReviewModeCasePanel.rbGroupLabel.text=Show Last 10:
|
||||||
|
@ -524,6 +524,7 @@
|
|||||||
<!-- file chunk-specific fields (optional for others) -->
|
<!-- file chunk-specific fields (optional for others) -->
|
||||||
<!-- for a parent file with no content, number of chunks are specified -->
|
<!-- for a parent file with no content, number of chunks are specified -->
|
||||||
<field name="num_chunks" type="int" indexed="true" stored="true" required="false" />
|
<field name="num_chunks" type="int" indexed="true" stored="true" required="false" />
|
||||||
|
<field name="chunk_size" type="int" indexed="true" stored="true" required="false" />
|
||||||
|
|
||||||
<!-- Common metadata fields, named specifically to match up with
|
<!-- Common metadata fields, named specifically to match up with
|
||||||
SolrCell metadata when parsing rich documents such as Word, PDF.
|
SolrCell metadata when parsing rich documents such as Word, PDF.
|
||||||
|
@ -19,13 +19,14 @@
|
|||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
import com.google.common.base.Utf8;
|
import com.google.common.base.Utf8;
|
||||||
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
|
import javax.annotation.concurrent.NotThreadSafe;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
@ -56,6 +57,7 @@ class Ingester {
|
|||||||
private final Server solrServer = KeywordSearch.getServer();
|
private final Server solrServer = KeywordSearch.getServer();
|
||||||
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
|
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
|
||||||
private static Ingester instance;
|
private static Ingester instance;
|
||||||
|
private static final int SINGLE_READ_CHARS = 512;
|
||||||
|
|
||||||
private Ingester() {
|
private Ingester() {
|
||||||
}
|
}
|
||||||
@ -153,15 +155,15 @@ class Ingester {
|
|||||||
|
|
||||||
Map<String, String> fields = getContentFields(source);
|
Map<String, String> fields = getContentFields(source);
|
||||||
//Get a reader for the content of the given source
|
//Get a reader for the content of the given source
|
||||||
try (Reader reader = extractor.getReader(source);) {
|
try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
|
||||||
Chunker chunker = new Chunker(reader);
|
Chunker chunker = new Chunker(reader);
|
||||||
|
|
||||||
for (Chunk chunk : chunker) {
|
for (Chunk chunk : chunker) {
|
||||||
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
|
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
|
||||||
fields.put(Server.Schema.ID.toString(), chunkId);
|
fields.put(Server.Schema.ID.toString(), chunkId);
|
||||||
|
fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
|
||||||
try {
|
try {
|
||||||
//add the chunk text to Solr index
|
//add the chunk text to Solr index
|
||||||
indexChunk(chunk.getText().toString(), sourceName, fields);
|
indexChunk(chunk.toString(), sourceName, fields);
|
||||||
numChunks++;
|
numChunks++;
|
||||||
} catch (Ingester.IngesterException ingEx) {
|
} catch (Ingester.IngesterException ingEx) {
|
||||||
extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
|
extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
|
||||||
@ -366,50 +368,61 @@ class Ingester {
|
|||||||
super(message);
|
super(message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
class Chunk {
|
|
||||||
private final StringBuilder sb;
|
|
||||||
private final int chunksize;
|
|
||||||
|
|
||||||
Chunk(StringBuilder sb, int chunksize) {
|
|
||||||
this.sb = sb;
|
|
||||||
this.chunksize = chunksize;
|
|
||||||
}
|
|
||||||
|
|
||||||
StringBuilder getText() {
|
|
||||||
return sb;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getSize() {
|
|
||||||
return chunksize;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encapsulates the content chunking algorithm in implementation of the Iterator
|
* Encapsulates the content chunking algorithm in an implementation of the
|
||||||
* interface.
|
* Iterator interface. Also implements Iterable so it can be used directly in a
|
||||||
|
* for loop. The base chunk is the part of the chunk before the overlapping
|
||||||
|
* window. The window will be included at the end of the current chunk as well
|
||||||
|
* as at the beginning of the next chunk.
|
||||||
*/
|
*/
|
||||||
|
@NotThreadSafe
|
||||||
class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
|
class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
|
||||||
|
|
||||||
private static final int INITIAL_CHUNK_SIZE = 32 * 1024; //bytes
|
//Chunking algorithm parameters-------------------------------------//
|
||||||
private static final int SINGLE_READ_CHARS = 1024;
|
/** the maximum size of a chunk, including the window. */
|
||||||
|
private static final int MAX_TOTAL_CHUNK_SIZE = 32766; //bytes
|
||||||
|
/** the minimum to read before we start the process of looking for
|
||||||
|
* whitespace to break at and creating an overlapping window. */
|
||||||
|
private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes
|
||||||
|
/** The maximum size of the chunk, before the overlapping window, even if we
|
||||||
|
* couldn't find whitespace to break at. */
|
||||||
|
private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes
|
||||||
|
/** The amount of text we will read through before we give up on finding
|
||||||
|
* whitespace to break the chunk/window at. */
|
||||||
|
private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes
|
||||||
|
/** The number of characters to read in one go from the Reader. */
|
||||||
|
private static final int READ_CHARS_BUFFER_SIZE = 512; //chars
|
||||||
|
|
||||||
private int chunkSizeBytes = 0; // the size in bytes of chunk (so far)
|
////chunker state--------------------------------------------///
|
||||||
private int charsRead = 0; // number of chars read in the most recent read operation
|
/** The Reader that this chunker reads from, and divides into chunks. It must
|
||||||
private boolean whitespace = false;
|
* be a buffered reader to ensure that mark/reset are supported. */
|
||||||
private char[] tempChunkBuf;
|
private final BufferedReader reader;
|
||||||
private StringBuilder chunkText;
|
/** The local buffer of characters read from the Reader. */
|
||||||
private boolean endOfContent = false;
|
private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE];
|
||||||
private final Reader reader;
|
/** number of chars read in the most recent read operation. */
|
||||||
|
private int charsRead = 0;
|
||||||
|
|
||||||
|
/** The text of the current chunk (so far). */
|
||||||
|
private StringBuilder currentChunk;
|
||||||
|
/** the size in bytes of the chunk (so far). */
|
||||||
|
private int chunkSizeBytes = 0;
|
||||||
|
/** the size in chars of the (base) chunk (so far). */
|
||||||
|
private int baseChunkSizeChars;
|
||||||
|
|
||||||
|
/** has the chunker found whitespace to break on? */
|
||||||
|
private boolean whitespaceFound = false;
|
||||||
|
/** has the chunker reached the end of the Reader? If so, there are no more
|
||||||
|
* chunks, and the current chunk does not need a window. */
|
||||||
|
private boolean endOfReaderReached = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a Chunker that will chunk the content of the given Reader.
|
* Create a Chunker that will chunk the content of the given Reader.
|
||||||
*
|
*
|
||||||
* @param reader The content to chunk.
|
* @param reader The content to chunk.
|
||||||
*/
|
*/
|
||||||
Chunker(Reader reader) {
|
Chunker(BufferedReader reader) {
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,73 +431,9 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Are there any more chunks available from this chunker?
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* @return true if there are more chunks available.
|
|
||||||
*/
|
|
||||||
@Override
|
@Override
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
return endOfContent == false;
|
return endOfReaderReached == false;
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Chunk next() {
|
|
||||||
if (hasNext()) {
|
|
||||||
chunkText = new StringBuilder();
|
|
||||||
tempChunkBuf = new char[SINGLE_READ_CHARS];
|
|
||||||
chunkSizeBytes = 0;
|
|
||||||
//read chars up to initial chunk size
|
|
||||||
while (chunkSizeBytes < INITIAL_CHUNK_SIZE && endOfContent == false) {
|
|
||||||
try {
|
|
||||||
charsRead = reader.read(tempChunkBuf, 0, SINGLE_READ_CHARS);
|
|
||||||
} catch (IOException ex) {
|
|
||||||
throw new RuntimeException("IOException while attempting to read chunk.", ex);
|
|
||||||
}
|
|
||||||
if (-1 == charsRead) {
|
|
||||||
//this is the last chunk
|
|
||||||
endOfContent = true;
|
|
||||||
} else {
|
|
||||||
String chunkSegment = new String(tempChunkBuf, 0, charsRead);
|
|
||||||
chunkSizeBytes += Utf8.encodedLength(chunkSegment);
|
|
||||||
chunkText.append(chunkSegment);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
if (false == endOfContent) {
|
|
||||||
endOfContent = readChunkUntilWhiteSpace();
|
|
||||||
}
|
|
||||||
return new Chunk(sanitizeToUTF8(chunkText), chunkSizeBytes);
|
|
||||||
} else {
|
|
||||||
throw new NoSuchElementException("There are no more chunks.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private boolean readChunkUntilWhiteSpace() {
|
|
||||||
charsRead = 0;
|
|
||||||
whitespace = false;
|
|
||||||
//if we haven't reached the end of the file,
|
|
||||||
//try to read char-by-char until whitespace to not break words
|
|
||||||
while ((chunkSizeBytes < INITIAL_CHUNK_SIZE)
|
|
||||||
&& (false == whitespace)) {
|
|
||||||
try {
|
|
||||||
charsRead = reader.read(tempChunkBuf, 0, 1);
|
|
||||||
} catch (IOException ex) {
|
|
||||||
throw new RuntimeException("IOException while attempting to read chunk until whitespace.", ex);
|
|
||||||
}
|
|
||||||
if (-1 == charsRead) {
|
|
||||||
//this is the last chunk
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
whitespace = Character.isWhitespace(tempChunkBuf[0]);
|
|
||||||
String chunkSegment = new String(tempChunkBuf, 0, 1);
|
|
||||||
chunkSizeBytes += Utf8.encodedLength(chunkSegment);
|
|
||||||
chunkText.append(chunkSegment);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -498,14 +447,133 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
|
|||||||
*/
|
*/
|
||||||
private static StringBuilder sanitizeToUTF8(StringBuilder sb) {
|
private static StringBuilder sanitizeToUTF8(StringBuilder sb) {
|
||||||
final int length = sb.length();
|
final int length = sb.length();
|
||||||
|
|
||||||
// Sanitize by replacing non-UTF-8 characters with caret '^'
|
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
if (TextUtil.isValidSolrUTF8(sb.charAt(i)) == false) {
|
if (TextUtil.isValidSolrUTF8(sb.charAt(i)) == false) {
|
||||||
sb.replace(i, i + 1, "^");
|
sb.replace(i, i + 1, "^");
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return sb;
|
return sb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Chunk next() {
|
||||||
|
if (endOfReaderReached) {
|
||||||
|
throw new NoSuchElementException("There are no more chunks.");
|
||||||
|
}
|
||||||
|
//reset state for the next chunk
|
||||||
|
currentChunk = new StringBuilder();
|
||||||
|
chunkSizeBytes = 0;
|
||||||
|
baseChunkSizeChars = 0;
|
||||||
|
|
||||||
|
try {
|
||||||
|
readBaseChunk();
|
||||||
|
baseChunkSizeChars = currentChunk.length();
|
||||||
|
reader.mark(2048); //mark the reader so we can rewind the reader here to begin the next chunk
|
||||||
|
readWindow();
|
||||||
|
} catch (IOException ioEx) {
|
||||||
|
throw new RuntimeException("IOException while reading chunk.", ioEx);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
reader.reset(); //reset the reader the so the next chunk can begin at the position marked above
|
||||||
|
} catch (IOException ex) {
|
||||||
|
throw new RuntimeException("IOException while resetting chunk reader.", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (endOfReaderReached) {
|
||||||
|
/* if we have reached the end of the content, we won't make another
|
||||||
|
* overlapping chunk, so the base chunk can be extended to the end. */
|
||||||
|
baseChunkSizeChars = currentChunk.length();
|
||||||
|
}
|
||||||
|
//sanitize the text and return a Chunk object, that includes the base chunk length.
|
||||||
|
return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the base chunk from the reader, and attempt to break at whitespace.
|
||||||
|
*
|
||||||
|
* @throws IOException if there is a problem reading from the reader.
|
||||||
|
*/
|
||||||
|
private void readBaseChunk() throws IOException {
|
||||||
|
//read the chunk until the minimum base chunk size
|
||||||
|
readHelper(MINIMUM_BASE_CHUNK_SIZE, false);
|
||||||
|
//keep reading until the maximum base chunk size or white space is reached.
|
||||||
|
whitespaceFound = false;
|
||||||
|
readHelper(MAXIMUM_BASE_CHUNK_SIZE, true);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Read the window from the reader, and attempt to break at whitespace.
|
||||||
|
*
|
||||||
|
* @throws IOException if there is a problem reading from the reader.
|
||||||
|
*/
|
||||||
|
private void readWindow() throws IOException {
|
||||||
|
//read the window, leaving some room to look for white space to break at.
|
||||||
|
int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, chunkSizeBytes + 1024);
|
||||||
|
readHelper(windowEnd, false);
|
||||||
|
whitespaceFound = false;
|
||||||
|
//keep reading until the max chunk size, or until whitespace is reached.
|
||||||
|
windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE, chunkSizeBytes + 1024);
|
||||||
|
readHelper(windowEnd, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Helper method that implements reading in a loop.
|
||||||
|
*
|
||||||
|
* @param maxBytes The max cumulative length of the content, in
|
||||||
|
* bytes, to read from the Reader. That is, when
|
||||||
|
* chunkSizeBytes >= maxBytes stop reading.
|
||||||
|
* @param inWhiteSpaceBuffer Should the current read stop once whitespace is
|
||||||
|
* found?
|
||||||
|
*
|
||||||
|
* @throws IOException If there is a problem reading from the Reader.
|
||||||
|
*/
|
||||||
|
private void readHelper(int maxBytes, boolean inWhiteSpaceBuffer) throws IOException {
|
||||||
|
//only read one character at a time if we are looking for whitespace.
|
||||||
|
final int readSize = inWhiteSpaceBuffer ? 1 : READ_CHARS_BUFFER_SIZE;
|
||||||
|
|
||||||
|
//read chars until maxBytes is reached, whitespace is found (only when inWhiteSpaceBuffer), or we reach the end of the reader.
|
||||||
|
while ((chunkSizeBytes < maxBytes)
|
||||||
|
&& (false == (inWhiteSpaceBuffer && whitespaceFound))
|
||||||
|
&& (endOfReaderReached == false)) {
|
||||||
|
charsRead = reader.read(tempChunkBuf, 0, readSize);
|
||||||
|
if (-1 == charsRead) {
|
||||||
|
//this is the last chunk
|
||||||
|
endOfReaderReached = true;
|
||||||
|
} else {
|
||||||
|
if (inWhiteSpaceBuffer) {
|
||||||
|
//check for whitespace.
|
||||||
|
whitespaceFound = Character.isWhitespace(tempChunkBuf[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//add read chars to the chunk and update the length.
|
||||||
|
String chunkSegment = new String(tempChunkBuf, 0, charsRead);
|
||||||
|
chunkSizeBytes += Utf8.encodedLength(chunkSegment);
|
||||||
|
currentChunk.append(chunkSegment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents one chunk as the text in it and the length of the base chunk, in
|
||||||
|
* chars.
|
||||||
|
*/
|
||||||
|
class Chunk {
|
||||||
|
|
||||||
|
private final StringBuilder sb;
|
||||||
|
private final int chunksize;
|
||||||
|
|
||||||
|
Chunk(StringBuilder sb, int baseChunkLength) {
|
||||||
|
this.sb = sb;
|
||||||
|
this.chunksize = baseChunkLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
int getBaseChunkLength() {
|
||||||
|
return chunksize;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -157,6 +157,12 @@ public class Server {
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return "num_chunks"; //NON-NLS
|
return "num_chunks"; //NON-NLS
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
CHUNK_SIZE {
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "chunk_size"; //NON-NLS
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user