mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Renamed TextExtractor public API
This commit is contained in:
parent
55f7003246
commit
1e1e166f55
@ -338,8 +338,8 @@
|
|||||||
<package>org.sleuthkit.autopsy.modules.vmextractor</package>
|
<package>org.sleuthkit.autopsy.modules.vmextractor</package>
|
||||||
<package>org.sleuthkit.autopsy.progress</package>
|
<package>org.sleuthkit.autopsy.progress</package>
|
||||||
<package>org.sleuthkit.autopsy.report</package>
|
<package>org.sleuthkit.autopsy.report</package>
|
||||||
<package>org.sleuthkit.autopsy.textreaders</package>
|
<package>org.sleuthkit.autopsy.textextractors</package>
|
||||||
<package>org.sleuthkit.autopsy.textreaders.textreaderconfigs</package>
|
<package>org.sleuthkit.autopsy.textextractors.textextractorconfigs</package>
|
||||||
<package>org.sleuthkit.autopsy.texttranslation</package>
|
<package>org.sleuthkit.autopsy.texttranslation</package>
|
||||||
<package>org.sleuthkit.datamodel</package>
|
<package>org.sleuthkit.datamodel</package>
|
||||||
</public-packages>
|
</public-packages>
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
@ -83,7 +83,7 @@ class ArtifactTextExtractor extends TextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isSupported(Content file, String detectedFormat) {
|
public boolean isSupported() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -16,7 +16,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
@ -32,6 +32,7 @@ import net.htmlparser.jericho.Source;
|
|||||||
import net.htmlparser.jericho.StartTag;
|
import net.htmlparser.jericho.StartTag;
|
||||||
import net.htmlparser.jericho.StartTagType;
|
import net.htmlparser.jericho.StartTagType;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||||
|
|
||||||
@ -42,7 +43,7 @@ final class HtmlTextExtractor extends TextExtractor {
|
|||||||
|
|
||||||
static final private Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
|
static final private Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
|
||||||
private final int MAX_SIZE;
|
private final int MAX_SIZE;
|
||||||
private final Content file;
|
private final AbstractFile file;
|
||||||
|
|
||||||
static final List<String> WEB_MIME_TYPES = Arrays.asList(
|
static final List<String> WEB_MIME_TYPES = Arrays.asList(
|
||||||
"application/javascript", //NON-NLS
|
"application/javascript", //NON-NLS
|
||||||
@ -62,7 +63,7 @@ final class HtmlTextExtractor extends TextExtractor {
|
|||||||
* Creates a default instance of the HtmlTextExtractor. Supported file size
|
* Creates a default instance of the HtmlTextExtractor. Supported file size
|
||||||
* is 50MB.
|
* is 50MB.
|
||||||
*/
|
*/
|
||||||
public HtmlTextExtractor(Content file) {
|
public HtmlTextExtractor(AbstractFile file) {
|
||||||
//Set default to be 50 MB.
|
//Set default to be 50 MB.
|
||||||
MAX_SIZE = 50_000_000;
|
MAX_SIZE = 50_000_000;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
@ -77,10 +78,10 @@ final class HtmlTextExtractor extends TextExtractor {
|
|||||||
* @return flag indicating support
|
* @return flag indicating support
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isSupported(Content content, String detectedFormat) {
|
public boolean isSupported() {
|
||||||
return detectedFormat != null
|
return file.getMIMEType() != null
|
||||||
&& WEB_MIME_TYPES.contains(detectedFormat)
|
&& WEB_MIME_TYPES.contains(file.getMIMEType())
|
||||||
&& content.getSize() <= MAX_SIZE;
|
&& file.getSize() <= MAX_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
@ -16,7 +16,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
@ -28,7 +28,6 @@ import org.sleuthkit.autopsy.coreutils.SQLiteTableReaderException;
|
|||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.coreutils.SQLiteTableReader;
|
import org.sleuthkit.autopsy.coreutils.SQLiteTableReader;
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
import org.sleuthkit.datamodel.Content;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts text from SQLite database files.
|
* Extracts text from SQLite database files.
|
||||||
@ -45,8 +44,8 @@ final class SqliteTextExtractor extends TextExtractor {
|
|||||||
private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName());
|
private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName());
|
||||||
private final AbstractFile file;
|
private final AbstractFile file;
|
||||||
|
|
||||||
public SqliteTextExtractor(Content file) {
|
public SqliteTextExtractor(AbstractFile file) {
|
||||||
this.file = (AbstractFile) file;
|
this.file = file;
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Supports only the sqlite mimetypes
|
* Supports only the sqlite mimetypes
|
||||||
@ -57,8 +56,8 @@ final class SqliteTextExtractor extends TextExtractor {
|
|||||||
* @return true if x-sqlite3
|
* @return true if x-sqlite3
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isSupported(Content file, String detectedFormat) {
|
public boolean isSupported() {
|
||||||
return SQLITE_MIMETYPE.equals(detectedFormat);
|
return SQLITE_MIMETYPE.equals(file.getMIMEType());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
@ -16,7 +16,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
@ -28,7 +28,7 @@ import java.util.Objects;
|
|||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.sleuthkit.autopsy.coreutils.StringExtract;
|
import org.sleuthkit.autopsy.coreutils.StringExtract;
|
||||||
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
|
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
|
||||||
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.StringsConfig;
|
import org.sleuthkit.autopsy.textextractors.textextractorconfigs.StringsConfig;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.TskCoreException;
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
import org.sleuthkit.datamodel.TskException;
|
import org.sleuthkit.datamodel.TskException;
|
||||||
@ -36,7 +36,7 @@ import org.sleuthkit.datamodel.TskException;
|
|||||||
/**
|
/**
|
||||||
* Extracts raw strings from content.
|
* Extracts raw strings from content.
|
||||||
*/
|
*/
|
||||||
final class StringsTextExtractor {
|
final class StringsTextExtractor extends TextExtractor {
|
||||||
|
|
||||||
private boolean extractUTF8;
|
private boolean extractUTF8;
|
||||||
private boolean extractUTF16;
|
private boolean extractUTF16;
|
||||||
@ -81,6 +81,7 @@ final class StringsTextExtractor {
|
|||||||
* @throws
|
* @throws
|
||||||
* org.sleuthkit.autopsy.textextractors.TextExtractor.TextExtractorException
|
* org.sleuthkit.autopsy.textextractors.TextExtractor.TextExtractorException
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public InputStreamReader getReader() {
|
public InputStreamReader getReader() {
|
||||||
InputStream stringStream = getInputStream(content);
|
InputStream stringStream = getInputStream(content);
|
||||||
return new InputStreamReader(stringStream, Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
|
return new InputStreamReader(stringStream, Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
|
||||||
@ -104,6 +105,7 @@ final class StringsTextExtractor {
|
|||||||
*
|
*
|
||||||
* @param context Lookup instance containing config classes
|
* @param context Lookup instance containing config classes
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void setExtractionSettings(Lookup context) {
|
public void setExtractionSettings(Lookup context) {
|
||||||
if (context != null) {
|
if (context != null) {
|
||||||
StringsConfig configInstance = context.lookup(StringsConfig.class);
|
StringsConfig configInstance = context.lookup(StringsConfig.class);
|
||||||
@ -126,14 +128,11 @@ final class StringsTextExtractor {
|
|||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public boolean isEnabled() {
|
@Override
|
||||||
|
public boolean isSupported() {
|
||||||
return extractUTF8 || extractUTF16;
|
return extractUTF8 || extractUTF16;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isSupported(Content file, String detectedFormat) {
|
|
||||||
throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Content input string stream reader/converter - given Content, extract
|
* Content input string stream reader/converter - given Content, extract
|
||||||
* strings from it and return encoded bytes via read()
|
* strings from it and return encoded bytes via read()
|
@ -16,38 +16,26 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.sleuthkit.datamodel.Content;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts the text out of Content instances and exposes them as a Reader.
|
* Extracts the text out of Content instances and exposes them as a Reader.
|
||||||
* Concrete implementations can be obtained from
|
* Concrete implementations can be obtained from
|
||||||
* {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}
|
* {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}
|
||||||
*/
|
*/
|
||||||
abstract class TextExtractor {
|
public abstract class TextExtractor {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if the file content is supported by the extractor.
|
* Determines if this extractor supports the given Content and
|
||||||
|
* configurations passed into it in
|
||||||
|
* {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}.
|
||||||
*
|
*
|
||||||
* @param file to test if its content should be supported
|
* @return true if content is supported, false otherwise
|
||||||
* @param detectedFormat mime-type with detected format (such as text/plain)
|
|
||||||
* or null if not detected
|
|
||||||
*
|
|
||||||
* @return true if the file content is supported, false otherwise
|
|
||||||
*/
|
*/
|
||||||
abstract boolean isSupported(Content file, String detectedFormat);
|
abstract boolean isSupported();
|
||||||
|
|
||||||
/**
|
|
||||||
* Determines if the TextExtractor instance is enabled to read content.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
boolean isEnabled() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get a Reader that will iterate over the text extracted from the Content
|
* Get a Reader that will iterate over the text extracted from the Content
|
||||||
@ -75,8 +63,7 @@ abstract class TextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Exception encountered during
|
* Exception encountered during TextExtractor.getReader().
|
||||||
* {@link org.sleuthkit.autopsy.textextractors.TextExtractor#getReader()}.
|
|
||||||
* This indicates that there was an internal parsing error that occurred
|
* This indicates that there was an internal parsing error that occurred
|
||||||
* during the reading of Content text.
|
* during the reading of Content text.
|
||||||
*/
|
*/
|
160
Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
Executable file
160
Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
Executable file
@ -0,0 +1,160 @@
|
|||||||
|
/*
|
||||||
|
* Autopsy Forensic Browser
|
||||||
|
*
|
||||||
|
* Copyright 2018-2018 Basis Technology Corp.
|
||||||
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import org.openide.util.Lookup;
|
||||||
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
|
import org.sleuthkit.datamodel.Content;
|
||||||
|
import org.sleuthkit.datamodel.Report;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for creating TextExtractors given a Content instance
|
||||||
|
*
|
||||||
|
* See {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs} for
|
||||||
|
* available extractor configuration options.
|
||||||
|
*
|
||||||
|
* @see org.openide.util.Lookup
|
||||||
|
*/
|
||||||
|
public class TextExtractorFactory {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a TextExtractor containing the Content text. Configuration files
|
||||||
|
* can be added to the Lookup.
|
||||||
|
*
|
||||||
|
* See {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs} for
|
||||||
|
* available extractor configuration options.
|
||||||
|
*
|
||||||
|
* @param content Content source that will be read from
|
||||||
|
* @param context Contains extraction configurations for certain file types
|
||||||
|
*
|
||||||
|
* @return TextExtractor containing file text
|
||||||
|
*
|
||||||
|
* @throws NoTextExtractorFound Encountered when there is no TextExtractor found
|
||||||
|
* for the given content type or there was an
|
||||||
|
* error while creating the reader.
|
||||||
|
*
|
||||||
|
* @see org.openide.util.Lookup
|
||||||
|
*/
|
||||||
|
public static TextExtractor getExtractor(Content content, Lookup context) throws NoTextExtractorFound {
|
||||||
|
if (content instanceof AbstractFile) {
|
||||||
|
for (TextExtractor extractor : getFileExtractors((AbstractFile) content, context)) {
|
||||||
|
if (extractor.isSupported()) {
|
||||||
|
return extractor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (content instanceof BlackboardArtifact) {
|
||||||
|
TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
|
||||||
|
artifactExtractor.setExtractionSettings(context);
|
||||||
|
return artifactExtractor;
|
||||||
|
} else if (content instanceof Report) {
|
||||||
|
TextExtractor reportExtractor = new TikaTextExtractor(content);
|
||||||
|
reportExtractor.setExtractionSettings(context);
|
||||||
|
return reportExtractor;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new NoTextExtractorFound(
|
||||||
|
String.format("Could not find a suitable reader for "
|
||||||
|
+ "content with name [%s] and id=[%d]. Try using "
|
||||||
|
+ "the strings extractor instead.",
|
||||||
|
content.getName(), content.getId())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes, orders, and returns all file extractors that can read
|
||||||
|
* AbstractFile instances.
|
||||||
|
*
|
||||||
|
* @param content AbstractFile content
|
||||||
|
* @param context Lookup containing extractor configurations
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
|
||||||
|
List<TextExtractor> fileExtractors = Arrays.asList(
|
||||||
|
new HtmlTextExtractor(content),
|
||||||
|
new SqliteTextExtractor(content),
|
||||||
|
new TikaTextExtractor(content));
|
||||||
|
|
||||||
|
fileExtractors.forEach((fileExtractor) -> {
|
||||||
|
fileExtractor.setExtractionSettings(context);
|
||||||
|
});
|
||||||
|
|
||||||
|
return fileExtractors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a TextExtractor containing the Content text.
|
||||||
|
*
|
||||||
|
* @param content Content instance that will be read from
|
||||||
|
*
|
||||||
|
* @return TextExtractor containing file text
|
||||||
|
*
|
||||||
|
* @throws NoTextExtractorFound Encountered when no TextExtractor was
|
||||||
|
* found for the given content type. Use
|
||||||
|
* getStringsExtractor(Content,Lookup) method
|
||||||
|
* instead.
|
||||||
|
*/
|
||||||
|
public static TextExtractor getExtractor(Content content) throws NoTextExtractorFound {
|
||||||
|
return TextExtractorFactory.getExtractor(content, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a TextExtractor containing the Content strings. This method
|
||||||
|
* supports all content types. This method should be used as a backup in the
|
||||||
|
* event that no extractor was found using getExtractor(Content) or
|
||||||
|
* getExtractor(Content, Lookup).
|
||||||
|
*
|
||||||
|
* Configure this extractor with the StringsConfig in
|
||||||
|
* {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs}
|
||||||
|
*
|
||||||
|
* @param content Content source to read from
|
||||||
|
* @param context Contains extraction configurations for certain file types
|
||||||
|
*
|
||||||
|
* @return TextExtractor containing file text
|
||||||
|
*
|
||||||
|
* @see org.openide.util.Lookup
|
||||||
|
*/
|
||||||
|
public static TextExtractor getStringsExtractor(Content content, Lookup context) {
|
||||||
|
StringsTextExtractor stringsInstance = new StringsTextExtractor(content);
|
||||||
|
stringsInstance.setExtractionSettings(context);
|
||||||
|
return stringsInstance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System level exception for handling content types that have no specific
|
||||||
|
* strategy defined for extracting their text.
|
||||||
|
*/
|
||||||
|
public static class NoTextExtractorFound extends Exception {
|
||||||
|
|
||||||
|
public NoTextExtractorFound(String msg) {
|
||||||
|
super(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
public NoTextExtractorFound(Throwable ex) {
|
||||||
|
super(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
private NoTextExtractorFound(String msg, Throwable ex) {
|
||||||
|
super(msg, ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -16,7 +16,7 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.io.CharSource;
|
import com.google.common.io.CharSource;
|
||||||
@ -61,7 +61,7 @@ import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
|||||||
import org.sleuthkit.autopsy.coreutils.ExecUtil;
|
import org.sleuthkit.autopsy.coreutils.ExecUtil;
|
||||||
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
||||||
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
||||||
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
|
import org.sleuthkit.autopsy.textextractors.textextractorconfigs.ImageConfig;
|
||||||
import org.sleuthkit.autopsy.datamodel.ContentUtils;
|
import org.sleuthkit.autopsy.datamodel.ContentUtils;
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
@ -318,7 +318,7 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wraps the creation of a TikaReader into a Future so that it can be
|
* Wraps the creation of a TikaReader into a Future so that it can be
|
||||||
* cancelled.
|
* cancelled.
|
||||||
@ -422,24 +422,27 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if Tika is supported for this content type and mimetype.
|
* Determines if Tika is enabled for this content
|
||||||
*
|
|
||||||
* @param content Source content to read
|
|
||||||
* @param detectedFormat Mimetype of content
|
|
||||||
*
|
*
|
||||||
* @return Flag indicating support for reading content type
|
* @return Flag indicating support for reading content type
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean isSupported(Content content, String detectedFormat) {
|
public boolean isSupported() {
|
||||||
if (detectedFormat == null
|
if(!(content instanceof AbstractFile)) {
|
||||||
|| BINARY_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
|
return false;
|
||||||
|| ARCHIVE_MIME_TYPES.contains(detectedFormat)
|
}
|
||||||
|| (detectedFormat.startsWith("video/") && !detectedFormat.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
|
|
||||||
|| detectedFormat.equals(SQLITE_MIMETYPE) //Skip sqlite files, Tika cannot handle virtual tables and will fail with an exception. //NON-NLS
|
String detectedType = ((AbstractFile)content).getMIMEType();
|
||||||
|
if (detectedType == null
|
||||||
|
|| BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used)
|
||||||
|
|| ARCHIVE_MIME_TYPES.contains(detectedType)
|
||||||
|
|| (detectedType.startsWith("video/") && !detectedType.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
|
||||||
|
|| detectedType.equals(SQLITE_MIMETYPE) //Skip sqlite files, Tika cannot handle virtual tables and will fail with an exception. //NON-NLS
|
||||||
) {
|
) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
|
|
||||||
|
return TIKA_SUPPORTED_TYPES.contains(detectedType);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
@ -16,11 +16,11 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders.textreaderconfigs;
|
package org.sleuthkit.autopsy.textextractors.textextractorconfigs;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows for configuration of OCR on image files. Readers that use ImageConfig
|
* Allows for configuration of OCR on image files. Extractors that use ImageConfig
|
||||||
* can be obtained through {@link org.sleuthkit.autopsy.textreaders.TextReaders}
|
* can be obtained through TextExtractorFactory.getExtractor().
|
||||||
*
|
*
|
||||||
* @see org.openide.util.Lookup
|
* @see org.openide.util.Lookup
|
||||||
*/
|
*/
|
@ -16,20 +16,20 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textreaders.textreaderconfigs;
|
package org.sleuthkit.autopsy.textextractors.textextractorconfigs;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
|
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows for configuration of the Reader obtained from
|
* Allows for configuration of the TextExtractor obtained from
|
||||||
* {@link org.sleuthkit.autopsy.textreaders.TextReader#getStringsReader(Content, Lookup)}.
|
* TextExtractorFactory.getStringsExtractor(Content, Lookup).
|
||||||
*
|
*
|
||||||
* The strings reader will read strings from the Content instance. This class
|
* The strings extractor will extract strings from the Content instance. This class
|
||||||
* allows for the configuration of the encoding and language scripts used during
|
* allows for the configuration of the encoding and language scripts used during
|
||||||
* reading.
|
* reading.
|
||||||
*
|
*
|
||||||
* @see org.sleuthkit.autopsy.textreaders.TextReaders
|
* @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
|
||||||
* @see
|
* @see
|
||||||
* org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT
|
* org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT
|
||||||
* @see org.openide.util.Lookup
|
* @see org.openide.util.Lookup
|
||||||
@ -77,8 +77,8 @@ public class StringsConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the type of language scripts that will be used during this
|
* Sets the type of language scripts that will be used during this reading.
|
||||||
* reading. See
|
* See
|
||||||
* {@link org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT}
|
* {@link org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT}
|
||||||
* for more information about available scripts.
|
* for more information about available scripts.
|
||||||
*
|
*
|
@ -1,152 +0,0 @@
|
|||||||
/*
|
|
||||||
* Autopsy Forensic Browser
|
|
||||||
*
|
|
||||||
* Copyright 2018-2018 Basis Technology Corp.
|
|
||||||
* Contact: carrier <at> sleuthkit <dot> org
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.sleuthkit.autopsy.textreaders;
|
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
import org.openide.util.Lookup;
|
|
||||||
import org.sleuthkit.autopsy.textreaders.TextExtractor.ExtractionException;
|
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
|
||||||
import org.sleuthkit.datamodel.Content;
|
|
||||||
import org.sleuthkit.datamodel.Report;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Factory for creating Readers given a Content instance
|
|
||||||
*
|
|
||||||
* See {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs} for available
|
|
||||||
* Reader configuration options.
|
|
||||||
*
|
|
||||||
* @see org.openide.util.Lookup
|
|
||||||
*/
|
|
||||||
public class TextReaders {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a reader containing the Content text. Configuration files can be
|
|
||||||
* added to the Lookup.
|
|
||||||
*
|
|
||||||
* See {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs} for
|
|
||||||
* available Reader configuration options.
|
|
||||||
*
|
|
||||||
* @param content Content source that will be read from
|
|
||||||
* @param context Contains extraction configurations for certain file types
|
|
||||||
*
|
|
||||||
* @return Reader containing file text
|
|
||||||
*
|
|
||||||
* @throws NoTextReaderFound Encountered when there is no Reader found for
|
|
||||||
* the given content type or there was an error
|
|
||||||
* while creating the reader.
|
|
||||||
*
|
|
||||||
* @see org.openide.util.Lookup
|
|
||||||
*/
|
|
||||||
public static Reader getReader(Content content,
|
|
||||||
Lookup context) throws NoTextReaderFound {
|
|
||||||
try {
|
|
||||||
if (content instanceof AbstractFile) {
|
|
||||||
String mimeType = ((AbstractFile) content).getMIMEType();
|
|
||||||
List<TextExtractor> extractors = Arrays.asList(
|
|
||||||
new HtmlTextExtractor(content),
|
|
||||||
new SqliteTextExtractor(content),
|
|
||||||
new TikaTextExtractor(content));
|
|
||||||
for (TextExtractor extractor : extractors) {
|
|
||||||
extractor.setExtractionSettings(context);
|
|
||||||
if (extractor.isEnabled() && extractor.isSupported(content, mimeType)) {
|
|
||||||
return extractor.getReader();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (content instanceof BlackboardArtifact) {
|
|
||||||
TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
|
|
||||||
artifactExtractor.setExtractionSettings(context);
|
|
||||||
return artifactExtractor.getReader();
|
|
||||||
} else if (content instanceof Report) {
|
|
||||||
TextExtractor reportExtractor = new TikaTextExtractor(content);
|
|
||||||
reportExtractor.setExtractionSettings(context);
|
|
||||||
return reportExtractor.getReader();
|
|
||||||
}
|
|
||||||
} catch (ExtractionException ex) {
|
|
||||||
throw new NoTextReaderFound("Error while getting reader", ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
throw new NoTextReaderFound(
|
|
||||||
String.format("Could not find a suitable reader for "
|
|
||||||
+ "content with name [%s] and id=[%d]. Try using "
|
|
||||||
+ "the default reader instead.",
|
|
||||||
content.getName(), content.getId())
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a reader containing the Content text.
|
|
||||||
*
|
|
||||||
* @param content Content instance that will be read from
|
|
||||||
*
|
|
||||||
* @return Reader containing file text
|
|
||||||
*
|
|
||||||
* @throws NoTextReaderFound Encountered when there is no Reader was found
|
|
||||||
* for the given content type. Use
|
|
||||||
* getStringsReader(Content,Lookup) method
|
|
||||||
* instead.
|
|
||||||
*/
|
|
||||||
public static Reader getReader(Content content)
|
|
||||||
throws NoTextReaderFound {
|
|
||||||
return TextReaders.getReader(content, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a Reader containing the Content strings. This method supports all
|
|
||||||
* content types. This method should be used as a backup in the event that
|
|
||||||
* no reader was found using getReader(Content) or getReader(Content,
|
|
||||||
* Lookup).
|
|
||||||
*
|
|
||||||
* Configure this reader with the StringsConfig in
|
|
||||||
* {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs}
|
|
||||||
*
|
|
||||||
* @param content Content source to read from
|
|
||||||
* @param context Contains extraction configurations for certain file types
|
|
||||||
*
|
|
||||||
* @return Reader containing file text
|
|
||||||
*
|
|
||||||
* @see org.openide.util.Lookup
|
|
||||||
*/
|
|
||||||
public static Reader getStringsReader(Content content, Lookup context) {
|
|
||||||
StringsTextExtractor stringsInstance = new StringsTextExtractor(content);
|
|
||||||
stringsInstance.setExtractionSettings(context);
|
|
||||||
return stringsInstance.getReader();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* System level exception for handling content types that have no specific
|
|
||||||
* strategy defined for extracting their text.
|
|
||||||
*/
|
|
||||||
public static class NoTextReaderFound extends Exception {
|
|
||||||
|
|
||||||
public NoTextReaderFound(String msg) {
|
|
||||||
super(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
public NoTextReaderFound(Throwable ex) {
|
|
||||||
super(ex);
|
|
||||||
}
|
|
||||||
|
|
||||||
private NoTextReaderFound(String msg, Throwable ex) {
|
|
||||||
super(msg, ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -25,6 +25,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
|
import org.openide.util.Exceptions;
|
||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
import org.openide.util.NbBundle.Messages;
|
import org.openide.util.NbBundle.Messages;
|
||||||
@ -44,9 +45,10 @@ import org.sleuthkit.autopsy.keywordsearch.TextFileExtractor.TextFileExtractorEx
|
|||||||
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
|
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
|
||||||
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
|
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
|
||||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||||
import org.sleuthkit.autopsy.textreaders.TextReaders;
|
import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
||||||
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
|
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
|
||||||
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.StringsConfig;
|
import org.sleuthkit.autopsy.textextractors.textextractorconfigs.ImageConfig;
|
||||||
|
import org.sleuthkit.autopsy.textextractors.textextractorconfigs.StringsConfig;
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
import org.sleuthkit.datamodel.TskData;
|
import org.sleuthkit.datamodel.TskData;
|
||||||
import org.sleuthkit.datamodel.TskData.FileKnown;
|
import org.sleuthkit.datamodel.TskData.FileKnown;
|
||||||
@ -480,10 +482,11 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
Lookup extractionContext = Lookups.fixed(imageConfig);
|
Lookup extractionContext = Lookups.fixed(imageConfig);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Reader specializedReader = TextReaders.getReader(aFile,extractionContext);
|
TextExtractor extractor = TextExtractorFactory.getExtractor(aFile,extractionContext);
|
||||||
|
Reader extractedTextReader = extractor.getReader();
|
||||||
//divide into chunks and index
|
//divide into chunks and index
|
||||||
return Ingester.getDefault().indexText(specializedReader,aFile.getId(),aFile.getName(), aFile, context);
|
return Ingester.getDefault().indexText(extractedTextReader,aFile.getId(),aFile.getName(), aFile, context);
|
||||||
} catch (TextReaders.NoTextReaderFound ex) {
|
} catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
|
||||||
//No text extractor found... run the default instead
|
//No text extractor found... run the default instead
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -502,8 +505,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
if (context.fileIngestIsCancelled()) {
|
if (context.fileIngestIsCancelled()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
Reader stringsReader = TextReaders.getStringsReader(aFile, stringsExtractionContext);
|
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext);
|
||||||
if (Ingester.getDefault().indexText(stringsReader,aFile.getId(),aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
|
Reader extractedTextReader = stringsExtractor.getReader();
|
||||||
|
if (Ingester.getDefault().indexText(extractedTextReader,aFile.getId(),aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
|
||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
@ -511,7 +515,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} catch (IngesterException ex) {
|
} catch (IngesterException | TextExtractor.ExtractionException ex) {
|
||||||
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS
|
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS
|
||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
|
||||||
return false;
|
return false;
|
||||||
|
@ -46,7 +46,8 @@ import org.sleuthkit.autopsy.appservices.AutopsyService;
|
|||||||
import org.sleuthkit.autopsy.progress.ProgressIndicator;
|
import org.sleuthkit.autopsy.progress.ProgressIndicator;
|
||||||
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
|
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
|
||||||
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
|
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
|
||||||
import org.sleuthkit.autopsy.textreaders.TextReaders;
|
import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
||||||
|
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.TskCoreException;
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
@ -114,22 +115,26 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
Reader blackboardReader = TextReaders.getReader(content, null);
|
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null);
|
||||||
|
Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
|
||||||
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
|
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
|
||||||
ingester.indexMetaDataOnly(artifact, sourceName);
|
ingester.indexMetaDataOnly(artifact, sourceName);
|
||||||
ingester.indexText(blackboardReader, artifact.getArtifactID(), sourceName, content, null);
|
ingester.indexText(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null);
|
||||||
} catch (Ingester.IngesterException | TextReaders.NoTextReaderFound ex) {
|
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
|
||||||
throw new TskCoreException(ex.getCause().getMessage(), ex);
|
throw new TskCoreException(ex.getCause().getMessage(), ex);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
Reader contentReader = TextReaders.getReader(content, null);
|
TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null);
|
||||||
ingester.indexText(contentReader, content.getId(), content.getName(), content, null);
|
Reader contentExtractedTextReader = contentExtractor.getReader();
|
||||||
} catch (TextReaders.NoTextReaderFound | Ingester.IngesterException ex) {
|
ingester.indexText(contentExtractedTextReader, content.getId(), content.getName(), content, null);
|
||||||
|
} catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.ExtractionException ex) {
|
||||||
try {
|
try {
|
||||||
// Try the StringsTextExtractor if Tika extractions fails.
|
// Try the StringsTextExtractor if Tika extractions fails.
|
||||||
ingester.indexText(TextReaders.getStringsReader(content, null),content.getId(),content.getName(), content, null);
|
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null);
|
||||||
} catch (Ingester.IngesterException ex1) {
|
Reader stringsExtractedTextReader = stringsExtractor.getReader();
|
||||||
|
ingester.indexText(stringsExtractedTextReader,content.getId(),content.getName(), content, null);
|
||||||
|
} catch (Ingester.IngesterException | TextExtractor.ExtractionException ex1) {
|
||||||
throw new TskCoreException(ex.getCause().getMessage(), ex1);
|
throw new TskCoreException(ex.getCause().getMessage(), ex1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -443,10 +448,11 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
|
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
|
||||||
Reader contentSpecificReader = TextReaders.getReader((Content) artifact, null);
|
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null);
|
||||||
|
Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
|
||||||
ingester.indexMetaDataOnly(artifact, sourceName);
|
ingester.indexMetaDataOnly(artifact, sourceName);
|
||||||
ingester.indexText(contentSpecificReader, artifact.getId(), sourceName, artifact, null);
|
ingester.indexText(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null);
|
||||||
} catch (Ingester.IngesterException | TextReaders.NoTextReaderFound ex) {
|
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
|
||||||
throw new TskCoreException(ex.getCause().getMessage(), ex);
|
throw new TskCoreException(ex.getCause().getMessage(), ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user