build out ArtifactExtractor

millmanorama 2016-12-13 00:02:03 +01:00
parent 1a70a4e8b2
commit 85af7c57b6
14 changed files with 503 additions and 267 deletions

ArtifactExtractor.java

@@ -6,10 +6,51 @@
package org.sleuthkit.autopsy.keywordsearch;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.HashMap;
import org.apache.commons.io.IOUtils;
import org.apache.solr.common.util.ContentStream;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
-public class ArtifactExtractor extends TextProvider<Void, BlackboardArtifact> {
+public class ArtifactExtractor extends TextExtractor<Void, BlackboardArtifact> {
static Content getDataSource(BlackboardArtifact artifact) throws TskCoreException {
Content dataSource;
Case currentCase;
try {
currentCase = Case.getCurrentCase();
} catch (IllegalStateException ignore) {
// thrown by Case.getCurrentCase() if currentCase is null
return null;
}
SleuthkitCase sleuthkitCase = currentCase.getSleuthkitCase();
if (sleuthkitCase == null) {
return null;
}
AbstractFile abstractFile = sleuthkitCase.getAbstractFileById(artifact.getObjectID());
if (abstractFile != null) {
dataSource = abstractFile.getDataSource();
} else {
dataSource = sleuthkitCase.getContentById(artifact.getObjectID());
}
if (dataSource == null) {
return null;
}
return dataSource;
}
@Override
boolean noExtractionOptionsAreEnabled() {
@@ -27,13 +68,99 @@ public class ArtifactExtractor extends TextProvider<Void, BlackboardArtifact> {
}
@Override
-InputStream getInputStream(BlackboardArtifact source) {
-throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
+InputStream getInputStream(BlackboardArtifact artifact) {
// Concatenate the string values of all attributes into a single
// "content" string to be indexed.
StringBuilder artifactContents = new StringBuilder();
Content dataSource;
try {
dataSource = getDataSource(artifact);
if (dataSource == null) {
return null;
}
for (BlackboardAttribute attribute : artifact.getAttributes()) {
artifactContents.append(attribute.getAttributeType().getDisplayName());
artifactContents.append(" : ");
// This is ugly since it will need to updated any time a new
// TSK_DATETIME_* attribute is added. A slightly less ugly
// alternative would be to assume that all date time attributes
// will have a name of the form "TSK_DATETIME*" and check
// attribute.getAttributeTypeName().startsWith("TSK_DATETIME").
// The major problem with that approach is that it would require
// a round trip to the database to get the type name string.
// We have also discussed modifying BlackboardAttribute.getDisplayString()
// to magically format datetime attributes but that is complicated by
// the fact that BlackboardAttribute exists in Sleuthkit data model
// while the utility to determine the timezone to use is in ContentUtils
// in the Autopsy datamodel.
if (attribute.getValueType() == BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.DATETIME) {
artifactContents.append(ContentUtils.getStringTime(attribute.getValueLong(), dataSource));
} else {
artifactContents.append(attribute.getDisplayString());
}
artifactContents.append(System.lineSeparator());
}
} catch (TskCoreException ex) {
Exceptions.printStackTrace(ex);
return null;
}
if (artifactContents.length() == 0) {
return null;
}
// To play by the rules of the existing text markup implementations,
// we need to (a) index the artifact contents in a "chunk" and
// (b) create a separate index entry for the base artifact.
// We distinguish artifact content from file content by applying a
// mask to the artifact id to make its value > 0x8000000000000000 (i.e. negative).
// First, create an index entry for the base artifact.
HashMap<String, String> solrFields = new HashMap<>();
String documentId = Long.toString(artifact.getArtifactID());
solrFields.put(Server.Schema.ID.toString(), documentId);
// Set the IMAGE_ID field.
solrFields.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
// Next create the index entry for the document content.
// The content gets added to a single chunk. We may need to add chunking
// support later.
long chunkId = 1;
documentId += "_" + Long.toString(chunkId);
solrFields.replace(Server.Schema.ID.toString(), documentId);
return IOUtils.toInputStream(artifactContents);
}
@Override
Reader getReader(InputStream stream, BlackboardArtifact source, Void appendix) throws Ingester.IngesterException {
-throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
+return new InputStreamReader(stream);
}
@Override
long getID(BlackboardArtifact source) {
return source.getArtifactID();
}
@Override
ContentStream getContentStream(byte[] encodedBytes, int length, BlackboardArtifact source) {
return new ByteArtifactStream(encodedBytes, length, source);
}
@Override
ContentStream getNullStream(BlackboardArtifact source) {
return new Ingester.NullArtifactStream(source);
}
@Override
String getName(BlackboardArtifact source) {
return source.getDisplayName();
}
}

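The long comment in getInputStream() above weighs two ways of spotting TSK_DATETIME_* attributes. A minimal standalone sketch of the prefix-check alternative it describes, using a hypothetical stand-in type rather than the real BlackboardAttribute:

import java.time.Instant;
import java.time.ZoneId;

// AttributeStub is a stand-in for BlackboardAttribute. In the real data
// model the type-name lookup can cost a database round trip, which is why
// the commit tests the DATETIME value-type enum instead of the name prefix.
class AttributeStub {
    final String typeName; // e.g. "TSK_DATETIME_CREATED"
    final long valueLong;  // epoch seconds for datetime attributes

    AttributeStub(String typeName, long valueLong) {
        this.typeName = typeName;
        this.valueLong = valueLong;
    }

    String displayString(ZoneId tz) {
        // the prefix test the comment proposes as the less verbose option
        if (typeName.startsWith("TSK_DATETIME")) {
            return Instant.ofEpochSecond(valueLong).atZone(tz).toString();
        }
        return Long.toString(valueLong);
    }
}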
ByteArtifactStream.java

@@ -0,0 +1,100 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import org.apache.solr.common.util.ContentStream;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import static org.sleuthkit.autopsy.keywordsearch.Bundle.*;
import org.sleuthkit.datamodel.BlackboardArtifact;
/**
* Stream of bytes representing a string with a specified encoding, fed to Solr
* as a ContentStream.
*/
class ByteArtifactStream implements ContentStream {
//input
private final byte[] content; //extracted subcontent
private long contentSize;
private final BlackboardArtifact aContent; //origin
private final InputStream stream;
private static final Logger logger = Logger.getLogger(ByteArtifactStream.class.getName());
public ByteArtifactStream(byte[] content, long contentSize, BlackboardArtifact aContent) {
this.content = content;
this.contentSize = contentSize;
this.aContent = aContent;
stream = new ByteArrayInputStream(content, 0, (int) contentSize);
}
public byte[] getByteContent() {
return content;
}
public BlackboardArtifact getSourceContent() {
return aContent;
}
@Override
public String getContentType() {
return "text/plain;charset=" + Server.DEFAULT_INDEXED_TEXT_CHARSET.name(); //NON-NLS
}
@Override
public String getName() {
return aContent.getDisplayName();
}
@Override
public Reader getReader() throws IOException {
return new InputStreamReader(stream);
}
@Override
public Long getSize() {
return contentSize;
}
@Override
@NbBundle.Messages("ByteArtifactStream.getSrcInfo.text=Artifact:{0}")
public String getSourceInfo() {
return ByteArtifactStream_getSrcInfo_text(aContent.getArtifactID());
}
@Override
public InputStream getStream() throws IOException {
return stream;
}
@Override
protected void finalize() throws Throwable {
super.finalize();
stream.close();
}
}

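A standalone model of what ByteArtifactStream wraps: already-encoded bytes plus an explicit logical length. It illustrates why getSize() must report the passed-in length rather than the backing array's size (the array may be a larger reused buffer); names here are hypothetical, not the commit's code:

import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

class BoundedBytes {
    private final byte[] bytes; // possibly larger than the content
    private final int length;   // logical content length

    BoundedBytes(byte[] bytes, int length) {
        this.bytes = bytes;
        this.length = length;
    }

    long size() {
        return length; // report the logical size, not bytes.length
    }

    Reader reader() {
        // bound the view to the first `length` bytes
        return new InputStreamReader(
                new ByteArrayInputStream(bytes, 0, length),
                StandardCharsets.UTF_8);
    }
}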
FileTextExtractor.java

@@ -0,0 +1,124 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2016 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.util.Arrays;
import java.util.List;
import org.apache.solr.common.util.ContentStream;
import org.sleuthkit.datamodel.AbstractFile;
/**
* Common methods for utilities that extract text and content and divide into
* chunks
*/
abstract class FileTextExtractor<AppendixProvider> extends TextExtractor<AppendixProvider, AbstractFile> {
/**
* Common options that can be used by some extractors
*/
enum ExtractOptions {
EXTRACT_UTF16, ///< extract UTF16 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString()
EXTRACT_UTF8, ///< extract UTF8 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString()
};
static final List<String> BLOB_MIME_TYPES
= Arrays.asList(
//ignore binary blob data, for which string extraction will be used
"application/octet-stream", //NON-NLS
"application/x-msdownload"); //NON-NLS
/** generally text extractors should ignore archives and let unpacking
* modules take care of them */
static final List<String> ARCHIVE_MIME_TYPES
= Arrays.asList(
//ignore unstructured binary and compressed data, for which string extraction or unzipper works better
"application/x-7z-compressed", //NON-NLS
"application/x-ace-compressed", //NON-NLS
"application/x-alz-compressed", //NON-NLS
"application/x-arj", //NON-NLS
"application/vnd.ms-cab-compressed", //NON-NLS
"application/x-cfs-compressed", //NON-NLS
"application/x-dgc-compressed", //NON-NLS
"application/x-apple-diskimage", //NON-NLS
"application/x-gca-compressed", //NON-NLS
"application/x-dar", //NON-NLS
"application/x-lzx", //NON-NLS
"application/x-lzh", //NON-NLS
"application/x-rar-compressed", //NON-NLS
"application/x-stuffit", //NON-NLS
"application/x-stuffitx", //NON-NLS
"application/x-gtar", //NON-NLS
"application/x-archive", //NON-NLS
"application/x-executable", //NON-NLS
"application/x-gzip", //NON-NLS
"application/zip", //NON-NLS
"application/x-zoo", //NON-NLS
"application/x-cpio", //NON-NLS
"application/x-shar", //NON-NLS
"application/x-tar", //NON-NLS
"application/x-bzip", //NON-NLS
"application/x-bzip2", //NON-NLS
"application/x-lzip", //NON-NLS
"application/x-lzma", //NON-NLS
"application/x-lzop", //NON-NLS
"application/x-z", //NON-NLS
"application/x-compress"); //NON-NLS
/**
* Determines whether the extractor works only for the types specified by
* supportedTypes() or whether it is a generic content extractor (such as
* the string extractor).
*
* @return true if the extractor is content-type specific
*/
abstract boolean isContentTypeSpecific();
/**
* Determines if the file content is supported by the extractor if
* isContentTypeSpecific() returns true.
*
* @param file to test if its content should be supported
* @param detectedFormat mime-type with detected format (such as text/plain)
* or null if not detected
*
* @return true if the file content is supported, false otherwise
*/
abstract boolean isSupported(AbstractFile file, String detectedFormat);
@Override
long getID(AbstractFile source) {
return source.getId();
}
@Override
ContentStream getContentStream(byte[] encodedBytes, int length, AbstractFile source) {
return new ByteContentStream(encodedBytes, length, source);
}
@Override
ContentStream getNullStream(AbstractFile source) {
return new Ingester.NullContentStream(source);
}
@Override
String getName(AbstractFile source) {
return source.getName();
}
}

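The deny lists above feed the isSupported() checks in the concrete extractors. A compact sketch of that gating pattern with abbreviated lists (standalone illustration, assuming nothing beyond the JDK):

import java.util.Arrays;
import java.util.List;

class MimeGate {
    // abbreviated stand-in for ARCHIVE_MIME_TYPES / BLOB_MIME_TYPES
    static final List<String> DENY = Arrays.asList(
            "application/zip", "application/x-gzip", "application/octet-stream");

    static boolean isSupported(String detectedFormat, List<String> parserTypes) {
        if (detectedFormat == null || DENY.contains(detectedFormat)) {
            return false; // leave these to string extraction / unpacking modules
        }
        return parserTypes.contains(detectedFormat);
    }
}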
HtmlTextExtractor.java

@@ -39,7 +39,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
* divided into chunks and indexed with Solr. If HTML extraction succeeds,
* chunks are indexed with Solr.
*/
-class HtmlTextExtractor extends TextExtractor<Void> {
+class HtmlTextExtractor extends FileTextExtractor<Void> {
private static final Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());

Ingester.java

@@ -36,14 +36,16 @@ import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.datamodel.AbstractContent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.Content;
-import org.sleuthkit.datamodel.ContentVisitor;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.SlackFile;
import org.sleuthkit.datamodel.SleuthkitItemVisitor;
import org.sleuthkit.datamodel.SleuthkitVisitableItem;
import org.sleuthkit.datamodel.TskCoreException;
/**
@@ -99,6 +101,11 @@ class Ingester {
indexContentStream(new NullContentStream(file), getContentFields(file), 0);
}
void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
// indexContentStream(new NullArtifactStream(artifact), getContentFields(artifact), 0);
}
/**
* Sends a TextExtractor to Solr to have its content extracted and added to
* the index. commit() should be called once you're done ingesting files.
@@ -117,6 +124,12 @@ class Ingester {
indexContentStream(new NullContentStream(file), params, 0);
}
private void recordNumberOfChunks(BlackboardArtifact artifact, int numChunks) throws IngesterException {
Map<String, String> params = getContentFields(artifact);
params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
indexContentStream(new NullArtifactStream(artifact), params, 0);
}
/**
* Creates a field map from FsContent, that is later sent to Solr
*
@@ -124,19 +137,14 @@ class Ingester {
*
* @return the map
*/
-Map<String, String> getContentFields(AbstractContent fsc) {
+Map<String, String> getContentFields(SleuthkitVisitableItem fsc) {
return fsc.accept(getContentFieldsV);
}
/**
* Visitor used to create param list to send to SOLR index.
*/
-static private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {
-@Override
-protected Map<String, String> defaultVisit(Content cntnt) {
-return new HashMap<>();
-}
+static private class GetContentFieldsV extends SleuthkitItemVisitor.Default<Map<String, String>> {
@Override
public Map<String, String> visit(File f) {
@@ -201,21 +209,46 @@ class Ingester {
params.put(Server.Schema.FILE_NAME.toString(), af.getName());
return params;
}
@Override
public Map<String, String> visit(BlackboardArtifact artifact) {
Map<String, String> params = new HashMap<>();
params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
try {
Content dataSource = ArtifactExtractor.getDataSource(artifact);
params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact {0}", artifact.getArtifactID()); //NON-NLS
params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
}
return params;
}
@Override
protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
return new HashMap<>();
}
}
private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024; //chars
private static final int SINGLE_READ_CHARS = 1024;
private static final int EXTRA_CHARS = 128; //for whitespace
-public <T> boolean indexText(TextExtractor<T> extractor, AbstractFile sourceFile, IngestJobContext context) throws Ingester.IngesterException {
+public <A, T extends SleuthkitVisitableItem> boolean indexText(TextExtractor<A, T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
int numChunks = 0; //unknown until chunking is done
if (extractor.noExtractionOptionsAreEnabled()) {
return true;
}
-T appendix = extractor.newAppendixProvider();
-try (final InputStream stream = extractor.getInputStream(sourceFile);
-Reader reader = extractor.getReader(stream, sourceFile, appendix);) {
+final long sourceID = extractor.getID(source);
+final String sourceName = extractor.getName(source);
+Map<String, String> fields = getContentFields(source);
+A appendix = extractor.newAppendixProvider();
+try (final InputStream stream = extractor.getInputStream(source);
+Reader reader = extractor.getReader(stream, source, appendix);) {
//we read max 1024 chars at time, this seems to max what this Reader would return
char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
@@ -265,10 +298,10 @@ class Ingester {
//encode to bytes as UTF-8 to index as byte stream
byte[] encodedBytes = chunkString.getBytes(Server.DEFAULT_INDEXED_TEXT_CHARSET);
-String chunkId = Server.getChunkIdString(sourceFile.getId(), numChunks + 1);
+String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
try {
-ByteContentStream bcs = new ByteContentStream(encodedBytes, encodedBytes.length, sourceFile);
-Map<String, String> fields = getContentFields(sourceFile);
+ContentStream bcs = extractor.getContentStream(encodedBytes, encodedBytes.length, source);
try {
indexContentStream(bcs, fields, encodedBytes.length);
} catch (Exception ex) {
@@ -277,20 +310,21 @@ class Ingester {
numChunks++;
} catch (Ingester.IngesterException ingEx) {
extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
-+ sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);//NON-NLS
++ sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
throw ingEx; //need to rethrow to signal error and move on
}
}
} catch (IOException ex) {
-extractor.logWarning("Unable to read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);//NON-NLS
+extractor.logWarning("Unable to read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
return false;
} catch (Exception ex) {
-extractor.logWarning("Unexpected error, can't read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);//NON-NLS
+extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
return false;
} finally {
//after all chunks, ingest the parent file without content itself, and store numChunks
-recordNumberOfChunks(sourceFile, numChunks);
+fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
+indexContentStream(extractor.getNullStream(source), fields, 0);
}
return true;
}
@@ -442,7 +476,7 @@ class Ingester {
/**
* ContentStream associated with FsContent, but forced with no content
*/
-private static class NullContentStream implements ContentStream {
+static class NullContentStream implements ContentStream {
AbstractContent aContent;
@@ -482,6 +516,50 @@ class Ingester {
}
}
/**
* ContentStream associated with Artifact, but forced with no content
*/
static class NullArtifactStream implements ContentStream {
BlackboardArtifact aContent;
NullArtifactStream(BlackboardArtifact aContent) {
this.aContent = aContent;
}
@Override
public String getName() {
return aContent.getDisplayName();
}
@NbBundle.Messages("Ingester.NullArtifactStream.getSrcInfo.text=File:{0})\n")
@Override
public String getSourceInfo() {
return Bundle.Ingester_NullArtifactStream_getSrcInfo_text(aContent.getArtifactID());
}
@Override
public String getContentType() {
return null;
}
@Override
public Long getSize() {
return 0L;
}
@Override
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(new byte[0]);
}
@Override
public Reader getReader() throws IOException {
throw new UnsupportedOperationException(
NbBundle.getMessage(this.getClass(), "Ingester.NullContentStream.getReader"));
}
}
/**
* Indicates that there was an error with the specific ingest operation, but
* it's still okay to continue ingesting files.

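The generic indexText() above fills a fixed-size chunk buffer with bounded reads, indexes one Solr document per full buffer, then indexes a contentless parent document carrying NUM_CHUNKS. A standalone model of just the chunking loop (simplified: no appendix handling and no EXTRA_CHARS slack):

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

class Chunker {
    static final int MAX_CHUNK_CHARS = 512 * 1024; // mirrors MAX_EXTR_TEXT_CHARS
    static final int SINGLE_READ_CHARS = 1024;

    static List<String> chunks(Reader reader) throws IOException {
        List<String> out = new ArrayList<>();
        char[] buf = new char[MAX_CHUNK_CHARS];
        int filled = 0;
        int read;
        // read at most SINGLE_READ_CHARS per call, never past the buffer end
        while ((read = reader.read(buf, filled,
                Math.min(SINGLE_READ_CHARS, buf.length - filled))) != -1) {
            filled += read;
            if (filled == buf.length) { // buffer full: emit one chunk
                out.add(new String(buf, 0, filled));
                filled = 0;
            }
        }
        if (filled > 0) {
            out.add(new String(buf, 0, filled)); // trailing partial chunk
        }
        return out;
    }
}

In the method above, each emitted chunk gets the id Server.getChunkIdString(sourceID, n + 1) and the shared field map.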
KeywordSearchGlobalLanguageSettingsPanel.java

@@ -103,12 +103,12 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
private void reloadScriptsCheckBoxes() {
boolean utf16
-= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
enableUTF16Checkbox.setSelected(utf16);
boolean utf8
-= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
enableUTF8Checkbox.setSelected(utf8);
final List<SCRIPT> serviceScripts = KeywordSearchSettings.getStringExtractScripts();
@@ -127,12 +127,12 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
reloadScriptsCheckBoxes();
boolean utf16
-= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
enableUTF16Checkbox.setSelected(utf16);
boolean utf8
-= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
enableUTF8Checkbox.setSelected(utf8);
final boolean extractEnabled = utf16 || utf8;
@@ -257,9 +257,9 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
@Override
public void store() {
-KeywordSearchSettings.setStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString(),
+KeywordSearchSettings.setStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString(),
Boolean.toString(enableUTF8Checkbox.isSelected()));
-KeywordSearchSettings.setStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
+KeywordSearchSettings.setStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
Boolean.toString(enableUTF16Checkbox.isSelected()));
if (toUpdate != null) {

KeywordSearchIngestModule.java

@@ -89,7 +89,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
//accessed read-only by searcher thread
private boolean startedSearching = false;
-private List<TextExtractor<?>> textExtractors;
+private List<FileTextExtractor<?>> textExtractors;
private StringsTextExtractor stringExtractor;
private final KeywordSearchJobSettings settings;
private boolean initialized = false;
@@ -415,10 +415,10 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
* @throws IngesterException exception thrown if indexing failed
*/
private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
-TextExtractor extractor = null;
+FileTextExtractor extractor = null;
//go over available text extractors in order, and pick the first one (most specific one)
-for (TextExtractor fe : textExtractors) {
+for (FileTextExtractor fe : textExtractors) {
if (fe.isSupported(aFile, detectedFormat)) {
extractor = fe;
break;
@@ -514,7 +514,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
// we skip archive formats that are opened by the archive module.
// @@@ We could have a check here to see if the archive module was enabled though...
-if (TextExtractor.ARCHIVE_MIME_TYPES.contains(fileType)) {
+if (FileTextExtractor.ARCHIVE_MIME_TYPES.contains(fileType)) {
try {
if (context.fileIngestIsCancelled()) {
return;

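extractTextAndIndex() above walks the extractor list in registration order and takes the first one whose isSupported() accepts the file, so list ordering encodes specificity. A generic sketch of that selection rule (standalone, hypothetical names):

import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;

class FirstMatch {
    // candidates are ordered most specific first; the first accepting one wins
    static <T> Optional<Predicate<T>> pick(List<Predicate<T>> ordered, T input) {
        for (Predicate<T> candidate : ordered) {
            if (candidate.test(input)) {
                return Optional.of(candidate);
            }
        }
        return Optional.empty();
    }
}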
KeywordSearchJobSettingsPanel.java

@@ -101,8 +101,8 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
}
private void displayEncodings() {
-String utf8 = KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString());
+String utf8 = KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString());
-String utf16 = KeywordSearchSettings.getStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString());
+String utf16 = KeywordSearchSettings.getStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString());
ArrayList<String> encodingsList = new ArrayList<>();
if (utf8 == null || Boolean.parseBoolean(utf8)) {
encodingsList.add("UTF8");

KeywordSearchSettings.java

@@ -211,14 +211,14 @@ class KeywordSearchSettings {
KeywordSearchSettings.setUpdateFrequency(UpdateFrequency.DEFAULT);
}
//setting default Extract UTF8
-if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, TextExtractor.ExtractOptions.EXTRACT_UTF8.toString())) {
+if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString())) {
logger.log(Level.INFO, "No configuration for UTF8 found, generating default..."); //NON-NLS
-KeywordSearchSettings.setStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString(), Boolean.TRUE.toString());
+KeywordSearchSettings.setStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString(), Boolean.TRUE.toString());
}
//setting default Extract UTF16
-if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, TextExtractor.ExtractOptions.EXTRACT_UTF16.toString())) {
+if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString())) {
logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS
-KeywordSearchSettings.setStringExtractOption(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
+KeywordSearchSettings.setStringExtractOption(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
}
//setting default Latin-1 Script
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_SCRIPTS, SCRIPT.LATIN_1.name())) {

SolrSearchService.java

@@ -20,22 +20,14 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.io.IOException;
import java.net.InetAddress;
-import java.util.HashMap;
import java.util.MissingResourceException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
-import org.apache.solr.common.util.ContentStreamBase.StringStream;
import org.openide.util.NbBundle;
import org.openide.util.lookup.ServiceProvider;
-import org.sleuthkit.autopsy.casemodule.Case;
-import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
-import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.Content;
-import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
/**
@@ -49,6 +41,8 @@ public class SolrSearchService implements KeywordSearchService {
private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS
private static final int IS_REACHABLE_TIMEOUT_MS = 1000;
ArtifactExtractor extractor = new ArtifactExtractor();
@Override
public void indexArtifact(BlackboardArtifact artifact) throws TskCoreException {
if (artifact == null) {
@@ -57,109 +51,18 @@ public class SolrSearchService implements KeywordSearchService {
// We only support artifact indexing for Autopsy versions that use
// the negative range for artifact ids.
-long artifactId = artifact.getArtifactID();
-if (artifactId > 0) {
+if (artifact.getArtifactID() > 0) {
return;
}
-Case currentCase;
-try {
-currentCase = Case.getCurrentCase();
-} catch (IllegalStateException ignore) {
-// thorown by Case.getCurrentCase() if currentCase is null
-return;
-}
-SleuthkitCase sleuthkitCase = currentCase.getSleuthkitCase();
-if (sleuthkitCase == null) {
-return;
-}
-Content dataSource;
-AbstractFile abstractFile = sleuthkitCase.getAbstractFileById(artifact.getObjectID());
-if (abstractFile != null) {
-dataSource = abstractFile.getDataSource();
-} else {
-dataSource = sleuthkitCase.getContentById(artifact.getObjectID());
-}
-if (dataSource == null) {
-return;
-}
-// Concatenate the string values of all attributes into a single
-// "content" string to be indexed.
-StringBuilder artifactContents = new StringBuilder();
-for (BlackboardAttribute attribute : artifact.getAttributes()) {
-artifactContents.append(attribute.getAttributeType().getDisplayName());
-artifactContents.append(" : ");
-// This is ugly since it will need to updated any time a new
-// TSK_DATETIME_* attribute is added. A slightly less ugly
-// alternative would be to assume that all date time attributes
-// will have a name of the form "TSK_DATETIME*" and check
-// attribute.getAttributeTypeName().startsWith("TSK_DATETIME*".
-// The major problem with that approach is that it would require
-// a round trip to the database to get the type name string.
-// We have also discussed modifying BlackboardAttribute.getDisplayString()
-// to magically format datetime attributes but that is complicated by
-// the fact that BlackboardAttribute exists in Sleuthkit data model
-// while the utility to determine the timezone to use is in ContentUtils
-// in the Autopsy datamodel.
-if (attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_CREATED.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_MODIFIED.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_START.getTypeID()
-|| attribute.getAttributeType().getTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_END.getTypeID()) {
-artifactContents.append(ContentUtils.getStringTime(attribute.getValueLong(), dataSource));
-} else {
-artifactContents.append(attribute.getDisplayString());
-}
-artifactContents.append(System.lineSeparator());
-}
-if (artifactContents.length() == 0) {
-return;
-}
-// To play by the rules of the existing text markup implementations,
-// we need to (a) index the artifact contents in a "chunk" and
-// (b) create a separate index entry for the base artifact.
-// We distinguish artifact content from file content by applying a
-// mask to the artifact id to make its value > 0x8000000000000000 (i.e. negative).
-// First, create an index entry for the base artifact.
-HashMap<String, String> solrFields = new HashMap<>();
-String documentId = Long.toString(artifactId);
-solrFields.put(Server.Schema.ID.toString(), documentId);
-// Set the IMAGE_ID field.
-solrFields.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
try {
-Ingester.getDefault().indexContentStream(new StringStream(""), solrFields, 0);
+Ingester.getDefault().indexMetaDataOnly(artifact);
} catch (Ingester.IngesterException ex) {
throw new TskCoreException(ex.getCause().getMessage(), ex);
}
-// Next create the index entry for the document content.
-// The content gets added to a single chunk. We may need to add chunking
-// support later.
-long chunkId = 1;
-documentId += "_" + Long.toString(chunkId);
-solrFields.replace(Server.Schema.ID.toString(), documentId);
-StringStream contentStream = new StringStream(artifactContents.toString());
try {
-Ingester.getDefault().indexContentStream(contentStream, solrFields, contentStream.getSize());
+Ingester.getDefault().indexText(extractor, artifact);
} catch (Ingester.IngesterException ex) {
throw new TskCoreException(ex.getCause().getMessage(), ex);
}

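The migration above keeps the established Solr document-id convention: the base entry is keyed by the (masked, negative) artifact id and each content chunk appends "_<n>". A hypothetical helper showing the shape of those ids; Server.getChunkIdString() is the real counterpart:

class SolrDocIds {
    static String baseId(long sourceId) {
        return Long.toString(sourceId); // artifact ids sit in the negative range
    }

    static String chunkId(long sourceId, int chunkIndex) {
        return baseId(sourceId) + "_" + chunkIndex; // chunks are numbered from 1
    }
}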
StringsTextExtractor.java

@@ -38,7 +38,7 @@ import org.sleuthkit.datamodel.TskException;
* with the original source file) up to 1MB then and indexes chunks as text with
* Solr.
*/
-class StringsTextExtractor extends TextExtractor<Void> {
+class StringsTextExtractor extends FileTextExtractor<Void> {
private static final Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
private static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
@@ -94,8 +94,8 @@ class StringsTextExtractor extends TextExtractor<Void> {
@Override
boolean noExtractionOptionsAreEnabled() {
-boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
-boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
return extractUTF8 == false && extractUTF16 == false;
}
@@ -120,8 +120,8 @@ class StringsTextExtractor extends TextExtractor<Void> {
*/
@Override
InputStream getInputStream(AbstractFile sourceFile) {
-boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(FileTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
-boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(TextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(FileTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
//check which extract stream to use
InputStream stringStream = extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)

TextExtractor.java

@@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
-* Copyright 2011-2016 Basis Technology Corp.
+* Copyright 2011-16 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,89 +18,30 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
-import java.util.Arrays;
-import java.util.List;
-import org.sleuthkit.datamodel.AbstractFile;
-/**
- * Common methods for utilities that extract text and content and divide into
- * chunks
- */
-abstract class TextExtractor<AppendixProvider> extends TextProvider<AppendixProvider, AbstractFile> {
-/**
- * Common options that can be used by some extractors
- */
-enum ExtractOptions {
-EXTRACT_UTF16, ///< extract UTF16 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString()
-EXTRACT_UTF8, ///< extract UTF8 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString()
-};
-static final List<String> BLOB_MIME_TYPES
-= Arrays.asList(
-//ignore binary blob data, for which string extraction will be used
-"application/octet-stream", //NON-NLS
-"application/x-msdownload"); //NON-NLS
-/** generally text extractors should ignore archives and let unpacking
- * modules take care of them */
-static final List<String> ARCHIVE_MIME_TYPES
-= Arrays.asList(
-//ignore unstructured binary and compressed data, for which string extraction or unzipper works better
-"application/x-7z-compressed", //NON-NLS
-"application/x-ace-compressed", //NON-NLS
-"application/x-alz-compressed", //NON-NLS
-"application/x-arj", //NON-NLS
-"application/vnd.ms-cab-compressed", //NON-NLS
-"application/x-cfs-compressed", //NON-NLS
-"application/x-dgc-compressed", //NON-NLS
-"application/x-apple-diskimage", //NON-NLS
-"application/x-gca-compressed", //NON-NLS
-"application/x-dar", //NON-NLS
-"application/x-lzx", //NON-NLS
-"application/x-lzh", //NON-NLS
-"application/x-rar-compressed", //NON-NLS
-"application/x-stuffit", //NON-NLS
-"application/x-stuffitx", //NON-NLS
-"application/x-gtar", //NON-NLS
-"application/x-archive", //NON-NLS
-"application/x-executable", //NON-NLS
-"application/x-gzip", //NON-NLS
-"application/zip", //NON-NLS
-"application/x-zoo", //NON-NLS
-"application/x-cpio", //NON-NLS
-"application/x-shar", //NON-NLS
-"application/x-tar", //NON-NLS
-"application/x-bzip", //NON-NLS
-"application/x-bzip2", //NON-NLS
-"application/x-lzip", //NON-NLS
-"application/x-lzma", //NON-NLS
-"application/x-lzop", //NON-NLS
-"application/x-z", //NON-NLS
-"application/x-compress"); //NON-NLS
-/**
- * Determines if the extractor works only for specified types is
- * supportedTypes() or whether is a generic content extractor (such as
- * string extractor)
- *
- * @return
- */
-abstract boolean isContentTypeSpecific();
-/**
- * Determines if the file content is supported by the extractor if
- * isContentTypeSpecific() returns true.
- *
- * @param file to test if its content should be supported
- * @param detectedFormat mime-type with detected format (such as text/plain)
- * or null if not detected
- *
- * @return true if the file content is supported, false otherwise
- */
-abstract boolean isSupported(AbstractFile file, String detectedFormat);
+import java.io.InputStream;
+import java.io.Reader;
+import org.apache.solr.common.util.ContentStream;
+import org.sleuthkit.datamodel.SleuthkitVisitableItem;
+abstract class TextExtractor<AppendixProvider, TextSource extends SleuthkitVisitableItem> {
+abstract boolean noExtractionOptionsAreEnabled();
+abstract void logWarning(final String msg, Exception ex);
+void appendDataToFinalChunk(StringBuilder sb, AppendixProvider dataProvider) {
+//no-op
+}
+abstract AppendixProvider newAppendixProvider();
+abstract InputStream getInputStream(TextSource source);
+abstract Reader getReader(InputStream stream, TextSource source, AppendixProvider appendix) throws Ingester.IngesterException;
+abstract long getID(TextSource source);
+abstract ContentStream getContentStream(byte[] encodedBytes, int length, TextSource source);
+abstract String getName(TextSource source);
+abstract ContentStream getNullStream(TextSource source);
}

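This rewritten TextExtractor is the pivot of the commit: the contract is now parameterized on the source type, so file and artifact extraction can share one indexing pipeline. A standalone model of that shape, with a toy source type standing in for SleuthkitVisitableItem (assumed names, not the commit's code):

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

// The pipeline only needs an id, a name, and a character stream, so any
// source type can participate by implementing these hooks.
abstract class Extractor<S> {
    abstract long getID(S source);
    abstract String getName(S source);
    abstract InputStream getInputStream(S source);

    Reader getReader(InputStream stream, S source) {
        return new InputStreamReader(stream, StandardCharsets.UTF_8);
    }
}

class StringSource {
    final long id;
    final String name;
    final String text;

    StringSource(long id, String name, String text) {
        this.id = id;
        this.name = name;
        this.text = text;
    }
}

class StringSourceExtractor extends Extractor<StringSource> {
    @Override
    long getID(StringSource s) {
        return s.id;
    }

    @Override
    String getName(StringSource s) {
        return s.name;
    }

    @Override
    InputStream getInputStream(StringSource s) {
        return new ByteArrayInputStream(s.text.getBytes(StandardCharsets.UTF_8));
    }
}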
TextProvider.java (deleted)

@ -1,39 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-16 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.InputStream;
import java.io.Reader;
abstract class TextProvider<AppendixProvider, TextSource> {
abstract boolean noExtractionOptionsAreEnabled();
abstract void logWarning(final String msg, Exception ex);
void appendDataToFinalChunk(StringBuilder sb, AppendixProvider dataProvider) {
//no-op
}
abstract AppendixProvider newAppendixProvider();
abstract InputStream getInputStream(TextSource source);
abstract Reader getReader(InputStream stream, TextSource source, AppendixProvider appendix) throws Ingester.IngesterException;
}

TikaTextExtractor.java

@@ -49,7 +49,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
* parsers-supported content type.
*
*/
-class TikaTextExtractor extends TextExtractor<Metadata> {
+class TikaTextExtractor extends FileTextExtractor<Metadata> {
private static final Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
@@ -110,8 +110,8 @@ class TikaTextExtractor extends TextExtractor<Metadata> {
@Override
public boolean isSupported(AbstractFile file, String detectedFormat) {
if (detectedFormat == null
-|| TextExtractor.BLOB_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
+|| FileTextExtractor.BLOB_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
-|| TextExtractor.ARCHIVE_MIME_TYPES.contains(detectedFormat)
+|| FileTextExtractor.ARCHIVE_MIME_TYPES.contains(detectedFormat)
|| (detectedFormat.startsWith("video/") && !detectedFormat.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
|| detectedFormat.equals("application/x-font-ttf")) { // Tika currently has a bug in the ttf parser in fontbox; It will throw an out of memory exception//NON-NLS
@@ -123,6 +123,7 @@ class TikaTextExtractor extends TextExtractor<Metadata> {
return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
}
@Override
InputStream getInputStream(AbstractFile sourceFile1) {
return new ReadContentInputStream(sourceFile1);
}
@@ -131,4 +132,5 @@ class TikaTextExtractor extends TextExtractor<Metadata> {
boolean noExtractionOptionsAreEnabled() {
return false;
}
}