mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
Cleaned up code and added comments
This commit is contained in:
parent
38049bc73c
commit
7669590455
@ -25,7 +25,6 @@ import java.util.Arrays;
|
|||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import net.htmlparser.jericho.Attributes;
|
import net.htmlparser.jericho.Attributes;
|
||||||
import net.htmlparser.jericho.Config;
|
import net.htmlparser.jericho.Config;
|
||||||
@ -86,6 +85,12 @@ final class HtmlTextExtractor implements TextExtractor {
|
|||||||
&& file.getSize() <= MAX_SIZE;
|
&& file.getSize() <= MAX_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the metadata as a key -> value map. HTML metadata will include
|
||||||
|
* scripts, links, images, comments, and misc attributes.
|
||||||
|
*
|
||||||
|
* @return Map containing metadata key -> value pairs.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Map<String, String> getMetadata() {
|
public Map<String, String> getMetadata() {
|
||||||
Map<String, String> metadataMap = new HashMap<>();
|
Map<String, String> metadataMap = new HashMap<>();
|
||||||
|
@ -33,7 +33,6 @@ import java.util.HashMap;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.TreeMap;
|
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
@ -409,7 +408,7 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
/**
|
/**
|
||||||
* Get the content metadata
|
* Get the content metadata
|
||||||
*
|
*
|
||||||
* @return Metadata name -> value
|
* @return Metadata as a name -> value map
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Map<String, String> getMetadata() {
|
public Map<String, String> getMetadata() {
|
||||||
|
@ -28,9 +28,6 @@ import java.util.Map;
|
|||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.apache.tika.metadata.Metadata;
|
|
||||||
import org.openide.util.Exceptions;
|
|
||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
import org.openide.util.NbBundle.Messages;
|
import org.openide.util.NbBundle.Messages;
|
||||||
@ -477,13 +474,12 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
*
|
*
|
||||||
* @param aFile file to extract strings from, divide into chunks and
|
* @param aFile file to extract strings from, divide into chunks and
|
||||||
* index
|
* index
|
||||||
* @param detectedFormat mime-type detected, or null if none detected
|
|
||||||
*
|
*
|
||||||
* @return true if the file was text_ingested, false otherwise
|
* @return true if the file was text_ingested, false otherwise
|
||||||
*
|
*
|
||||||
* @throws IngesterException exception thrown if indexing failed
|
* @throws IngesterException exception thrown if indexing failed
|
||||||
*/
|
*/
|
||||||
private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
|
private boolean extractTextAndIndex(AbstractFile aFile) throws IngesterException {
|
||||||
ImageConfig imageConfig = new ImageConfig();
|
ImageConfig imageConfig = new ImageConfig();
|
||||||
imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
|
imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
|
||||||
ProcessTerminator terminator = () -> context.fileIngestIsCancelled();
|
ProcessTerminator terminator = () -> context.fileIngestIsCancelled();
|
||||||
@ -497,8 +493,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
try {
|
try {
|
||||||
Map<String, String> metadata = extractor.getMetadata();
|
Map<String, String> metadata = extractor.getMetadata();
|
||||||
CharSource formattedMetadata = getMetaDataCharSource(metadata);
|
CharSource formattedMetadata = getMetaDataCharSource(metadata);
|
||||||
|
//Append the metadata to end of the file text
|
||||||
finalReader = CharSource.concat(new CharSource() {
|
finalReader = CharSource.concat(new CharSource() {
|
||||||
//Wrap the TikaReader into a CharSource for concatenation
|
//Wrap fileText reader for concatenation
|
||||||
@Override
|
@Override
|
||||||
public Reader openStream() throws IOException {
|
public Reader openStream() throws IOException {
|
||||||
return fileText;
|
return fileText;
|
||||||
@ -518,11 +515,11 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Format the
|
* Pretty print the text extractor metadata.
|
||||||
*
|
*
|
||||||
* @param metadata The Metadata to wrap as a CharSource
|
* @param metadata The Metadata map to wrap as a CharSource
|
||||||
*
|
*
|
||||||
* @return A CharSource for the given MetaData
|
* @return A CharSource for the given Metadata
|
||||||
*/
|
*/
|
||||||
private CharSource getMetaDataCharSource(Map<String, String> metadata) {
|
private CharSource getMetaDataCharSource(Map<String, String> metadata) {
|
||||||
return CharSource.wrap(new StringBuilder("\n\n------------------------------METADATA------------------------------\n\n")
|
return CharSource.wrap(new StringBuilder("\n\n------------------------------METADATA------------------------------\n\n")
|
||||||
@ -633,7 +630,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
extractStringsAndIndex(aFile);
|
extractStringsAndIndex(aFile);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!extractTextAndIndex(aFile, fileType)) {
|
if (!extractTextAndIndex(aFile)) {
|
||||||
// Text extractor not found for file. Extract string only.
|
// Text extractor not found for file. Extract string only.
|
||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user