mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00
More work
This commit is contained in:
parent
8494453a09
commit
e3da0cae14
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2021 Basis Technology Corp.
|
||||
* Copyright 2011-2023 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -659,14 +659,8 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
|
||||
int totalPages = source.getNumberPages();
|
||||
updateTotalPagesDisplay(totalPages);
|
||||
|
||||
// ELTODO
|
||||
//if (totalPages < 2) {
|
||||
// enableNextPageControl(false);
|
||||
// enablePrevPageControl(false);
|
||||
//} else {
|
||||
enableNextPageControl(source.hasNextPage());
|
||||
enablePrevPageControl(source.hasPreviousPage());
|
||||
//}
|
||||
enableNextPageControl(source.hasNextPage());
|
||||
enablePrevPageControl(source.hasPreviousPage());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -32,46 +32,33 @@ import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
||||
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
|
||||
/** ELTODO
|
||||
* A "source" for the extracted abstractFile viewer that displays "raw" (not
|
||||
* highlighted) indexed text for a file or an artifact.
|
||||
/**
|
||||
* A "source" for abstractFile viewer that displays "raw" extracted text for a
|
||||
* file. Only supports file types for which there are text extractors. Uses
|
||||
* chunking algorithm used by KeywordSearchIngestModule. The readers used in
|
||||
* chunking don't have ability to go backwards or to fast forward to a specific
|
||||
* offset. Therefore there is no way to scroll pages back, or to determine how
|
||||
* many total pages there are.
|
||||
*/
|
||||
class ExtractedText implements IndexedText {
|
||||
|
||||
private int numPages = 0;
|
||||
private int currentPage = 0;
|
||||
private final AbstractFile abstractFile;
|
||||
private final long objectId;
|
||||
private Chunker chunker = null;
|
||||
private static final Logger logger = Logger.getLogger(ExtractedText.class.getName());
|
||||
|
||||
/**
|
||||
* Construct a new ExtractedText object for the given content and object id.
|
||||
* This constructor needs both a content object and an object id because the
|
||||
* ExtractedText implementation attempts to provide useful messages in the
|
||||
* text content viewer for (a) the case where a file has not been indexed
|
||||
* because known files are being skipped and (b) the case where the file
|
||||
* content has not yet been indexed.
|
||||
* Construct a new ExtractedText object for the given abstract file.
|
||||
*
|
||||
* @param file Abstract file.
|
||||
* @param objectId Either a file id or an artifact id.
|
||||
* @param file Abstract file.
|
||||
*/
|
||||
ExtractedText(AbstractFile file, long objectId) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
|
||||
ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
|
||||
this.abstractFile = file;
|
||||
this.objectId = objectId;
|
||||
this.numPages = -1; // We don't know how many pages there are until we reach end of the document
|
||||
initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the ID that this object is associated with -- to help with caching
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public long getObjectId() {
|
||||
return this.objectId;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCurrentPage() {
|
||||
return this.currentPage;
|
||||
@ -177,9 +164,6 @@ class ExtractedText implements IndexedText {
|
||||
return numPages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the internal values, such as pages
|
||||
*/
|
||||
private void initialize() throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
|
||||
TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
|
||||
|
||||
@ -194,7 +178,6 @@ class ExtractedText implements IndexedText {
|
||||
/**
|
||||
* Extract text from abstractFile
|
||||
*
|
||||
* @param node a node that has extracted abstractFile
|
||||
* @param currentPage currently used page
|
||||
*
|
||||
* @return the extracted text
|
||||
@ -209,7 +192,7 @@ class ExtractedText implements IndexedText {
|
||||
logger.log(Level.WARNING, "Error chunking content from " + abstractFile.getId() + ": " + abstractFile.getName(), chunker.getException());
|
||||
throw chunker.getException();
|
||||
}
|
||||
|
||||
|
||||
indexedText = chunk.toString();
|
||||
} else {
|
||||
return Bundle.IndexedText_errorMessage_errorGettingText();
|
||||
@ -229,9 +212,7 @@ class ExtractedText implements IndexedText {
|
||||
try {
|
||||
Map<String, String> metadata = extractor.getMetadata();
|
||||
if (!metadata.isEmpty()) {
|
||||
// Creating the metadata artifact here causes occasional problems
|
||||
// when indexing the text, so we save the metadata map to
|
||||
// use after this method is complete.
|
||||
// save the metadata map to use after this method is complete.
|
||||
extractedMetadata.putAll(metadata);
|
||||
}
|
||||
CharSource formattedMetadata = KeywordSearchIngestModule.getMetaDataCharSource(metadata);
|
||||
@ -249,9 +230,8 @@ class ExtractedText implements IndexedText {
|
||||
//Just send file text.
|
||||
finalReader = fileText;
|
||||
}
|
||||
//divide into chunks and index
|
||||
//divide into chunks
|
||||
return finalReader;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2019 Basis Technology Corp.
|
||||
* Copyright 2011-2023 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -28,7 +28,6 @@ import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import org.apache.tika.mime.MimeTypes;
|
||||
import org.openide.nodes.Node;
|
||||
import org.openide.util.Exceptions;
|
||||
import org.openide.util.Lookup;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.openide.util.lookup.ServiceProvider;
|
||||
@ -36,7 +35,6 @@ import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||
import org.sleuthkit.autopsy.corecomponentinterfaces.TextViewer;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||
import org.sleuthkit.autopsy.keywordsearch.AdHocSearchChildFactory.AdHocQueryResult;
|
||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||
import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
||||
@ -178,7 +176,7 @@ public class ExtractedTextViewer implements TextViewer {
|
||||
// see if it's a file type for which we can extract text
|
||||
if (ableToExtractTextFromFile(file)) {
|
||||
try {
|
||||
rawContentText = new ExtractedText(file, file.getId());
|
||||
rawContentText = new ExtractedText(file);
|
||||
sources.add(rawContentText);
|
||||
} catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
|
||||
// do nothing
|
||||
@ -451,7 +449,9 @@ public class ExtractedTextViewer implements TextViewer {
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we can extract text for this file type.
|
||||
* Check if we can extract text for this file type using one of our text extractors.
|
||||
* NOTE: the logic in this method should be similar and based on the
|
||||
* logic of how KeywordSearchIngestModule decides which files to index.
|
||||
*
|
||||
* @param file Abstract File
|
||||
*
|
||||
@ -465,12 +465,6 @@ public class ExtractedTextViewer implements TextViewer {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract unicode strings from unallocated and unused blocks and carved
|
||||
* text files. The reason for performing string extraction on these is
|
||||
* because they all may contain multiple encodings which can cause text
|
||||
* to be missed by the more specialized text extractors.
|
||||
*/
|
||||
if ((fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
|
||||
|| fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
|
||||
|| (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED))) {
|
||||
@ -478,22 +472,10 @@ public class ExtractedTextViewer implements TextViewer {
|
||||
}
|
||||
|
||||
final long size = file.getSize();
|
||||
//if not to index content, or a dir, or 0 content, index meta data only
|
||||
|
||||
if (file.isDir() || size == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ELTODO do we need to skip text files here? probably not.
|
||||
if (file.getNameExtension().equalsIgnoreCase("txt")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ELTODO do we need to skip known files here? probably not.
|
||||
if (KeywordSearchSettings.getSkipKnown() && file.getKnown().equals(TskData.FileKnown.KNOWN)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
String mimeType = fileTypeDetector.getMIMEType(file).trim().toLowerCase();
|
||||
|
||||
if (KeywordSearchIngestModule.ARCHIVE_MIME_TYPES.contains(mimeType)) {
|
||||
@ -501,7 +483,7 @@ public class ExtractedTextViewer implements TextViewer {
|
||||
}
|
||||
|
||||
if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
|
||||
// ELTODO return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -38,7 +38,6 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.logging.Level;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.tika.mime.MimeTypes;
|
||||
import org.openide.util.Exceptions;
|
||||
import org.openide.util.Lookup;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.openide.util.NbBundle.Messages;
|
||||
|
Loading…
x
Reference in New Issue
Block a user