mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-16 09:47:42 +00:00
Merge pull request #2506 from esaunders/remove-hit-trimming
Remove hit trimming
This commit is contained in:
commit
405e4598a6
@ -66,7 +66,7 @@ ExtractedContentViewer.getTitle=Indexed Text
|
||||
ExtractedContentViewer.getSolrContent.knownFileMsg=<p style\=''font-style\:italic''>{0} is a known file (based on MD5 hash) and does not have text in the index.</p>
|
||||
ExtractedContentViewer.getSolrContent.noTxtYetMsg=<p style\=''font-style\:italic''>{0} does not have text in the index.<br/>It may have no text, not been analyzed yet, or keyword search was not enabled during ingest.</p>
|
||||
ExtractedContentViewer.getSolrContent.txtBodyItal=<span style\=''font-style\:italic''>{0}</span>
|
||||
HighlightedMatchesSource.getMarkup.noMatchMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>Failed to retrieve content for keyword hit. <br />The keyword could have been in the file name. <br />Advance to another page if present, or to view the original text, choose File Text <br />in the drop down menu to the right...</span></pre></html>
|
||||
HighlightedMatchesSource.getMarkup.noMatchMsg=<span>Failed to retrieve indexed text for keyword hit. Advance to another page if present, or to view the original text, choose File Text in the drop down menu to the right. Alternatively, you may choose to extract file content and search for the hit using an external application (e.g. a text editor).</span>
|
||||
HighlightedMatchesSource.getMarkup.queryFailedMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>Failed to retrieve keyword hit results. <br />Confirm that Autopsy can connect to the Solr server. <br /></span></pre></html>
|
||||
HighlightedMatchesSource.toString=Search Results
|
||||
Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2016 Basis Technology Corp.
|
||||
* Copyright 2011-2017 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -80,12 +80,6 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
private boolean escaped;
|
||||
private String escapedQuery;
|
||||
|
||||
// These are the valid characters that can appear either before or after a
|
||||
// keyword hit. We use these characters to try to turn the hit into a
|
||||
// token that can be more readily matched when it comes to highlighting
|
||||
// against the Schema.TEXT field later.
|
||||
private static final String BOUNDARY_CHARS = "[\\s\\[\\]\\(\\)\\,\\\"\\\'\\!\\?\\.\\/\\:\\;\\=\\<\\>\\^\\{\\}]"; //NON-NLS
|
||||
|
||||
// Lucene regular expressions do not support the following Java predefined
|
||||
// and POSIX character classes. There are other valid Java character classes
|
||||
// that are not supported by Lucene but we do not check for all of them.
|
||||
@ -240,31 +234,17 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
|
||||
final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
|
||||
|
||||
// By default, we create keyword hits on whitespace or punctuation character boundaries.
|
||||
// Having a set of well defined boundary characters produces hits that can
|
||||
// subsequently be matched for highlighting against the tokens produced by
|
||||
// the standard tokenizer.
|
||||
// This behavior can be overridden by the user if they give us a search string
|
||||
// with .* at the start and/or end of the string. This basically tells us to find
|
||||
// all hits instead of the ones surrounded by one of our boundary characters.
|
||||
String keywordTokenRegex
|
||||
= // If the given search string starts with .*, we ignore our default
|
||||
// boundary prefix characters
|
||||
(queryStringContainsWildcardPrefix ? "" : "(^|" + BOUNDARY_CHARS + ")") //NON-NLS
|
||||
+ keywordString
|
||||
// If the given search string ends with .*, we ignore our default
|
||||
// boundary suffix characters
|
||||
+ (queryStringContainsWildcardSuffix ? "" : "($|" + BOUNDARY_CHARS + ")"); //NON-NLS
|
||||
|
||||
for (Object content_obj : content_str) {
|
||||
String content = (String) content_obj;
|
||||
Matcher hitMatcher = Pattern.compile(keywordTokenRegex).matcher(content);
|
||||
Matcher hitMatcher = Pattern.compile(keywordString).matcher(content);
|
||||
int offset = 0;
|
||||
|
||||
while (hitMatcher.find(offset)) {
|
||||
StringBuilder snippet = new StringBuilder();
|
||||
|
||||
//"parent" entries in the index don't have chunk size, so just accept those hits
|
||||
// If the location of the hit is beyond this chunk (i.e. it
|
||||
// exists in the overlap region), we skip the hit. It will
|
||||
// show up again as a hit in the chunk following this one.
|
||||
if (chunkSize != null && hitMatcher.start() >= chunkSize) {
|
||||
break;
|
||||
}
|
||||
@ -277,14 +257,6 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
// input where they were separated by a single boundary character.
|
||||
offset = hitMatcher.end() - 1;
|
||||
|
||||
// Remove any remaining leading and trailing boundary characters.
|
||||
if (!queryStringContainsWildcardPrefix) {
|
||||
hit = hit.replaceAll("^" + BOUNDARY_CHARS, ""); //NON-NLS
|
||||
}
|
||||
if (!queryStringContainsWildcardSuffix) {
|
||||
hit = hit.replaceAll(BOUNDARY_CHARS + "$", ""); //NON-NLS
|
||||
}
|
||||
|
||||
/*
|
||||
* If searching for credit card account numbers, do a Luhn check
|
||||
* on the term and discard it if it does not pass.
|
||||
|
Loading…
x
Reference in New Issue
Block a user