From 31049106485ff45f3363ed80b390cef728b39fb3 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Sat, 28 Nov 2020 22:32:01 -0500 Subject: [PATCH 1/5] Added sanitization of lowercased strings --- .../src/org/sleuthkit/autopsy/keywordsearch/Chunker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index fd0f11ab74..c5045ddca4 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -288,7 +288,7 @@ class Chunker implements Iterator, Iterable { // lower case the string and get it's size. NOTE: lower casing can // change the size of the string! - String lowerCasedSegment = chunkSegment.toString().toLowerCase(); + String lowerCasedSegment = sanitize(chunkSegment.toString().toLowerCase()).toString(); int lowerCasedSegmentSize = lowerCasedSegment.getBytes(UTF_8).length; //if it will not put us past maxBytes @@ -357,7 +357,7 @@ class Chunker implements Iterator, Iterable { // lower case the string and get it's size. NOTE: lower casing can // change the size of the string. - String lowerCasedSegment = sanitizedChunkSegment.toString().toLowerCase(); + String lowerCasedSegment = sanitize(sanitizedChunkSegment.toString().toLowerCase()).toString(); int lowerCasedSegmentSize = lowerCasedSegment.getBytes(UTF_8).length; //if it will not put us past maxBytes From d1f7e97344f5472792d0234f6692bd22daa85596 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 4 Dec 2020 23:39:43 -0500 Subject: [PATCH 2/5] Sanitizing all strings --- .../src/org/sleuthkit/autopsy/keywordsearch/Chunker.java | 7 ++++--- .../src/org/sleuthkit/autopsy/keywordsearch/Ingester.java | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index c5045ddca4..8f6fbd905f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -184,7 +184,8 @@ class Chunker implements Iterator, Iterable { return new StringBuilder(UTF_16.decode(UTF_16.encode(s))); } - private static StringBuilder sanitize(String s) { + // ELTODO + static StringBuilder sanitize(String s) { String normStr = Normalizer.normalize(s, Normalizer.Form.NFKC); return sanitizeToUTF8(replaceInvalidUTF16(normStr)); } @@ -288,7 +289,7 @@ class Chunker implements Iterator, Iterable { // lower case the string and get it's size. NOTE: lower casing can // change the size of the string! - String lowerCasedSegment = sanitize(chunkSegment.toString().toLowerCase()).toString(); + String lowerCasedSegment = chunkSegment.toString().toLowerCase(); int lowerCasedSegmentSize = lowerCasedSegment.getBytes(UTF_8).length; //if it will not put us past maxBytes @@ -357,7 +358,7 @@ class Chunker implements Iterator, Iterable { // lower case the string and get it's size. NOTE: lower casing can // change the size of the string. - String lowerCasedSegment = sanitize(sanitizedChunkSegment.toString().toLowerCase()).toString(); + String lowerCasedSegment = sanitizedChunkSegment.toString().toLowerCase(); int lowerCasedSegmentSize = lowerCasedSegment.getBytes(UTF_8).length; //if it will not put us past maxBytes diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 576b65d581..cea68452e1 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -232,7 +232,7 @@ class Ingester { //Make a SolrInputDocument out of the field map SolrInputDocument updateDoc = new SolrInputDocument(); for (String key : fields.keySet()) { - updateDoc.addField(key, fields.get(key)); + updateDoc.addField(key, Chunker.sanitize((String)fields.get(key)).toString()); } try { From 34462c0f91a0561bff13771fcb5e1a5657d3f084 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 7 Dec 2020 17:18:04 -0500 Subject: [PATCH 3/5] Sanitizing language strings --- .../org/sleuthkit/autopsy/keywordsearch/Chunker.java | 8 +++++++- .../LanguageSpecificContentIndexingHelper.java | 10 +++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java index 8f6fbd905f..2deb82d2f5 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Chunker.java @@ -184,7 +184,13 @@ class Chunker implements Iterator, Iterable { return new StringBuilder(UTF_16.decode(UTF_16.encode(s))); } - // ELTODO + /** + * Wrapper method that performs UTF-8 string sanitization. + * + * @param s String to be sanitized. + * + * @return Sanitized string. + */ static StringBuilder sanitize(String s) { String normStr = Normalizer.normalize(s, Normalizer.Form.NFKC); return sanitizeToUTF8(replaceInvalidUTF16(normStr)); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java index d0988c83f3..4b20216aca 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java @@ -54,7 +54,7 @@ class LanguageSpecificContentIndexingHelper { // index the chunk to a language specific field fields.put(Server.Schema.CONTENT_JA.toString(), values); - fields.put(Server.Schema.LANGUAGE.toString(), language.getValue()); + fields.put(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString()); } void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map fields, String baseChunkID, Language language) @@ -62,15 +62,15 @@ class LanguageSpecificContentIndexingHelper { //Make a SolrInputDocument out of the field map SolrInputDocument updateDoc = new SolrInputDocument(); for (String key : fields.keySet()) { - updateDoc.addField(key, fields.get(key)); + updateDoc.addField(key, Chunker.sanitize((String)fields.get(key)).toString()); } try { - updateDoc.setField(Server.Schema.ID.toString(), MiniChunkHelper.getChunkIdString(baseChunkID)); + updateDoc.setField(Server.Schema.ID.toString(), Chunker.sanitize(MiniChunkHelper.getChunkIdString(baseChunkID)).toString()); // index the chunk to a language specific field - updateDoc.addField(Server.Schema.CONTENT_JA.toString(), chunk.toString().substring(chunk.getBaseChunkLength())); - updateDoc.addField(Server.Schema.LANGUAGE.toString(), language.getValue()); + updateDoc.addField(Server.Schema.CONTENT_JA.toString(), Chunker.sanitize(chunk.toString().substring(chunk.getBaseChunkLength())).toString()); + updateDoc.addField(Server.Schema.LANGUAGE.toString(), Chunker.sanitize(language.getValue()).toString()); TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk"); From edc5a799d5a862948d6fffc82366d707136f2a95 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Mon, 7 Dec 2020 17:21:07 -0500 Subject: [PATCH 4/5] Sanitizing language strings --- .../keywordsearch/LanguageSpecificContentIndexingHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java index 4b20216aca..38fcfd429e 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java @@ -49,7 +49,7 @@ class LanguageSpecificContentIndexingHelper { List values = new ArrayList<>(); values.add(chunk.toString()); if (fields.containsKey(Server.Schema.FILE_NAME.toString())) { - values.add(fields.get(Server.Schema.FILE_NAME.toString()).toString()); + values.add(Chunker.sanitize(fields.get(Server.Schema.FILE_NAME.toString()).toString()).toString()); } // index the chunk to a language specific field From 051f435dbe2bf74d24bbb50dd4b5df1e9f099fca Mon Sep 17 00:00:00 2001 From: Kelly Kelly Date: Wed, 16 Dec 2020 13:04:54 -0500 Subject: [PATCH 5/5] Fixed auto ingest warning message --- .../autopsy/experimental/configuration/Bundle.properties | 2 +- .../autopsy/experimental/configuration/Bundle.properties-MERGED | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties index 4e5ec81f78..bf4384ff93 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties @@ -35,7 +35,7 @@ AutoIngestSettingsPanel.ImageDirectoryUnspecified=Shared images folder must be s AutoIngestSettingsPanel.InvalidPortNumber=Invalid port number. AutoIngestSettingsPanel.jRadioButtonCopyFiles.text=File Copy mode AutoIngestSettingsPanel.KeywordSearchNull=Cannot find Keyword Search service -AutoIngestSettingsPanel.MustRestart=Autopsy must be restarted for new configuration to take effect +AutoIngestSettingsPanel.MustRestart=Application must be restarted for new configuration to take effect AutoIngestSettingsPanel.nodePanel.TabConstraints.tabTitle=Node Configuration AutoIngestSettingsPanel.NodeStatusLogging.text=Node Status Logging Settings AutoIngestSettingsPanel.restartRequiredLabel.text=Application restart required to take effect. diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties-MERGED b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties-MERGED index 08f4718734..aabcdf1c23 100755 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties-MERGED +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/Bundle.properties-MERGED @@ -35,7 +35,7 @@ AutoIngestSettingsPanel.ImageDirectoryUnspecified=Shared images folder must be s AutoIngestSettingsPanel.InvalidPortNumber=Invalid port number. AutoIngestSettingsPanel.jRadioButtonCopyFiles.text=File Copy mode AutoIngestSettingsPanel.KeywordSearchNull=Cannot find Keyword Search service -AutoIngestSettingsPanel.MustRestart=Autopsy must be restarted for new configuration to take effect +AutoIngestSettingsPanel.MustRestart=Application must be restarted for new configuration to take effect AutoIngestSettingsPanel.nodePanel.TabConstraints.tabTitle=Node Configuration AutoIngestSettingsPanel.NodeStatusLogging.text=Node Status Logging Settings AutoIngestSettingsPanel.restartRequiredLabel.text=Application restart required to take effect.