From 26b47b9ca979d35662c0600346f5ff9d42339c37 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Fri, 7 Jun 2019 13:58:02 -0400 Subject: [PATCH] Added payload limit API and implemented it in the Bing and Google translators --- .../texttranslation/TextTranslator.java | 7 ++++++ .../translators/BingTranslator.java | 6 ++++- .../translators/GoogleTranslator.java | 25 +++++++++++-------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/texttranslation/TextTranslator.java b/Core/src/org/sleuthkit/autopsy/texttranslation/TextTranslator.java index 5e6ad8b19e..e996e6331e 100755 --- a/Core/src/org/sleuthkit/autopsy/texttranslation/TextTranslator.java +++ b/Core/src/org/sleuthkit/autopsy/texttranslation/TextTranslator.java @@ -56,4 +56,11 @@ public interface TextTranslator { * Save the settings as they have been modified in the component. */ void saveSettings(); + + /** + * Returns the hard limit for translation request sizes. + * + * @return The largest request size, in characters, that the translator accepts. + */ + int getMaxPayloadSize(); } diff --git a/Core/src/org/sleuthkit/autopsy/texttranslation/translators/BingTranslator.java b/Core/src/org/sleuthkit/autopsy/texttranslation/translators/BingTranslator.java index 2b234f71d3..3dfd984b0a 100644 --- a/Core/src/org/sleuthkit/autopsy/texttranslation/translators/BingTranslator.java +++ b/Core/src/org/sleuthkit/autopsy/texttranslation/translators/BingTranslator.java @@ -108,7 +108,6 @@ public class BingTranslator implements TextTranslator { String toTranslate = string.trim(); //Translates some text into English, without specifying the source langauge. - // HTML files were producing lots of white space at the end //Google Translate required us to replace (\r\n|\n) with <br>
//but Bing Translator doesn not have that requirement. //The free account has a maximum file size. If you have a paid account, @@ -172,4 +171,9 @@ public class BingTranslator implements TextTranslator { throw new TranslationException("JSON text does not match Bing Translator scheme: " + e); } } + + @Override + public int getMaxPayloadSize() { + return MAX_STRING_LENGTH; + } } diff --git a/Core/src/org/sleuthkit/autopsy/texttranslation/translators/GoogleTranslator.java b/Core/src/org/sleuthkit/autopsy/texttranslation/translators/GoogleTranslator.java index 46bdd6da67..3142eb7542 100644 --- a/Core/src/org/sleuthkit/autopsy/texttranslation/translators/GoogleTranslator.java +++ b/Core/src/org/sleuthkit/autopsy/texttranslation/translators/GoogleTranslator.java @@ -47,7 +47,8 @@ import org.sleuthkit.autopsy.texttranslation.TranslationException; public final class GoogleTranslator implements TextTranslator { private static final Logger logger = Logger.getLogger(GoogleTranslator.class.getName()); - private static final int MAX_STRING_LENGTH = 15000; + //See translate method for justification of this limit. + private static final int MAX_PAYLOAD_SIZE = 5000; private final GoogleTranslatorSettingsPanel settingsPanel; private final GoogleTranslatorSettings settings = new GoogleTranslatorSettings(); private Translate googleTranslate; @@ -90,21 +91,20 @@ public final class GoogleTranslator implements TextTranslator { if (googleTranslate != null) { try { // Translates some text into English, without specifying the source language. - - // HTML files were producing lots of white space at the end String substring = string.trim(); // We can't currently set parameters, so we are using the default behavior of // assuming the input is HTML. We need to replace newlines with
<br> for Google to preserve them substring = substring.replaceAll("(\r\n|\n)", "<br>"); - // The API complains if the "Payload" is over 204800 bytes. I'm assuming that - // deals with the full request. At some point, we get different errors about too - // much text. Officially, Google says they will googleTranslate only 5k chars, - // but we have seen more than that working. - // there could be a value betwen 15k and 25k that works. I (BC) didn't test further - if (substring.length() > MAX_STRING_LENGTH) { - substring = substring.substring(0, MAX_STRING_LENGTH); + // The API complains if the "Payload" is over 204800 bytes. Google references that + //their service is optimized for 2K code points and recommends keeping the requests that size. + //There is a hard limit of 30K code points per request. There is also a time-based quota that + //we are not enforcing, which may lead to 403 errors. We are currently configured for a max of 5K + //in each request, for two reasons. 1) Is to be more in line with Google's recommendation. 2) Is to + //minimize accidental exceedance of time-based quotas. + if (substring.length() > MAX_PAYLOAD_SIZE) { + substring = substring.substring(0, MAX_PAYLOAD_SIZE); } Translation translation = googleTranslate.translate(substring); @@ -178,4 +178,9 @@ public final class GoogleTranslator implements TextTranslator { settings.saveSettings(); loadTranslator(); } + + @Override + public int getMaxPayloadSize() { + return MAX_PAYLOAD_SIZE; + } }