diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 9491e75783..0a05a238c4 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -50,7 +50,6 @@ import org.apache.tika.parser.ParsingReader; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.pdf.PDFParserConfig; -import org.apache.tika.mime.MediaType; import org.openide.util.NbBundle; import org.openide.modules.InstalledFileLocator; import org.openide.util.Lookup; @@ -126,7 +125,7 @@ final class TikaTextExtractor implements TextExtractor { private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory); private static final String SQLITE_MIMETYPE = "application/x-sqlite3"; - private final AutoDetectParser parser; + private final AutoDetectParser parser = new AutoDetectParser(); private final Content content; private boolean tesseractOCREnabled; @@ -135,7 +134,7 @@ final class TikaTextExtractor implements TextExtractor { private static final File TESSERACT_PATH = locateTesseractExecutable(); private String languagePacks = formatLanguagePacks(PlatformUtil.getOcrLanguagePacks()); private static final String TESSERACT_OUTPUT_FILE_NAME = "tess_output"; //NON-NLS - + private ProcessTerminator processTerminator; private static final List TIKA_SUPPORTED_TYPES @@ -146,23 +145,12 @@ final class TikaTextExtractor implements TextExtractor { public TikaTextExtractor(Content content) { this.content = content; - - parser = new AutoDetectParser(); - - if (content instanceof AbstractFile) { - AbstractFile file = (AbstractFile) content; - if (file.getMIMEType() != null && !file.getMIMEType().isEmpty()) { - //Force Tika to use our pre-computed mime type during detection - 
parser.setDetector((InputStream inStream, Metadata metaData) - -> MediaType.parse(file.getMIMEType())); - } - } } /** * If Tesseract has been installed and is set to be used through - * configuration, then ocr is enabled. OCR can only currently be run on 64 - * bit Windows OS. + * configuration, then ocr is enabled. OCR can only currently be run on + * 64 bit Windows OS. * * @return Flag indicating if OCR is set to be used. */ @@ -211,7 +199,7 @@ final class TikaTextExtractor implements TextExtractor { TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); String tesseractFolder = TESSERACT_PATH.getParent(); ocrConfig.setTesseractPath(tesseractFolder); - + ocrConfig.setLanguage(languagePacks); ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath()); parseContext.set(TesseractOCRConfig.class, ocrConfig); @@ -281,7 +269,7 @@ final class TikaTextExtractor implements TextExtractor { File outputFile = null; try { String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory(); - + //Appending file id makes the name unique String tempFileName = FileUtil.escapeFileName(file.getId() + file.getName()); inputFile = Paths.get(tempDirectory, tempFileName).toFile(); @@ -322,7 +310,7 @@ final class TikaTextExtractor implements TextExtractor { } } } - + /** * Wraps the creation of a TikaReader into a Future so that it can be * cancelled. 
@@ -434,11 +422,11 @@ final class TikaTextExtractor implements TextExtractor { */ @Override public boolean isSupported() { - if (!(content instanceof AbstractFile)) { + if(!(content instanceof AbstractFile)) { return false; } - - String detectedType = ((AbstractFile) content).getMIMEType(); + + String detectedType = ((AbstractFile)content).getMIMEType(); if (detectedType == null || BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used) || ARCHIVE_MIME_TYPES.contains(detectedType) @@ -447,7 +435,7 @@ final class TikaTextExtractor implements TextExtractor { ) { return false; } - + return TIKA_SUPPORTED_TYPES.contains(detectedType); } @@ -497,11 +485,11 @@ final class TikaTextExtractor implements TextExtractor { if (context != null) { ImageConfig configInstance = context.lookup(ImageConfig.class); if (configInstance != null) { - if (Objects.nonNull(configInstance.getOCREnabled())) { + if(Objects.nonNull(configInstance.getOCREnabled())) { this.tesseractOCREnabled = configInstance.getOCREnabled(); } - - if (Objects.nonNull(configInstance.getOCRLanguages())) { + + if(Objects.nonNull(configInstance.getOCRLanguages())) { this.languagePacks = formatLanguagePacks(configInstance.getOCRLanguages()); } } diff --git a/build.xml b/build.xml index 4b88b52cc8..7e7e87d388 100644 --- a/build.xml +++ b/build.xml @@ -102,7 +102,6 @@ - diff --git a/docs/doxygen-user/footer.html b/docs/doxygen-user/footer.html index e53036dde6..5ab86c6e86 100644 --- a/docs/doxygen-user/footer.html +++ b/docs/doxygen-user/footer.html @@ -1,5 +1,5 @@
-

Copyright © 2012-2018 Basis Technology. Generated on $date
+

Copyright © 2012-2019 Basis Technology. Generated on $date
This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.

diff --git a/docs/doxygen-user/main.dox b/docs/doxygen-user/main.dox index e22d5fb20e..231b9dc95c 100644 --- a/docs/doxygen-user/main.dox +++ b/docs/doxygen-user/main.dox @@ -65,6 +65,7 @@ The following topics are available here: - \subpage live_triage_page - \subpage advanced_page - \subpage experimental_page +- \subpage translations_page If the topic you need is not listed, refer to the Autopsy Wiki or join the SleuthKit User List at SourceForge. diff --git a/docs/doxygen-user/translations.dox b/docs/doxygen-user/translations.dox new file mode 100644 index 0000000000..6ff49fa558 --- /dev/null +++ b/docs/doxygen-user/translations.dox @@ -0,0 +1,75 @@ +/*! \page translations_page Translating This Document + +The Autopsy user base is global. You can help out by translating the UI and this documentation. + +\section translations_doc Translating Documentation + +This section outlines how to translate this user documentation. To translate, you will need: +- A git account +- Basic familiarity with git +- Text editor + +The Autopsy documentation is created by [Doxygen](http://www.doxygen.nl/) from ".dox" text files in the [docs/doxygen-user](https://github.com/sleuthkit/autopsy/tree/develop/docs/doxygen-user) folder in the Github repository. + +The first step is to fork the [Autopsy Repository](https://github.com/sleuthkit/autopsy) into your Git account and make a clone of it into your environment so that you can make edits to the files. + +As you are editing, you can review your documentation by installing Doxygen and running 'doxygen' from within the translations folder. It will save the HTML to the 'user-docs' folder. + +\subsection translations_doc_start Translating To a New Language + +If there is not already documentation in a language, then you need to make a copy of the entire English 'doxygen-user' folder and name it 'doxygen-user_AB' where AB is replaced by the 2-character [language code](http://www.lingoes.net/en/translator/langcode.htm). 
For example, 'doxygen-user_fr' for French and 'doxygen-user_ja' for Japanese. + +Edit the Doxyfile to update the OUTPUT_LANGUAGE field. For English it has: + +\code +OUTPUT_LANGUAGE = English +\endcode + +Now, simply start translating the English documents. + +\subsection translations_doc_update Updating The Documentation + +When new releases are made and the English documentation is updated, the other languages should be updated as well. To determine what has changed: +- First, determine when the translated documentation was last changed. From a command line, you can change into the translated documentation folder and type: + +\code + $ cd docs/doxygen-user_fr + $ git log -n 1 . + commit 94e4b1042af47908dd4a0b2959b3f6c3d4af1111 + Author: John Doe + Date: Tue Jan 1 22:56:09 2019 -0500 + + update to quick start +\endcode + +This shows you that commit 94e4b1042af47908dd4a0b2959b3f6c3d4af1111 was the last translation update to occur for the French version. + +- Next, determine what changed in the English version since then: + +\code + $ git diff 94e4b1042af47908dd4a0b2959b3f6c3d4af1111 ../doxygen-user + diff --git a/docs/doxygen-user/central_repo.dox b/docs/doxygen-user/central_repo.dox +index 83d3407e8..e8cd01c1b 100644 + --- a/docs/doxygen-user/central_repo.dox + +++ b/docs/doxygen-user/central_repo.dox + @@ -79,6 +79,16 @@ Descriptions of the property types: + - Phone numbers are currently only extracted from call logs, contact lists and message, which come from the Android Analyzer module. + - USB Devices + - USB device properties come from the registry parsing in the Recent Activity Module. + +- Wireless Networks + + - Wireless networks are correlated on SSIDs, and come from the registry par +\endcode + +- Update the translated documentation accordingly based on what changed in the English version. 
+ +- If you do not get to complete all of the changes, you should create a TODO.txt file that lists what was not updated so that other people know that not everything was updated. + +\subsection translations_doc_commit Committing the Documentation + +You should submit a Github Pull Request when: +- You complete a language. +- You don't have time to do more work, but want to submit what you did. + +To get the code committed, send a [pull request](https://help.github.com/articles/about-pull-requests/) to the main Autopsy repository. + +*/ \ No newline at end of file diff --git a/release_scripts/update_sleuthkit_version.pl b/release_scripts/update_sleuthkit_version.pl index e630e4890b..89b0c42de5 100755 --- a/release_scripts/update_sleuthkit_version.pl +++ b/release_scripts/update_sleuthkit_version.pl @@ -31,14 +31,13 @@ sub main { update_tsk_version(); update_core_project_properties(); update_core_project_xml(); - + update_unix_setup(); + print "Files updated. You need to commit and push them\n"; } - - ###################################################### # Utility functions @@ -194,6 +193,39 @@ sub update_core_project_xml { } +# update the TSK_VERSION value in unix_setup.sh +sub update_unix_setup { + + my $orig = "unix_setup.sh"; + my $temp = "${orig}-bak"; + + print "Updating the version in ${orig}\n"; + + open (CONF_IN, "<${orig}") or die "Cannot open ${orig}"; + open (CONF_OUT, ">${temp}") or die "Cannot open ${temp}"; + + my $found = 0; + while () { + if (/^TSK_VERSION=/) { + print CONF_OUT "TSK_VERSION=${VER}\n"; + $found++; + } + else { + print CONF_OUT $_; + } + } + close (CONF_IN); + close (CONF_OUT); + + if ($found != 1) { + die "$found (instead of 1) occurrences of TSK_VERSION found in ${orig}"; + } + + unlink ($orig) or die "Error deleting ${orig}"; + rename ($temp, $orig) or die "Error renaming tmp $orig file"; + system("git add ${orig}") unless ($TESTING); + +} -main(); \ No newline at end of file +main(); diff --git a/setupSleuthkitBranch.py b/setupSleuthkitBranch.py 
index 4c7ace32ac..881ce44570 100644 --- a/setupSleuthkitBranch.py +++ b/setupSleuthkitBranch.py @@ -39,17 +39,26 @@ def gitSleuthkitCheckout(branch, branchOwner): # passed is a global variable that gets set to non-zero integer # When an error occurs global passed - if (branchOwner==ORIGIN_OWNER): - cmd = ['git','checkout', branch] - else: + if branch in getSleuthkitBranchList(branchOwner): #add the remotes + #if the branch owner was origin substitute in the name of that owner + if (branchOwner==ORIGIN_OWNER): + gitHubUser="sleuthkit" + else: + gitHubUser=branchOwner checkout=['git','checkout','-b',branchOwner+'-'+branch] + print("Command run:" + " ".join(checkout)) passed = subprocess.call(checkout, stdout=sys.stdout,cwd=TSK_HOME) - cmd = ['git','pull', "/".join(["https://github.com", branchOwner, "sleuthkit.git"]), branch] + cmd = ['git','pull', "/".join(["https://github.com", gitHubUser, "sleuthkit.git"]), branch] if passed != 0: #0 would be success #unable to create new branch return instead of pulling return - passed = subprocess.call(cmd,stdout=sys.stdout,cwd=TSK_HOME) + print("Command run:" + " ".join(cmd)) + passed = subprocess.call(cmd,stdout=sys.stdout,cwd=TSK_HOME) + if (passed == 0): + sys.exit() #exit if successful + else: + print("Branch: " + branch + " does not exist for owner: " + branchOwner) def parseXML(xmlFile): ''' @@ -70,40 +79,39 @@ def main(): if not TSK_HOME: sys.exit(1) print('Please set TSK_HOME env variable') - # Get the Autopsy branch being used. 
Travis and Appveyor # will tell us where a pull request is directed TRAVIS=os.getenv("TRAVIS",False) APPVEYOR=os.getenv("APPVEYOR",False) if TRAVIS == "true": - CURRENT_BRANCH=os.getenv("TRAVIS_PULL_REQUEST_BRANCH",False) - BRANCH_OWNER=os.getenv("TRAVIS_PULL_REQUEST_SLUG", False).split('/')[0] + CURRENT_BRANCH=os.getenv("TRAVIS_PULL_REQUEST_BRANCH","") #make default empty string which is same vaule used when not a PR + if (CURRENT_BRANCH != ""): #if it is a PR + BRANCH_OWNER=os.getenv("TRAVIS_PULL_REQUEST_SLUG", ORIGIN_OWNER+"/"+CURRENT_BRANCH).split('/')[0] #default owner is ORIGIN_OWNER + gitSleuthkitCheckout(CURRENT_BRANCH, BRANCH_OWNER) + TARGET_BRANCH=os.getenv("TRAVIS_BRANCH",DEVELOP_BRANCH) elif APPVEYOR: - CURRENT_BRANCH=os.getenv("APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH",False) - BRANCH_OWNER=os.getenv("APPVEYOR_PULL_REQUEST_HEAD_REPO_NAME", False).split('/')[0] + CURRENT_BRANCH=os.getenv("APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH","") #make default same as value used by travis for readability of code + if (CURRENT_BRANCH != ""): #if it is a PR + BRANCH_OWNER=os.getenv("APPVEYOR_PULL_REQUEST_HEAD_REPO_NAME", ORIGIN_OWNER+"/"+CURRENT_BRANCH).split('/')[0] #default owner is ORIGIN_OWNER + gitSleuthkitCheckout(CURRENT_BRANCH, BRANCH_OWNER) + TARGET_BRANCH=os.getenv("APPVEYOR_REPO_BRANCH",DEVELOP_BRANCH) else: cmd=['git','rev-parse','--abbrev-ref','HEAD'] output = subprocess.check_output(cmd) - CURRENT_BRANCH=output.strip() - BRANCH_OWNER=ORIGIN_OWNER + TARGET_BRANCH=output.strip() # If we are in an Autopsy release branch, then use the # info in TSKVersion.xml to find the corresponding TSK # release branch. For other branches, we don't always # trust that TSKVersion has been updated. 
- if CURRENT_BRANCH.startswith('release'): + if TARGET_BRANCH.startswith('release'): version = parseXML('TSKVersion.xml') RELEASE_BRANCH = "release-"+version - gitSleuthkitCheckout(RELEASE_BRANCH, BRANCH_OWNER) - #If it failed try the origin release branch - if passed != 0: - gitSleuthkitCheckout(RELEASE_BRANCH, ORIGIN_OWNER) - # Check if the same branch exists in TSK (develop->develop, custom1->custom1, etc.) + #Check if the same user has a release branch which corresponds to this release branch + gitSleuthkitCheckout(RELEASE_BRANCH, ORIGIN_OWNER) else: - gitSleuthkitCheckout(CURRENT_BRANCH, BRANCH_OWNER) - + gitSleuthkitCheckout(TARGET_BRANCH, ORIGIN_OWNER) # Otherwise, default to origin develop - if passed != 0: - gitSleuthkitCheckout(DEVELOP_BRANCH, ORIGIN_OWNER) + gitSleuthkitCheckout(DEVELOP_BRANCH, ORIGIN_OWNER) if passed != 0: print('Error checking out a Sleuth Kit branch') diff --git a/unix_setup.sh b/unix_setup.sh index dde655e921..8e7c9a94c5 100755 --- a/unix_setup.sh +++ b/unix_setup.sh @@ -3,7 +3,10 @@ # Verifies programs are installed and copies native code into the Autopsy folder structure # -TSK_VERSION=4.6.4 +# NOTE: update_sleuthkit_version.pl updates this value and relies +# on it keeping the same name and whitespace. Don't change it. +TSK_VERSION=4.6.5 + # In the beginning... echo "---------------------------------------------"