/**
 * Uses Tika's {@link AutoDetectParser} to extract the text of a file.
 * <p>
 * The parser is registered in the {@link ParseContext} together with a
 * {@link PDFParserConfig} (inline image extraction enabled) and a default
 * {@link TesseractOCRConfig}, so that embedded content such as images is
 * parsed recursively as well.
 *
 * @param file
 *            the file whose text content should be extracted
 * @return The text content of the file; may be empty or consist only of
 *         whitespace if nothing could be extracted (a warning is logged in
 *         that case)
 * @throws Exception
 *             if the file cannot be opened, if Tika fails to parse it (the
 *             original {@link IOException}, {@link SAXException} or
 *             {@link TikaException} is attached as cause), or if closing
 *             the stream fails
 */
@Override
public String extractTextOfDocument(File file) throws Exception {
	Parser parser = new AutoDetectParser();
	Metadata metadata = new Metadata();
	// Integer.MAX_VALUE is passed as the handler's limit; presumably this
	// lifts the default cap on extracted characters - confirm against the
	// Tika version in use.
	BodyContentHandler handler = new BodyContentHandler(Integer.MAX_VALUE);

	TesseractOCRConfig config = new TesseractOCRConfig();
	PDFParserConfig pdfConfig = new PDFParserConfig();
	pdfConfig.setExtractInlineImages(true);

	// To parse images in files those lines are needed
	ParseContext parseContext = new ParseContext();
	parseContext.set(TesseractOCRConfig.class, config);
	parseContext.set(PDFParserConfig.class, pdfConfig);
	// Registering the parser itself makes sure recursive parsing of
	// embedded documents happens.
	parseContext.set(Parser.class, parser);

	// try-with-resources closes the stream on every path. A failure while
	// closing no longer replaces an exception thrown by the parser; it is
	// attached to it as a suppressed exception instead.
	// The stream is opened outside the inner try/catch on purpose, so a
	// FileNotFoundException still propagates unwrapped as before.
	try (InputStream fileStream = new FileInputStream(file)) {
		try {
			parser.parse(fileStream, handler, metadata, parseContext);
			String text = handler.toString();
			if (text.trim().isEmpty()) {
				logger.warn("Could not extract text of '" + file.getName() + "'");
			} else {
				logger.debug("Successfully extracted the text of '" + file.getName() + "'");
			}
			return text;
		} catch (IOException | SAXException | TikaException e) {
			throw new Exception("TIKA was not able to exctract text of file '" + file.getName() + "'", e);
		}
	}
}