// Minimal usage sketch: create a Tika facade and ask it to detect the media type of a file.
// NOTE(review): "File file = ..." is an elided placeholder, so this line is illustrative rather than compilable.
Tika tika = new Tika(); File file = ... String mimeType = tika.detect(file);
protected void getMimeType(InputStream inputStream, String fileName, StaticAsset newAsset) { Tika tika = new Tika(); String tikaMimeType = tika.detect(fileName); if (tikaMimeType == null) { try { tikaMimeType = tika.detect(inputStream); } catch (IOException e) { //if tika can't resolve, don't throw exception } } if (tikaMimeType != null) { newAsset.setMimeType(tikaMimeType); } }
public static void main(String[] args) throws Exception { // Create a Tika instance with the default configuration Tika tika = new Tika(); // Parse all given files and print out the extracted // text content for (String file : args) { String text = tika.parseToString(new File(file)); System.out.print(text); } } }
/**
 * Detects the media type of every file named on the command line and prints
 * one {@code <path>: <type>} line per file.
 *
 * @param args paths of the files to inspect
 * @throws Exception if detection fails for a file
 */
public static void main(String[] args) throws Exception {
    Tika detector = new Tika();
    for (int i = 0; i < args.length; i++) {
        String path = args[i];
        String mediaType = detector.detect(new File(path));
        System.out.println(path + ": " + mediaType);
    }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Parses {@code example.doc} into a string, prints the extracted text to
 * standard output and returns it.
 *
 * @return the text extracted from the document
 * @throws Exception if the document cannot be read or parsed
 */
public static String parseToStringExample() throws Exception {
    File source = new File("example.doc");
    Tika tika = new Tika();
    String extractedText = tika.parseToString(source);
    System.out.print(extractedText);
    return extractedText;
}
/**
 * Parses {@code example.doc} through a {@link Reader} and copies the extracted
 * text to standard output in chunks of up to 1000 characters.
 *
 * @throws Exception if the document cannot be read or parsed
 */
public static void parseToReaderExample() throws Exception {
    File source = new File("example.doc");
    try (Reader text = new Tika().parse(source)) {
        char[] chunk = new char[1000];
        int count;
        // read() reports -1 once the extracted text is exhausted
        while ((count = text.read(chunk)) != -1) {
            System.out.append(CharBuffer.wrap(chunk, 0, count));
        }
    }
}
/**
 * Command-line entry point: logs a start-up banner and delegates to
 * {@code execute}. Any exception is printed and logged, and the JVM then exits
 * with status {@code -1}.
 *
 * @param args command-line arguments forwarded to {@code execute}
 */
public static void main(String[] args) {
    LOG.info("Starting {} server", new Tika());
    try {
        execute(args);
    } catch (Exception startupFailure) {
        // Report on stderr as well as through the logger before exiting.
        startupFailure.printStackTrace();
        LOG.error("Can't start: ", startupFailure);
        System.exit(-1);
    }
}
/**
 * Indexes documents into a Lucene index.
 *
 * <p>The first argument is the path of the index directory; every following
 * argument is a file handed to {@code LuceneIndexer.indexDocument}.
 *
 * @param args index directory path followed by the files to index
 * @throws Exception if the index cannot be opened or a document cannot be indexed
 */
public static void main(String[] args) throws Exception {
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new StandardAnalyzer());
    // The directory is a separate resource from the writer, so it is managed
    // explicitly here and released after the writer has been closed.
    try (FSDirectory directory = FSDirectory.open(Paths.get(args[0]));
            IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
        LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
        for (int i = 1; i < args.length; i++) {
            indexer.indexDocument(new File(args[i]));
        }
    }
}
/**
 * Creates the instance, building its Tika facade from the configuration
 * returned by {@code TikaResource.getConfig()}.
 */
public TikaVersion() {
    tika = new Tika(TikaResource.getConfig());
}
/** Prints the string form of a default-configured Tika facade to standard output. */
private void version() {
    String description = new Tika().toString();
    System.out.println(description);
}
/**
 * Detects the type of {@code /path/to/prescription.xpd} using a media type
 * database built from a custom definition file.
 *
 * @return the detected type name
 * @throws Exception if the definition file cannot be loaded
 */
public static String customMimeInfo() throws Exception {
    URL definitions = new URL("file:///path/to/prescription-type.xml");
    MimeTypes customTypes = MimeTypesFactory.create(definitions);
    return new Tika(customTypes).detect("/path/to/prescription.xpd");
}
/**
 * Creates the recogniser and its secondary Tika parser.
 *
 * <p>If the parser cannot be created the failure is logged together with its
 * cause and the instance is flagged as unavailable instead of throwing.
 */
public AgeRecogniser() {
    try {
        secondaryParser = new Tika(new TikaConfig());
        available = true;
    } catch (Exception e) {
        available = false;
        // Pass the exception along so the log shows why initialisation failed.
        LOG.log(Level.SEVERE, "Unable to initialize secondary parser", e);
    }
}
/**
 * Detects a media type from a name, using a type database created from the
 * {@code tika-mimetypes.xml} resource.
 *
 * @param name the name to run detection on
 * @return the detected type name
 * @throws Exception if the type database cannot be created
 */
public static String detectWithCustomConfig(String name) throws Exception {
    final String mimeTypesResource = "/org/apache/tika/mime/tika-mimetypes.xml";
    return new Tika(MimeTypesFactory.create(mimeTypesResource)).detect(name);
}
/**
 * Creates the instance: builds the Tika facade from
 * {@code TikaResource.getConfig()}, creates the HTML helper, and records the
 * resource class of every supplied provider as an endpoint.
 *
 * @param rCoreProviders providers whose resource classes are added to the endpoints
 */
public TikaWelcome(List<ResourceProvider> rCoreProviders) {
    tika = new Tika(TikaResource.getConfig());
    html = new HTMLHelper();

    for (ResourceProvider provider : rCoreProviders) {
        endpoints.add(provider.getResourceClass());
    }
}
/**
 * Builds a {@code TrecDocument} from a file's extracted text and metadata.
 *
 * <p>The document is created from the metadata's resource name, the extracted
 * text and the metadata's creation date.
 *
 * @param file the file to parse
 * @return the document built from the parse result
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IOException           if the file cannot be read
 * @throws TikaException         if the file cannot be parsed
 */
public TrecDocument summarize(File file) throws FileNotFoundException, IOException, TikaException {
    Tika parser = new Tika();
    Metadata metadata = new Metadata();

    // NOTE(review): the stream is handed straight to parseToString; Tika's
    // Javadoc states that method closes it - confirm for the version in use.
    String text = parser.parseToString(new FileInputStream(file), metadata);

    String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
    return new TrecDocument(resourceName, text, metadata.getDate(TikaCoreProperties.CREATED));
}
/**
 * Detects the type of {@code /path/to/tmp/prescription.xpd} with a composite
 * detector that combines a custom media type database and an
 * {@code EncryptedPrescriptionDetector}.
 *
 * @return the detected type name
 * @throws Exception if the definition file cannot be loaded
 */
public static String customCompositeDetector() throws Exception {
    URL definitions = new URL("file:///path/to/prescription-type.xml");
    MimeTypes customTypes = MimeTypesFactory.create(definitions);
    CompositeDetector detector =
            new CompositeDetector(customTypes, new EncryptedPrescriptionDetector());
    return new Tika(detector).detect("/path/to/tmp/prescription.xpd");
}
/**
 * Loads the named Tika configuration from {@code /org/apache/tika/config/},
 * parses that same resource with a Tika facade built from it, and returns the
 * metadata gathered by the parse.
 *
 * @param name file name of the configuration resource
 * @return metadata populated while parsing the resource
 * @throws TikaException if the configuration cannot be loaded
 * @throws IOException   if the resource cannot be read or parsed
 * @throws SAXException  if the configuration XML is malformed
 */
private Metadata getMetadata(String name) throws TikaException, IOException, SAXException {
    URL url = this.getClass().getResource("/org/apache/tika/config/" + name);
    assertNotNull("couldn't find: " + name, url);
    TikaConfig tikaConfig = new TikaConfig(url);
    Tika tika = new Tika(tikaConfig);
    Metadata metadata = new Metadata();
    // The reader returned by parse() must be closed, and reading it to the end
    // makes sure parsing has finished before the metadata is handed back.
    try (java.io.InputStream stream = url.openStream();
            java.io.Reader text = tika.parse(stream, metadata)) {
        char[] discard = new char[4096];
        while (text.read(discard) != -1) {
            // extracted text is not needed, only the metadata
        }
    }
    return metadata;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Checks that the string form of a Tika facade is non-null and has the shape
 * {@code Apache Tika <major>.<minor>[.<patch>][-SNAPSHOT]}.
 */
@Test
public void testToString() {
    final String expectedShape = "Apache Tika \\d+\\.\\d+(\\.\\d+)?(-SNAPSHOT)?";
    String version = new Tika().toString();
    assertNotNull(version);
    assertTrue(version.matches(expectedShape));
}
/**
 * Parses a short byte sequence with a Tika facade built from the test
 * configuration and checks the value recorded under
 * {@code DummyInitializableParser.SUM_FIELD}.
 *
 * @throws Exception if the configuration cannot be loaded or parsing fails
 */
@Test
public void testInitializableParser() throws Exception {
    URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
    assert configFileUrl != null;
    TikaConfig config = new TikaConfig(configFileUrl);
    Tika tika = new Tika(config);
    Metadata md = new Metadata();
    // The reader returned by parse() must be closed, and reading it to the end
    // makes sure parsing has finished before the metadata is inspected.
    try (java.io.Reader text = tika.parse(
            TikaInputStream.get("someString".getBytes(StandardCharsets.ISO_8859_1)), md)) {
        char[] discard = new char[1024];
        while (text.read(discard) != -1) {
            // extracted text is not needed, only the metadata
        }
    }
    assertEquals("5", md.get(DummyInitializableParser.SUM_FIELD));
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Requests the version path as plain text and checks that the response body
 * equals the string form of a default Tika facade.
 *
 * @throws Exception if the request or reading the response fails
 */
@Test
public void testGetVersion() throws Exception {
    WebClient client = WebClient.create(endPoint + VERSION_PATH);
    Response response = client
            .type("text/plain")
            .accept("text/plain")
            .get();

    String expected = new Tika().toString();
    assertEquals(expected, getStringFromInputStream((InputStream) response.getEntity()));
}
} // closes the enclosing type, whose header lies outside this excerpt