Tabnine Logo
Tika.parse
Code Index | Add Tabnine to your IDE (free)

How to use the parse method in org.apache.tika.Tika

Best Java code snippets using org.apache.tika.Tika.parse (Showing top 20 results out of 315)

origin: apache/tika

/**
 * Example: parses {@code example.doc} through a {@link Reader} and copies the
 * extracted text to standard output in chunks of up to 1000 characters.
 *
 * @throws Exception if opening, parsing or reading the document fails
 */
public static void parseToReaderExample() throws Exception {
  File document = new File("example.doc");
  try (Reader text = new Tika().parse(document)) {
    char[] chunk = new char[1000];
    // read() returns -1 once the extracted text is exhausted.
    for (int count = text.read(chunk); count != -1; count = text.read(chunk)) {
      System.out.append(CharBuffer.wrap(chunk, 0, count));
    }
  }
}
origin: apache/tika

/**
 * Extracts the text content of the file found at the given path.
 * <p>
 * Convenience overload: a new, empty {@link Metadata} instance is created
 * and handed to {@link #parse(Path, Metadata)}; it is not exposed to the caller.
 *
 * @param path the path of the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 */
public Reader parse(Path path) throws IOException {
  Metadata metadata = new Metadata();
  return parse(path, metadata);
}
origin: apache/tika

/**
 * Extracts the text content of the given file.
 * <p>
 * Convenience overload: a new, empty {@link Metadata} instance is created
 * and handed to {@link #parse(File, Metadata)}; it is not exposed to the caller.
 *
 * @param file the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 * @see #parse(Path)
 */
public Reader parse(File file) throws IOException {
  Metadata metadata = new Metadata();
  return parse(file, metadata);
}

origin: apache/tika

/**
 * Extracts the text content of the document supplied as a stream.
 * <p>
 * Ownership of the stream passes to the returned reader: the stream and
 * any associated resources will be closed at or before the time when the
 * {@link Reader#close()} method is called.
 * <p>
 * Convenience overload that supplies a new, empty {@link Metadata} instance.
 *
 * @param stream the document to be parsed
 * @return extracted text content
 * @throws IOException if the document can not be read or parsed
 */
public Reader parse(InputStream stream) throws IOException {
  Metadata metadata = new Metadata();
  return parse(stream, metadata);
}
origin: apache/tika

  /**
   * Indexes the given file as one document with two fields: "filename"
   * (the file's name, created with {@code Store.YES}) and "fulltext"
   * (fed from the reader returned by {@code tika.parse(file)}).
   * The reader is closed once the document has been handed to the writer.
   *
   * @param file the file to parse and index
   * @throws Exception if parsing or indexing fails
   */
  public void indexDocument(File file) throws Exception {
    try (Reader extractedText = tika.parse(file)) {
      TextField nameField = new TextField("filename", file.getName(), Store.YES);
      TextField bodyField = new TextField("fulltext", extractedText);

      Document doc = new Document();
      doc.add(nameField);
      doc.add(bodyField);
      writer.addDocument(doc);
    }
  }
}
origin: apache/tika

/**
 * Parses the file at the given path and returns the extracted text content.
 * <p>
 * Metadata information extracted from the document is returned in
 * the supplied metadata instance.
 * <p>
 * The stream opened for the path is handed to
 * {@link #parse(InputStream, Metadata)}. If that call fails, no reader is
 * returned that the caller could close, so the stream is closed here before
 * the failure is propagated.
 *
 * @param path the path of the file to be parsed
 * @param metadata where document's metadata will be populated
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 */
public Reader parse(Path path, Metadata metadata) throws IOException {
  InputStream stream = TikaInputStream.get(path, metadata);
  try {
    return parse(stream, metadata);
  } catch (IOException | RuntimeException e) {
    // No Reader was handed out, so nobody else can release the stream.
    try {
      stream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}

origin: apache/tika

/**
 * Parses the given file and returns the extracted text content.
 * <p>
 * Metadata information extracted from the document is returned in
 * the supplied metadata instance.
 * <p>
 * The stream opened for the file is handed to
 * {@link #parse(InputStream, Metadata)}. If that call fails, no reader is
 * returned that the caller could close, so the stream is closed here before
 * the failure is propagated.
 *
 * @param file the file to be parsed
 * @param metadata where document's metadata will be populated
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 * @see #parse(Path)
 */
public Reader parse(File file, Metadata metadata) throws IOException {
  @SuppressWarnings("deprecation")
  InputStream stream = TikaInputStream.get(file, metadata);
  try {
    return parse(stream, metadata);
  } catch (IOException | RuntimeException e) {
    // No Reader was handed out, so nobody else can release the stream.
    try {
      stream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
origin: apache/tika

/**
 * Parses the resource at the given URL and returns the extracted
 * text content.
 * <p>
 * A new {@link Metadata} instance is created for the call. The stream opened
 * for the URL is handed to {@link #parse(InputStream, Metadata)}; if that
 * call fails, no reader is returned that the caller could close, so the
 * stream is closed here before the failure is propagated.
 *
 * @param url the URL of the resource to be parsed
 * @return extracted text content
 * @throws IOException if the resource can not be read or parsed
 */
public Reader parse(URL url) throws IOException {
  Metadata metadata = new Metadata();
  InputStream stream = TikaInputStream.get(url, metadata);
  try {
    return parse(stream, metadata);
  } catch (IOException | RuntimeException e) {
    // No Reader was handed out, so nobody else can release the stream.
    try {
      stream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
origin: apache/tika

/**
 * Indexes the metadata that Tika reports for the given file as a single
 * document: every metadata name becomes a field name and each of its values
 * becomes one {@code TextField} created with {@code Store.YES}.
 *
 * @param file the file whose metadata is indexed
 * @throws Exception if the file cannot be opened, parsed or indexed
 */
public void indexContentSpecificMet(File file) throws Exception {
  Metadata met = new Metadata();
  try (InputStream is = new FileInputStream(file)) {
    // NOTE(review): the Reader returned by parse() is discarded; only the
    // metadata side effect is used here.
    tika.parse(is, met);
    Document document = new Document();
    for (String key : met.names()) {
      for (String val : met.getValues(key)) {
        document.add(new TextField(key, val, Store.YES));
      }
    }
    // Add the document exactly once, after all fields are collected. Adding it
    // inside the key loop submitted the same, still-growing document once per
    // metadata name.
    writer.addDocument(document);
  }
}
origin: apache/tika

.equals(metadata.get(Metadata.CONTENT_TYPE))
? new InputStreamReader(inputStream, StandardCharsets.UTF_8)
: secondaryParser.parse(inputStream);
origin: apache/tika

  reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
} else {
  reader = secondaryParser.parse(inputStream);
origin: apache/tika

  /**
   * Loads the named resource from {@code /org/apache/tika/config/}, builds a
   * {@link Tika} facade configured from it, parses that same resource with
   * the facade and returns the metadata instance passed to the parse call.
   * Fails the test if the resource cannot be found.
   */
  private Metadata getMetadata(String name) throws TikaException, IOException, SAXException {
    URL configUrl = this.getClass().getResource("/org/apache/tika/config/" + name);
    assertNotNull("couldn't find: " + name, configUrl);

    Tika tika = new Tika(new TikaConfig(configUrl));
    Metadata extracted = new Metadata();
    // NOTE(review): neither the opened stream nor the Reader returned by
    // parse() is closed here; only the populated metadata is used.
    tika.parse(configUrl.openStream(), extracted);
    return extracted;
  }
}
origin: apache/tika

  /**
   * Indexes the given file as a single document built from metadata: a set of
   * values assigned up front (creator, creation date, detected format, source
   * URL, subjects, rights) plus whatever the parse call adds to the same
   * {@code Metadata} instance. Every metadata value becomes one
   * {@code TextField} created with {@code Store.YES}.
   *
   * @param file the file whose metadata is indexed
   * @throws Exception if the file cannot be opened, parsed or indexed
   */
  public void indexWithDublinCore(File file) throws Exception {
    Metadata met = new Metadata();
    met.add(TikaCoreProperties.CREATOR, "Manning");
    met.add(TikaCoreProperties.CREATOR, "Tika in Action");
    met.set(TikaCoreProperties.CREATED, new Date());
    met.set(TikaCoreProperties.FORMAT, tika.detect(file));
    met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
    met.add(TikaCoreProperties.SUBJECT, "File");
    met.add(TikaCoreProperties.SUBJECT, "Indexing");
    met.add(TikaCoreProperties.SUBJECT, "Metadata");
    met.set(Property.externalClosedChoise(TikaCoreProperties.RIGHTS.getName(), "public",
        "private"), "public");
    try (InputStream is = new FileInputStream(file)) {
      // NOTE(review): the Reader returned by parse() is discarded; only the
      // metadata side effect is used here.
      tika.parse(is, met);
      Document document = new Document();
      for (String key : met.names()) {
        for (String val : met.getValues(key)) {
          document.add(new TextField(key, val, Store.YES));
        }
      }
      // Add the document exactly once, after all fields are collected. Adding
      // it inside the key loop submitted the same, still-growing document once
      // per metadata name.
      writer.addDocument(document);
    }
  }
}
origin: apache/tika

  /**
   * Builds a {@link Tika} facade from the config resource named by
   * {@code TIKA_CFG_FILE}, parses the ISO-8859-1 bytes of "someString" and
   * expects the metadata value under
   * {@code DummyInitializableParser.SUM_FIELD} to be "5".
   */
  @Test
  public void testInitializableParser() throws Exception {
    URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
    assert configFileUrl != null;

    Tika tika = new Tika(new TikaConfig(configFileUrl));
    byte[] payload = "someString".getBytes(StandardCharsets.ISO_8859_1);
    Metadata extracted = new Metadata();
    tika.parse(TikaInputStream.get(payload), extracted);

    assertEquals("5", extracted.get(DummyInitializableParser.SUM_FIELD));
  }
}
origin: org.apache.tika/tika-core

/**
 * Extracts the text content of the file found at the given path.
 * <p>
 * Convenience overload: the call is forwarded together with a new, empty
 * {@link Metadata} instance that is not exposed to the caller.
 *
 * @param path the path of the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 */
public Reader parse(Path path) throws IOException {
  Metadata freshMetadata = new Metadata();
  return parse(path, freshMetadata);
}
origin: org.apache.tika/tika-core

/**
 * Extracts the text content of the given file.
 * <p>
 * Convenience overload: the call is forwarded together with a new, empty
 * {@link Metadata} instance that is not exposed to the caller.
 *
 * @param file the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 * @see #parse(Path)
 */
public Reader parse(File file) throws IOException {
  Metadata freshMetadata = new Metadata();
  return parse(file, freshMetadata);
}

origin: com.github.lafa.tikaNoExternal/tika-core

/**
 * Returns the text extracted from the file located at {@code path}.
 * <p>
 * Shorthand for the two-argument overload, called with a newly created
 * {@link Metadata} instance that the caller never sees.
 *
 * @param path the path of the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 */
public Reader parse(Path path) throws IOException {
  Metadata discardedMetadata = new Metadata();
  return parse(path, discardedMetadata);
}
origin: com.github.lafa.tikaNoExternal/tika-core

/**
 * Returns the text extracted from the given file.
 * <p>
 * Shorthand for the two-argument overload, called with a newly created
 * {@link Metadata} instance that the caller never sees.
 *
 * @param file the file to be parsed
 * @return extracted text content
 * @throws IOException if the file can not be read or parsed
 * @see #parse(Path)
 */
public Reader parse(File file) throws IOException {
  Metadata discardedMetadata = new Metadata();
  return parse(file, discardedMetadata);
}

origin: org.apache.tika/tika-core

/**
 * Parses the resource at the given URL and returns the extracted
 * text content.
 * <p>
 * A new {@link Metadata} instance is created for the call. The stream opened
 * for the URL is handed to {@link #parse(InputStream, Metadata)}; if that
 * call fails, no reader is returned that the caller could close, so the
 * stream is closed here before the failure is propagated.
 *
 * @param url the URL of the resource to be parsed
 * @return extracted text content
 * @throws IOException if the resource can not be read or parsed
 */
public Reader parse(URL url) throws IOException {
  Metadata metadata = new Metadata();
  InputStream stream = TikaInputStream.get(url, metadata);
  try {
    return parse(stream, metadata);
  } catch (IOException | RuntimeException e) {
    // No Reader was handed out, so nobody else can release the stream.
    try {
      stream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
origin: com.github.lafa.tikaNoExternal/tika-core

/**
 * Parses the resource at the given URL and returns the extracted
 * text content.
 * <p>
 * A new {@link Metadata} instance is created for the call. The stream opened
 * for the URL is handed to {@link #parse(InputStream, Metadata)}; if that
 * call fails, no reader is returned that the caller could close, so the
 * stream is closed here before the failure is propagated.
 *
 * @param url the URL of the resource to be parsed
 * @return extracted text content
 * @throws IOException if the resource can not be read or parsed
 */
public Reader parse(URL url) throws IOException {
  Metadata metadata = new Metadata();
  InputStream stream = TikaInputStream.get(url, metadata);
  try {
    return parse(stream, metadata);
  } catch (IOException | RuntimeException e) {
    // No Reader was handed out, so nobody else can release the stream.
    try {
      stream.close();
    } catch (IOException closeFailure) {
      e.addSuppressed(closeFailure);
    }
    throw e;
  }
}
org.apache.tika.Tika.parse

Javadoc

Parses the given file and returns the extracted text content.

Popular methods of Tika

  • <init>
    Creates a Tika facade using the given detector, parser, and translator instances.
  • detect
    Detects the media type of the given document. The type detection is based on the first few bytes of
  • parseToString
    Parses the file at the given path and returns the extracted text content. To avoid unpredictable exc
  • toString
  • getParser
    Returns the parser instance used by this facade.
  • setMaxStringLength
    Sets the maximum length of strings returned by the parseToString methods.

Popular in Java

  • Running tasks concurrently on multiple threads
  • getSupportFragmentManager (FragmentActivity)
  • setScale (BigDecimal)
  • addToBackStack (FragmentTransaction)
  • Container (java.awt)
    A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
  • Selector (java.nio.channels)
    A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
  • Time (java.sql)
    Java representation of an SQL TIME value. Provides utilities to format and parse the time's represen
  • NumberFormat (java.text)
    The abstract base class for all number formats. This class provides the interface for formatting and
  • Map (java.util)
    A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
  • Project (org.apache.tools.ant)
    Central representation of an Ant project. This class defines an Ant project with all of its targets,
  • Best plugins for Eclipse
Tabnine Logo
  • Products

    Search for Java code, Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us, Contact Us, Careers
  • Resources

    FAQ, Blog, Tabnine Academy, Terms of use, Privacy policy, Java Code Index, Javascript Code Index
Get Tabnine for your IDE now