Java code examples for opennlp.tools.formats.letsmt

/**
 * Creates a sentence stream that serves the sentences of the given letsmt
 * document.
 *
 * @param source the parsed document this stream reads its sentences from
 */
LetsmtSentenceStream(LetsmtDocument source) {
 this.source = source;
 // reset() builds the sentence iterator, so the stream can be read immediately
 this.reset();
}

 /**
  * Parses the letsmt XML document stored in the given file.
  *
  * @param file the letsmt XML file to read
  * @return the document produced by the stream-based {@code parse} overload
  * @throws IOException if the file cannot be opened or the stream-based parse fails
  */
 static LetsmtDocument parse(File file) throws IOException {
  // try-with-resources closes the file stream even when parsing throws
  try (InputStream fileIn = new FileInputStream(file)) {
   final LetsmtDocument parsed = parse(fileIn);
   return parsed;
  }
 }
}

 /**
  * Rewinds this stream by replacing the sentence iterator with a fresh one
  * obtained from the source document.
  */
 @Override
 public void reset() {
  this.sentenceIt = this.source.getSentences().iterator();
 }
}

// NOTE(review): this is an excerpt; the opening "try {" and the closing braces
// are not visible here.
// Presumably validates the "Data" file parameter before it is parsed — confirm
// against CmdLineUtil.
CmdLineUtil.checkInputFile("Data", params.getData());
 // Parse the letsmt XML file named by the "Data" parameter.
 letsmtDoc = LetsmtDocument.parse(params.getData());
} catch (IOException ex) {
 // Parsing failed with an I/O error; delegate to the shared CLI error handler.
 CmdLineUtil.handleCreateObjectStreamError(ex);
// Serve the parsed document's sentences as a stream of SentenceSample objects.
ObjectStream<SentenceSample> samples = new LetsmtSentenceStream(letsmtDoc);
// The detokenizer parameter is optional; only wrap the stream when it is set.
if (params.getDetokenizer() != null) {
 try {
  Detokenizer detokenizer = new DictionaryDetokenizer(
    new DetokenizationDictionary(params.getDetokenizer()));
  samples = new DetokenizeSentenceSampleStream(detokenizer, samples);
 } catch (IOException e) {
  // The detokenizer dictionary could not be loaded; abort the tool
  // (the -1 is presumably the exit code — confirm against TerminateToolException).
  throw new TerminateToolException(-1, "Failed to load detokenizer rules!", e);

// Open the "letsmt-with-words.xml" test resource; try-with-resources closes the stream.
try (InputStream letsmtXmlIn = LetsmtDocumentTest.class.getResourceAsStream("letsmt-with-words.xml");) {
 LetsmtDocument doc = LetsmtDocument.parse(letsmtXmlIn);
 List<LetsmtDocument.LetsmtSentence> sents = doc.getSentences();
 // NOTE(review): the declarations of sent1/sent2 and the beginning of the expected
 // token arrays are missing from this excerpt.
 // The first sentence is expected to carry tokens and no non-tokenized text.
 Assert.assertNull(sent1.getNonTokenizedText());
   "products",
   "."
   }, sent1.getTokens());
 // The same expectation is checked for the second sentence.
 Assert.assertNull(sent2.getNonTokenizedText());
   "below",
   "."
   }, sent2.getTokens());

/**
 * Reads the next sample, bundling up to 25 consecutive sentences of the
 * source document into a single {@link SentenceSample}.
 * <p>
 * Tokenized sentences are rendered by joining their tokens with single
 * spaces; otherwise the non-tokenized text is used when present. Sentences
 * are separated by one space and each one is recorded as a {@link Span}
 * over the assembled text.
 *
 * @return the next sample, or {@code null} when no sentences are left
 * @throws IOException declared in the signature; no statement in this body
 *         throws it explicitly
 */
@Override
public SentenceSample read() throws IOException {
 final StringBuilder text = new StringBuilder();
 final List<Span> spans = new LinkedList<>();

 // a single sample holds at most 25 sentences
 int consumed = 0;
 while (sentenceIt.hasNext() && consumed < 25) {
  final LetsmtDocument.LetsmtSentence current = sentenceIt.next();
  final int start = text.length();

  if (current.getTokens() != null) {
   text.append(String.join(" ", current.getTokens()));
  } else if (current.getNonTokenizedText() != null) {
   text.append(current.getNonTokenizedText());
  }

  // the span ends before the separator that is appended next
  spans.add(new Span(start, text.length()));
  text.append(' ');
  consumed++;
 }

 // nothing was consumed: the end of the stream is signalled with null
 if (spans.isEmpty()) {
  return null;
 }

 final Span[] spanArray = spans.toArray(new Span[spans.size()]);
 return new SentenceSample(text.toString(), spanArray);
}

/**
 * Parses a letsmt XML document from the given stream with a SAX parser.
 *
 * @param letsmtXmlIn the stream delivering the letsmt XML
 * @return a document built from the sentences collected by the content handler
 * @throws IOException if reading fails, or if the XML cannot be parsed (the
 *         {@link SAXException} is kept as the cause)
 */
static LetsmtDocument parse(InputStream letsmtXmlIn) throws IOException {
 final SAXParser parser = XmlUtil.createSaxParser();

 try {
  final XMLReader reader = parser.getXMLReader();
  final LetsmtDocumentHandler handler = new LetsmtDocumentHandler();
  reader.setContentHandler(handler);

  final InputSource xmlSource = new InputSource(letsmtXmlIn);
  reader.parse(xmlSource);

  // the handler gathered the sentences while the reader walked the XML
  return new LetsmtDocument(handler.sentences);
 } catch (SAXException saxFailure) {
  // report XML problems as IOException so callers deal with one exception type
  throw new IOException("Failed to parse letsmt xml!", saxFailure);
 }
}

/**
 * Registers this factory with the {@link StreamFactoryRegistry} for
 * {@link SentenceSample} streams under the format name {@code "letsmt"}.
 */
public static void registerFactory() {
 final LetsmtSentenceStreamFactory factory =
   new LetsmtSentenceStreamFactory(LetsmtSentenceStreamFactory.Parameters.class);
 StreamFactoryRegistry.registerFactory(SentenceSample.class, "letsmt", factory);
}

 /**
  * Handles a closing tag: a closing {@code w} stores the buffered characters
  * as a token, a closing {@code s} completes a sentence. Other elements are
  * only forwarded to the superclass.
  *
  * @param uri the namespace URI, passed through to the superclass
  * @param localName the local name, passed through to the superclass
  * @param qName the qualified element name that selects the action
  * @throws SAXException if the superclass handler throws it
  */
 @Override
 public void endElement(String uri, String localName, String qName) throws SAXException {
  super.endElement(uri, localName, qName);

  // Note:
  // words are optional in sentences, if there are no words just the chars have to be captured
  if (qName.equals("w")) {
   // a word ended: the trimmed character buffer becomes one token
   tokens.add(chars.toString().trim());
   chars.setLength(0);
  } else if (qName.equals("s")) {
   // TODO: The sentence should contain the id, so it can be tracked back to the
   // place it came from
   LetsmtSentence finished = new LetsmtSentence();
   if (tokens.isEmpty()) {
    // no words were seen, so keep the raw sentence text instead
    finished.nonTokenizedText = chars.toString().trim();
   } else {
    finished.tokens = tokens.toArray(new String[tokens.size()]);
    // start a new token list for the next sentence
    tokens = new ArrayList<>();
   }
   sentences.add(finished);
   chars.setLength(0);
  }
 }
}

// NOTE(review): excerpt only; the opening "try {" and the closing braces are
// outside the visible lines.
// Presumably checks the "Data" file parameter before parsing — confirm against
// CmdLineUtil.
CmdLineUtil.checkInputFile("Data", params.getData());
 // Read the letsmt XML file given by the "Data" parameter.
 letsmtDoc = LetsmtDocument.parse(params.getData());
} catch (IOException ex) {
 // An I/O error occurred while parsing; pass it to the shared CLI error handler.
 CmdLineUtil.handleCreateObjectStreamError(ex);
// Wrap the parsed document in a stream of SentenceSample objects.
ObjectStream<SentenceSample> samples = new LetsmtSentenceStream(letsmtDoc);
// Detokenization is applied only when a detokenizer parameter was supplied.
if (params.getDetokenizer() != null) {
 try {
  Detokenizer detokenizer = new DictionaryDetokenizer(
    new DetokenizationDictionary(params.getDetokenizer()));
  samples = new DetokenizeSentenceSampleStream(detokenizer, samples);
 } catch (IOException e) {
  // Loading the detokenizer dictionary failed; terminate the tool
  // (the -1 is presumably the exit code — confirm against TerminateToolException).
  throw new TerminateToolException(-1, "Failed to load detokenizer rules!", e);

/**
 * Reads letsmt XML from the given stream and turns it into a document.
 *
 * @param letsmtXmlIn the stream supplying the letsmt XML
 * @return a document holding the sentences gathered by the SAX content handler
 * @throws IOException if the stream cannot be read or the XML is rejected by
 *         the parser (the {@link SAXException} is attached as the cause)
 */
static LetsmtDocument parse(InputStream letsmtXmlIn) throws IOException {
 final SAXParser sax = XmlUtil.createSaxParser();

 try {
  final XMLReader xml = sax.getXMLReader();
  final LetsmtDocumentHandler collector = new LetsmtDocumentHandler();
  xml.setContentHandler(collector);

  final InputSource input = new InputSource(letsmtXmlIn);
  xml.parse(input);

  // parsing is finished, so the handler's sentence list is ready to hand over
  return new LetsmtDocument(collector.sentences);
 } catch (SAXException parseError) {
  // convert the SAX failure into the IOException this method declares
  throw new IOException("Failed to parse letsmt xml!", parseError);
 }
}

 /**
  * Opens the given file and parses its content as a letsmt XML document.
  *
  * @param file the letsmt XML file
  * @return the document returned by the stream-based {@code parse} overload
  * @throws IOException if opening the file or the stream-based parse fails
  */
 static LetsmtDocument parse(File file) throws IOException {
  // the stream is closed automatically when the try block is left
  try (InputStream xmlStream = new FileInputStream(file)) {
   final LetsmtDocument document = parse(xmlStream);
   return document;
  }
 }
}

 /**
  * Restarts iteration: the sentence iterator is replaced by a new one taken
  * from the source document's sentences.
  */
 @Override
 public void reset() {
  this.sentenceIt = this.source.getSentences().iterator();
 }
}

/**
 * Builds a stream over the sentences of the supplied letsmt document.
 *
 * @param source the document whose sentences this stream iterates
 */
LetsmtSentenceStream(LetsmtDocument source) {
 this.source = source;
 // set up the sentence iterator through reset() so a new stream is ready to read
 this.reset();
}

/**
 * Adds a {@code LetsmtSentenceStreamFactory} to the
 * {@link StreamFactoryRegistry}, keyed by {@link SentenceSample} and the
 * format name {@code "letsmt"}.
 */
public static void registerFactory() {
 final LetsmtSentenceStreamFactory letsmtFactory =
   new LetsmtSentenceStreamFactory(LetsmtSentenceStreamFactory.Parameters.class);
 StreamFactoryRegistry.registerFactory(SentenceSample.class, "letsmt", letsmtFactory);
}

/**
 * Builds a {@code LetsmtDocument} from letsmt XML read off the given stream.
 *
 * @param letsmtXmlIn the stream to read the letsmt XML from
 * @return a document wrapping the sentences the content handler collected
 * @throws IOException on read errors, or when the XML is invalid (the
 *         originating {@link SAXException} is preserved as the cause)
 */
static LetsmtDocument parse(InputStream letsmtXmlIn) throws IOException {
 final SAXParser saxParserInstance = XmlUtil.createSaxParser();

 try {
  final XMLReader contentReader = saxParserInstance.getXMLReader();
  final LetsmtDocumentHandler sentenceHandler = new LetsmtDocumentHandler();
  contentReader.setContentHandler(sentenceHandler);

  final InputSource letsmtSource = new InputSource(letsmtXmlIn);
  contentReader.parse(letsmtSource);

  // hand the handler's collected sentences to the new document
  return new LetsmtDocument(sentenceHandler.sentences);
 } catch (SAXException invalidXml) {
  // callers only see IOException; the SAX error travels along as its cause
  throw new IOException("Failed to parse letsmt xml!", invalidXml);
 }
}

 /**
  * Parses a letsmt XML document from a file by delegating to the
  * stream-based {@code parse} overload.
  *
  * @param file the file containing the letsmt XML
  * @return the parsed document
  * @throws IOException if the file cannot be opened or the delegate throws
  */
 static LetsmtDocument parse(File file) throws IOException {
  // the file stream is released by try-with-resources on every exit path
  try (InputStream source = new FileInputStream(file)) {
   final LetsmtDocument result = parse(source);
   return result;
  }
 }
}

 /**
  * Starts over from the beginning by fetching a new iterator over the
  * sentences of the source document.
  */
 @Override
 public void reset() {
  this.sentenceIt = this.source.getSentences().iterator();
 }
}

/**
 * Initializes the stream with the letsmt document it reads from.
 *
 * @param source the document that supplies the sentences
 */
LetsmtSentenceStream(LetsmtDocument source) {
 this.source = source;
 // reset() obtains the iterator over the document's sentences
 this.reset();
}

/**
 * Makes the {@code "letsmt"} format available for {@link SentenceSample}
 * streams by registering a new factory instance with the
 * {@link StreamFactoryRegistry}.
 */
public static void registerFactory() {
 final LetsmtSentenceStreamFactory streamFactory =
   new LetsmtSentenceStreamFactory(LetsmtSentenceStreamFactory.Parameters.class);
 StreamFactoryRegistry.registerFactory(SentenceSample.class, "letsmt", streamFactory);
}

How to use opennlp.tools.formats.letsmt

Best Java code snippets using opennlp.tools.formats.letsmt (Showing top 20 results out of 315)