Tabnine Logo
de.tudarmstadt.ukp.dkpro.core.io.text
Code Index | Add Tabnine to your IDE (free)

How to use de.tudarmstadt.ukp.dkpro.core.io.text

Best Java code snippets using de.tudarmstadt.ukp.dkpro.core.io.text (Showing top 8 results out of 315)

origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl

  /**
   * Logs where the output went once the whole collection has been processed, then delegates to
   * the superclass. When no target location is set, the destination is reported as
   * {@code <stdout>}.
   *
   * @throws AnalysisEngineProcessException
   *           declared by the overridden method; propagated from the superclass call
   */
  @Override
  public void collectionProcessComplete()
      throws AnalysisEngineProcessException
  {
    // Pick the message first, then emit it with a single logging call.
    String message = (getTargetLocation() != null)
        ? "Output written to file " + getTargetLocation()
        : "Output written to file <stdout>";
    getLogger().info(message);

    super.collectionProcessComplete();
  }
}
origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl

  /**
   * Writes the document text of the given CAS to the output stream obtained for it, using the
   * configured target encoding.
   *
   * @param aJCas
   *          the CAS whose document text is written
   * @throws AnalysisEngineProcessException
   *           wrapping any exception raised while opening, writing or closing the stream
   */
  @Override
  public void process(JCas aJCas)
    throws AnalysisEngineProcessException
  {
    // The stream is closed automatically when the try block exits.
    try (OutputStream out = getOutputStream(aJCas, filenameSuffix)) {
      String documentText = aJCas.getDocumentText();
      IOUtils.write(documentText, out, targetEncoding);
    }
    catch (Exception failure) {
      throw new AnalysisEngineProcessException(failure);
    }
  }
}
origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl

/**
 * Writes every token sequence of the given CAS as one line: tokens are joined by
 * {@code TOKEN_SEPARATOR} and each sequence, including an empty one, is terminated by the
 * platform line separator. All bytes are produced with the configured target encoding.
 *
 * @param aJCas
 *          the CAS whose token sequences are written
 * @throws AnalysisEngineProcessException
 *           wrapping a {@code FeaturePathException} or {@code IOException} raised while
 *           generating or writing the sequences
 */
@Override
public void process(JCas aJCas)
    throws AnalysisEngineProcessException
{
  try {
    // NOTE(review): the stream is not closed here; presumably its lifecycle is managed
    // elsewhere (e.g. by the base class) - confirm.
    OutputStream out = getOutputStream(aJCas, extension);

    for (String[] tokens : sequenceGenerator.tokenSequences(aJCas)) {
      for (int idx = 0; idx < tokens.length; idx++) {
        // Only tokens after the first are prefixed with the separator.
        String piece = (idx == 0) ? tokens[idx] : TOKEN_SEPARATOR + tokens[idx];
        out.write(piece.getBytes(targetEncoding));
      }
      // A line break is emitted for every sequence, even an empty one.
      out.write(System.lineSeparator().getBytes(targetEncoding));
    }
  }
  catch (FeaturePathException | IOException failure) {
    throw new AnalysisEngineProcessException(failure);
  }
}
origin: de.tudarmstadt.ukp.dkpro.tc/de.tudarmstadt.ukp.dkpro.tc.core-asl

  /**
   * Lets the superclass fill the CAS with the next document, then attaches a single
   * {@code TextClassificationOutcome} annotation whose value is obtained from
   * {@code getTextClassificationOutcome(jcas)}.
   *
   * @param aCAS
   *          the CAS to populate
   * @throws IOException
   *           declared by the overridden method; propagated from the superclass call
   * @throws CollectionException
   *           if the JCas view of the CAS cannot be obtained; the underlying
   *           {@code CASException} is preserved as the cause
   */
  @Override
  public void getNext(CAS aCAS)
    throws IOException, CollectionException
  {
    super.getNext(aCAS);
    
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    }
    catch (CASException e) {
      // Keep the original failure as the cause instead of discarding it.
      throw new CollectionException(e);
    }

    TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
    outcome.setOutcome(getTextClassificationOutcome(jcas));
    outcome.addToIndexes();
  }
}
origin: de.tudarmstadt.ukp.dkpro.core/de.tudarmstadt.ukp.dkpro.core.io.text-asl

  /**
   * Initializes the CAS from the next file resource and sets the file content as its document
   * text. When the source encoding equals {@code ENCODING_AUTO}, the text is decoded through a
   * {@code CharsetDetector} reader; otherwise the configured source encoding is used.
   *
   * @param aJCas
   *          the CAS to populate
   * @throws IOException
   *           if the resource cannot be opened or read
   * @throws CollectionException
   *           declared by the overridden method
   */
  @Override
  public void getNext(CAS aJCas)
    throws IOException, CollectionException
  {
    Resource resource = nextFile();
    initCas(aJCas, resource);

    // The buffered stream is closed automatically when the try block exits.
    try (InputStream in = new BufferedInputStream(
        CompressionUtils.getInputStream(resource.getLocation(), resource.getInputStream()))) {
      boolean detectEncoding = ENCODING_AUTO.equals(sourceEncoding);

      String documentText = detectEncoding
          ? IOUtils.toString(new CharsetDetector().getReader(in, null))
          : IOUtils.toString(in, sourceEncoding);

      aJCas.setDocumentText(documentText);
    }
  }
}
origin: dkpro/dkpro-tc

/**
 * Initializes the component and loads the gold label file into {@code goldLabelMap}.
 * <p>
 * Each line is split on single spaces. The first field must contain a {@code /}; the segment
 * after the first {@code /} is used as the file id, and all remaining fields are stored as the
 * labels for that id. A later line with the same file id replaces the earlier entry.
 *
 * @param context
 *          the UIMA context, also used to resolve the gold label file location
 * @throws ResourceInitializationException
 *           if the file cannot be located or read, or if a line has fewer than two fields or
 *           a first field without a {@code /}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException
{
  super.initialize(context);
  goldLabelMap = new HashMap<String, List<String>>();
  try {
    URL resourceUrl = ResourceUtils.resolveLocation(goldLabelFile, this, context);
    for (String line : FileUtils.readLines(new File(resourceUrl.toURI()), "utf-8")) {
      String[] parts = line.split(" ");
      if (parts.length < 2) {
        throw new IOException("Wrong file format in line: " + line);
      }
      // Report a first field without "/" as a format error rather than letting an
      // unchecked ArrayIndexOutOfBoundsException escape.
      String[] pathSegments = parts[0].split("/");
      if (pathSegments.length < 2) {
        throw new IOException("Wrong file format in line: " + line);
      }
      String fileId = pathSegments[1];
      List<String> labels = new ArrayList<String>();
      for (int i = 1; i < parts.length; i++) {
        labels.add(parts[i]);
      }
      goldLabelMap.put(fileId, labels);
    }
  }
  catch (IOException e) {
    throw new ResourceInitializationException(e);
  }
  catch (URISyntaxException ex) {
    throw new ResourceInitializationException(ex);
  }
}
origin: dkpro/dkpro-tc

/**
 * Lets the superclass fill the CAS with the next document, then attaches one
 * {@code TextClassificationOutcome} annotation for every value returned by
 * {@code getTextClassificationOutcomes(jcas)}.
 *
 * @param aCAS
 *          the CAS to populate
 * @throws IOException
 *           declared by the overridden method; propagated from the superclass call
 * @throws CollectionException
 *           if the JCas view of the CAS cannot be obtained; the underlying
 *           {@code CASException} is preserved as the cause
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException
{
  super.getNext(aCAS);
  JCas jcas;
  try {
    jcas = aCAS.getJCas();
  }
  catch (CASException e) {
    // Keep the original failure as the cause instead of discarding it.
    throw new CollectionException(e);
  }
  for (String outcomeValue : getTextClassificationOutcomes(jcas)) {
    TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
    outcome.setOutcome(outcomeValue);
    outcome.addToIndexes();
  }
}
origin: de.tudarmstadt.ukp.dkpro.tc/de.tudarmstadt.ukp.dkpro.tc.core-asl

  /**
   * Lets the superclass fill the CAS with the next document, then attaches one
   * {@code TextClassificationOutcome} annotation for every value returned by
   * {@code getTextClassificationOutcomes(jcas)}.
   *
   * @param aCAS
   *          the CAS to populate
   * @throws IOException
   *           declared by the overridden method; propagated from the superclass call
   * @throws CollectionException
   *           if the JCas view of the CAS cannot be obtained; the underlying
   *           {@code CASException} is preserved as the cause
   */
  @Override
  public void getNext(CAS aCAS)
    throws IOException, CollectionException
  {
    super.getNext(aCAS);
    
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    }
    catch (CASException e) {
      // Keep the original failure as the cause instead of discarding it.
      throw new CollectionException(e);
    }

    for (String outcomeValue : getTextClassificationOutcomes(jcas)) {
      TextClassificationOutcome outcome = new TextClassificationOutcome(jcas);
      outcome.setOutcome(outcomeValue);
      outcome.addToIndexes();
    }
  }
}
de.tudarmstadt.ukp.dkpro.core.io.text

Most used classes

  • TextReader
    UIMA collection reader for plain text files.
  • TextWriter
    UIMA CAS consumer writing the CAS document text as plain text file.
  • TokenizedTextWriter
    Writes texts into a large file containing one sentence per line and tokens separated by whitespace.
Tabnine Logo
  • Products

    Search for Java codeSearch for JavaScript code
  • IDE Plugins

    IntelliJ IDEAWebStormVisual StudioAndroid StudioEclipseVisual Studio CodePyCharmSublime TextPhpStormVimGoLandRubyMineEmacsJupyter NotebookJupyter LabRiderDataGripAppCode
  • Company

    About UsContact UsCareers
  • Resources

    FAQBlogTabnine AcademyTerms of usePrivacy policyJava Code IndexJavascript Code Index
Get Tabnine for your IDE now