/**
 * Creates a new immutable span that reuses the boundaries and type of an
 * existing span but carries a different probability.
 *
 * @param span the span whose start, end and type are copied; its own
 *             probability (missing or known to be wrong) is ignored
 * @param prob the probability to attach to the new span
 */
public Span(Span span, double prob) {
  this(span.getStart(), span.getEnd(), span.getType(), prob);
}
/**
 * Initializes a new immutable span as a copy of an existing span with both
 * boundaries shifted by the given offset.
 *
 * @param span   the span to shift; type and probability are kept as-is
 * @param offset the number of positions to add to start and end
 */
public Span(Span span, int offset) {
  this(span.getStart() + offset, span.getEnd() + offset, span.getType(), span.getProb());
}
/**
 * Generates a human readable string of the form {@code [start..end)} with
 * the type appended when one is present.
 */
@Override
public String toString() {
  String repr = "[" + getStart() + ".." + getEnd() + ")";
  if (getType() != null) {
    repr = repr + " " + getType();
  }
  return repr;
}
/**
 * Reads the next {@link NameSample} from the underlying stream and, as a
 * side effect, updates the sentence count, token count and per-type name
 * counters.
 *
 * @return the next sample, or {@code null} when the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
@Override
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample != null) {
    sentenceCount++;
    tokenCount += sample.getSentence().length;
    for (Span nameSpan : sample.getNames()) {
      // merge() replaces the verbose get / null-check / put increment idiom.
      nameCounters.merge(nameSpan.getType(), 1, Integer::sum);
    }
  }
  return sample;
}
/**
 * Generates a hash code of the current span from its start, end and type.
 */
@Override
public int hashCode() {
  // Value-identical to Objects.hash(getStart(), getEnd(), getType()):
  // 31-based accumulation over the same three components, so equals() and
  // hashCode() stay consistent.
  int result = 1;
  result = 31 * result + getStart();
  result = 31 * result + getEnd();
  result = 31 * result + (getType() == null ? 0 : getType().hashCode());
  return result;
}
public String[] encode(Span[] names, int length) { String[] outcomes = new String[length]; for (int i = 0; i < outcomes.length; i++) { outcomes[i] = BioCodec.OTHER; } for (Span name : names) { if (name.getType() == null) { outcomes[name.getStart()] = "default" + "-" + BioCodec.START; } else { outcomes[name.getStart()] = name.getType() + "-" + BioCodec.START; } // now iterate from begin + 1 till end for (int i = name.getStart() + 1; i < name.getEnd(); i++) { if (name.getType() == null) { outcomes[i] = "default" + "-" + BioCodec.CONTINUE; } else { outcomes[i] = name.getType() + "-" + BioCodec.CONTINUE; } } } return outcomes; }
/**
 * Scores a reference/prediction pair that did not match exactly. Spans
 * present in both sets are counted as true positives, spans only in the
 * reference as false negatives, and spans only in the prediction as false
 * positives.
 */
public void missclassified(T reference, T prediction) {
  samples++;

  Set<Span> expected = new HashSet<>(Arrays.asList(asSpanArray(reference)));
  Set<Span> actual = new HashSet<>(Arrays.asList(asSpanArray(prediction)));

  for (Span span : expected) {
    if (actual.contains(span)) {
      addTruePositive(span.getType());
    } else {
      addFalseNegative(span.getType());
    }
  }

  for (Span span : actual) {
    if (!expected.contains(span)) {
      addFalsePositive(span.getType());
    }
  }
}
/**
 * Handles a closing MUC element: a closing name element completes the most
 * recently opened name span at the current token position, and a closing
 * content element flushes the collected tokens and names as a
 * {@link NameSample}.
 */
@Override
public void endElement(String name) {
  if (NAME_ELEMENT_NAMES.contains(name)) {
    // Close the most recently opened name at the current token count.
    Span openName = incompleteNames.pop();
    names.add(new Span(openName.getStart(), text.size(), openName.getType()));
  }

  if (MucElementNames.CONTENT_ELEMENTS.contains(name)) {
    storedSamples.add(new NameSample(text.toArray(new String[text.size()]),
        names.toArray(new Span[names.size()]), isClearAdaptiveData));

    // Only the first sample after a document boundary clears adaptive data.
    if (isClearAdaptiveData) {
      isClearAdaptiveData = false;
    }

    text.clear();
    names.clear();
    isInsideContentElement = false;
  }
}
}
/** * Compares the specified span to the current span. */ public int compareTo(Span s) { if (getStart() < s.getStart()) { return -1; } else if (getStart() == s.getStart()) { if (getEnd() > s.getEnd()) { return -1; } else if (getEnd() < s.getEnd()) { return 1; } else { // compare the type if (getType() == null && s.getType() == null) { return 0; } else if (getType() != null && s.getType() != null) { // use type lexicography order return getType().compareTo(s.getType()); } else if (getType() != null) { return -1; } return 1; } } else { return 1; } }
public void correctlyClassified(T reference, T prediction) { samples++; // add all true positives! Span[] spans = asSpanArray(reference); for (Span span : spans) { addTruePositive(span.getType()); } }
/**
 * Checks if the specified object is a span with the same boundaries and
 * the same (possibly null) type.
 */
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof Span)) {
    return false;
  }
  Span other = (Span) o;
  return getStart() == other.getStart()
      && getEnd() == other.getEnd()
      && Objects.equals(getType(), other.getType());
}
/**
 * Reads the next sample and removes every name whose type is not in the
 * configured set of accepted types.
 *
 * @return the filtered sample, or {@code null} when the stream is exhausted
 * @throws IOException if reading from the underlying stream fails
 */
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
    return null;
  }

  List<Span> keptNames = new ArrayList<>();
  for (Span name : sample.getNames()) {
    if (types.contains(name.getType())) {
      keptNames.add(name);
    }
  }

  return new NameSample(sample.getId(), sample.getSentence(),
      keptNames.toArray(new Span[keptNames.size()]), null,
      sample.isClearAdaptiveDataSet());
}
}
/**
 * Returns a copy of this span with leading and trailing whitespace removed.
 *
 * @param text the text this span's offsets refer to
 *
 * @return the trimmed span, or this same object if there was nothing to
 *         trim; a span consisting only of whitespace collapses to an empty
 *         span positioned at the original start offset
 */
public Span trim(CharSequence text) {
  int trimmedStart = getStart();
  while (trimmedStart < getEnd() && StringUtil.isWhitespace(text.charAt(trimmedStart))) {
    trimmedStart++;
  }

  int trimmedEnd = getEnd();
  while (trimmedEnd > getStart() && StringUtil.isWhitespace(text.charAt(trimmedEnd - 1))) {
    trimmedEnd--;
  }

  if (trimmedStart == getStart() && trimmedEnd == getEnd()) {
    return this;
  }
  if (trimmedStart > trimmedEnd) {
    // All whitespace; collapse to an empty span at the start offset.
    return new Span(getStart(), getStart(), getType());
  }
  return new Span(trimmedStart, trimmedEnd, getType());
}
/**
 * Runs the name finder over the Leipzig test sentences and verifies that a
 * digest over all detected names matches the expected hash.
 *
 * @param model        the model under test
 * @param expectedHash the expected digest over the concatenated
 *                     (type, start, end) values of every detected name
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash)
    throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    LeipzigTestSample sample;
    while ((sample = lines.read()) != null) {
      for (Span name : nameFinder.find(sample.getText())) {
        // Fold each detected name into the digest; concatenation order
        // (type, start, end) must stay fixed for the hash to match.
        String key = name.getType() + name.getStart() + name.getEnd();
        digest.update(key.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
/**
 * Checks that name types containing special characters are parsed.
 */
@Test
public void testTypeWithSpecialChars() throws Exception {
  NameSample parsedSample = NameSample.parse(
      "<START:type-1> U . S . <END> "
          + "President <START:type_2> Barack Obama <END> is considering sending "
          + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .",
      false);

  Span[] names = parsedSample.getNames();

  Assert.assertEquals(3, names.length);
  Assert.assertEquals("type-1", names[0].getType());
  Assert.assertEquals("type_2", names[1].getType());
  Assert.assertEquals("type_3-/;.,&%$", names[2].getType());
}
/**
 * Verifies that a regex spanning multiple tokens is found as a single
 * typed span over the matching token range.
 */
@Test
public void testFindTokenizdPattern() {
  Pattern testPattern = Pattern.compile("[0-9]+ year");
  String[] sentence = new String[]{"a", "80", "year", "b", "c"};

  Map<String, Pattern[]> regexMap = new HashMap<>();
  String type = "match";
  regexMap.put(type, new Pattern[]{testPattern});

  RegexNameFinder finder = new RegexNameFinder(regexMap);
  Span[] result = finder.find(sentence);

  // assertEquals reports expected vs. actual values on failure, unlike
  // assertTrue over a comparison expression.
  Assert.assertEquals(1, result.length);
  Assert.assertEquals(1, result[0].getStart());
  Assert.assertEquals(3, result[0].getEnd());
  Assert.assertEquals(type, result[0].getType());
}
/**
 * Filters the sample stream on a single type and checks that only names of
 * that type survive.
 */
@Test
public void testSingleFilter() throws IOException {
  filter = new NameSampleTypeFilter(new String[] {organization}, sampleStream(text));

  NameSample filtered = filter.read();

  Assert.assertEquals(1, filtered.getNames().length);
  Assert.assertEquals(organization, filtered.getNames()[0].getType());
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }