Refine search
/**
 * Tests whether the given span lies entirely within this span. Identical
 * spans are considered to contain each other.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the specified span is contained by this span; false otherwise.
 */
public boolean contains(Span s) {
  boolean beginsInside = start <= s.getStart();
  boolean endsInside = s.getEnd() <= end;
  return beginsInside && endsInside;
}
Parse startToken = tokens[nameTokenSpan.getStart()]; Parse endToken = tokens[nameTokenSpan.getEnd() - 1]; Parse commonParent = startToken.getCommonParent(endToken); Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd()); if (nameSpan.equals(commonParent.getSpan())) { commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { boolean crossingKids = false; for (Parse kid : kids) { if (nameSpan.crosses(kid.getSpan())) { crossingKids = true; if (commonParent.getType().equals("NP")) { Parse[] grandKids = kids[0].getChildren(); if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex()));
/**
 * Tests whether the given span begins at the same offset as this span and
 * is fully contained within it.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the specified span starts with this span and is contained
 *         in this span; false otherwise
 */
public boolean startsWith(Span s) {
  if (getStart() != s.getStart()) {
    return false;
  }
  return contains(s);
}
params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); "programs", "." }; Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 4, "location"), names1[0]); Assert.assertEquals(new Span(5, 7, "person"), names1[1]); Assert.assertEquals(new Span(10, 12, "location"), names1[2]); Assert.assertEquals(new Span(28, 30, "location"), names1[3]); Assert.assertEquals("location", names1[0].getType()); Assert.assertEquals("person", names1[1].getType()); Assert.assertEquals("location", names1[2].getType()); Assert.assertEquals("location", names1[3].getType()); Assert.assertEquals(new Span(0, 2, "person"), names2[0]); Assert.assertEquals(new Span(7, 15, "organization"), names2[1]); Assert.assertEquals("person", names2[0].getType()); Assert.assertEquals("organization", names2[1].getType());
/**
 * Adjoins the given node to the right frontier of this parse at the part
 * at {@code parseIndex}: a new node is created spanning from the start of
 * that part to the end of {@code node}, absorbing both (plus any punctuation
 * preceding {@code node}), and replaces the original part in {@code parts}.
 *
 * @param node the parse to adjoin.
 * @param rules the head rules used to pick the head of the new node.
 * @param parseIndex the index in {@code parts} of the child being extended.
 *
 * @return the newly created adjoined node.
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
  Parse lastChild = parts.get(parseIndex);
  // New node keeps the extended child's type; head is recomputed over
  // the (lastChild, node) pair via the head rules.
  Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(), node.getSpan().getEnd()),lastChild.getType(),1, rules.getHead(new Parse[]{lastChild,node},lastChild.getType()));
  adjNode.parts.add(lastChild);
  // Punctuation collected before node travels with it into the new node.
  if (node.prevPunctSet != null) {
    adjNode.parts.addAll(node.prevPunctSet);
  }
  adjNode.parts.add(node);
  // Replace the original child in place with the adjoined node.
  parts.set(parseIndex,adjNode);
  return adjNode;
}
/**
 * Converts the parse from the tagger back into the coordinate space of the
 * original sentence, recursively rebuilding each node with its span offsets
 * remapped through {@code mIndexMap}.
 *
 * @param parseFromTagger the parse produced over the tagger's (escaped)
 *     sentence representation.
 * @return the final parse over the original sentence.
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  int start = parseFromTagger.getSpan().getStart();
  int end = parseFromTagger.getSpan().getEnd();
  // Remap the tagger-space offsets back to original-sentence offsets;
  // type, probability and head index carry over unchanged.
  Parse transformedParse = new Parse(mSentence, new Span( mIndexMap.get(start), mIndexMap.get(end)), parseFromTagger.getType(), parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
  // Recursively transform and re-attach every child node.
  Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
  for (Parse child : parseFromTaggerChildrens) {
    transformedParse.insert(transformParseFromTagger(child));
  }
  return transformedParse;
}
}
/**
 * Repairs possessive constructions in the given parse: for each POS tag
 * whose following sibling sits one level higher in the tree, wraps the run
 * of subsequent same-parent tags into a new NP node and inserts it.
 * (Note: method name spelling "Possesives" is part of the public API.)
 *
 * @param parse the parse whose possessive structures are fixed in place.
 */
public static void fixPossesives(Parse parse) {
  Parse[] tags = parse.getTagNodes();
  for (int ti = 0; ti < tags.length; ti++) {
    if (tags[ti].getType().equals("POS")) {
      // Only rewrap when the next tag attaches above the POS tag's parent.
      if (ti + 1 < tags.length && tags[ti + 1].getParent() == tags[ti].getParent().getParent()) {
        int start = tags[ti + 1].getSpan().getStart();
        int end = tags[ti + 1].getSpan().getEnd();
        // Extend the NP over every following tag that shares the same
        // parent as its predecessor; stop at the first parent change.
        for (int npi = ti + 2; npi < tags.length; npi++) {
          if (tags[npi].getParent() == tags[npi - 1].getParent()) {
            end = tags[npi].getSpan().getEnd();
          } else {
            break;
          }
        }
        // Head of the new NP is the first tag after the POS marker.
        Parse npPos = new Parse(parse.getText(), new Span(start,end), "NP", 1 , tags[ti + 1]);
        parse.insert(npPos);
      }
    }
  }
}
for (int i = span.getStart(); i < span.getEnd(); i++) { coveredIndexes.put(i, span); Span conflictingName = coveredIndexes.get(sentence.getStart()); conflictingName.getStart() < sentence.getStart()) { Span lastSentence = sentences.remove(sentences.size() - 1); sentences.add(new Span(lastSentence.getStart(), sentence.getEnd())); String sentenceText = sentence.getCoveredText( sample.getText()).toString(); tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i); tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1); if (sentence.contains(entitySpan)) { entityIdSet.remove(ann.getId()); entitySpan = entitySpan.trim(sample.getText()); Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart()); Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd()); mappedFragments.add(new Span(nameBeginIndex, nameEndIndex, entity.getType())); } else { System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!");
/**
 * Obtain {@code Span}s for every non-POS constituent beneath the given
 * parse node, walking the tree depth-first.
 *
 * @param parse the parse from which to obtain the spans
 * @return an array containing every span for the parse
 */
private static Span[] getConstituencySpans(final Parse parse) {
  List<Span> spans = new ArrayList<>();
  Stack<Parse> pending = new Stack<>();
  for (Parse child : parse.getChildren()) {
    pending.push(child);
  }
  while (!pending.isEmpty()) {
    Parse current = pending.pop();
    if (current.isPosTag()) {
      // POS-tag leaves are not constituents; do not descend further.
      continue;
    }
    Span span = current.getSpan();
    spans.add(new Span(span.getStart(), span.getEnd(), current.getType()));
    for (Parse child : current.getChildren()) {
      pending.push(child);
    }
  }
  return spans.toArray(new Span[spans.size()]);
}
/**
 * Appends the given daughter (preceded by any punctuation collected before
 * it) to this parse, extends this parse's span to the daughter's end, and
 * recomputes the head using the supplied head rules.
 *
 * @param daughter the parse to attach as the new rightmost child.
 * @param rules the head rules used to recompute this parse's head.
 */
public void add(Parse daughter, HeadRules rules) {
  // Punctuation gathered before the daughter becomes part of this node.
  if (daughter.prevPunctSet != null) {
    parts.addAll(daughter.prevPunctSet);
  }
  parts.add(daughter);
  // Grow this node's span rightward to cover the new daughter.
  this.span = new Span(span.getStart(),daughter.getSpan().getEnd());
  this.head = rules.getHead(getChildren(),type);
  this.headIndex = head.headIndex;
}
String tokenString = tokens[i].getCoveredText(sentence).toString(); String escapedToken = escape(tokenString); tokenList[i] = escapedToken; int start = tokens[i].getStart(); mIndexMap.put(escapedStart, start); int end = tokens[i].getEnd(); mIndexMap.put(escapedEnd, end); mParseForTagger = new Parse(tokenizedSentence, new Span(0, tokenizedSentence.length()), "INC", 1, null); mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start, start + token.length()), opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
/**
 * Parses an annotator-note line into an {@link AnnotatorNoteAnnotation}.
 * The note text spans from the first value token through the last token
 * on the line.
 *
 * @param tokens the token spans of the line's fields.
 * @param line the raw annotation line being parsed.
 * @return the parsed annotator-note annotation.
 * @throws IOException declared by the parsing contract.
 */
@Override BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // Note text covers everything from the first value token to line end.
  Span noteSpan = new Span( tokens[START_VALUE_OFFSET].getStart(), tokens[tokens.length - 1].getEnd() );
  return new AnnotatorNoteAnnotation(tokens[ID_OFFSET].getCoveredText(line).toString(), tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString(), noteSpan.getCoveredText(line).toString());
}
}
String sentenceText = sentenceSpan.getCoveredText(text).toString(); String[] tokens = Span.spansToStrings(tokenSpans, sentenceText); int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart(); int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart(); ann.texts = textSegments.toArray(new String[textSegments.size()]); ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); ann.type = name.getType();
/**
 * Runs the given name-finder model over the Leipzig test sentences and
 * asserts that a digest of all found names matches the expected hash.
 *
 * @param model the token name finder model under test.
 * @param expectedHash the expected digest of all detected name spans.
 * @throws Exception if reading the samples or creating the digest fails.
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);
  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    LeipzigTestSample line;
    while ((line = lines.read()) != null) {
      Span[] names = nameFinder.find(line.getText());
      for (Span name : names) {
        // NOTE(review): type + start + end are concatenated without a
        // separator, so e.g. (1, 23) and (12, 3) hash identically.
        // Harmless for a regression checksum, but adding separators would
        // change every expectedHash passed by callers — confirm before fixing.
        digest.update((name.getType() + name.getStart() + name.getEnd()).getBytes(StandardCharsets.UTF_8));
      }
    }
  }
  Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
int start = tokens[0].getStart(); int end = tokens[tokens.length - 1].getEnd(); for (Span candToken : candTokens) { Span cSpan = candToken; String ctok = sent.substring(cSpan.getStart(), cSpan.getEnd()); cSpan = new Span(cSpan.getStart() + start, cSpan.getEnd() + start); if (cSpan.contains(tokens[ti])) { if (!foundTrainingTokens) { firstTrainingToken = ti; else if (cSpan.getEnd() < tokens[ti].getEnd()) { break; else if (tokens[ti].getEnd() < cSpan.getStart()) { " token=" + text.substring(tokens[ti].getStart(), tokens[ti].getEnd())); int cStart = cSpan.getStart(); for (int i = tSpan.getStart() + 1; i < tSpan.getEnd(); i++) { String[] context = cg.getContext(ctok, i - cStart); events.add(new Event(TokenizerME.NO_SPLIT, context)); if (tSpan.getEnd() != cSpan.getEnd()) { String[] context = cg.getContext(ctok, tSpan.getEnd() - cStart); events.add(new Event(TokenizerME.SPLIT, context));
/** * Returns true if the specified span intersects with this span. * * @param s The span to compare with this span. * * @return true is the spans overlap; false otherwise. */ public boolean intersects(Span s) { int sstart = s.getStart(); //either s's start is in this or this' start is in s return this.contains(s) || s.contains(this) || getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd(); }