Refine search
/**
 * Tests whether the given span lies entirely within this span. Identical
 * spans are considered to contain each other.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the specified span is contained by this span; false otherwise.
 */
public boolean contains(Span s) {
  boolean beginsInside = start <= s.getStart();
  boolean endsInside = s.getEnd() <= end;
  return beginsInside && endsInside;
}
Parse startToken = tokens[nameTokenSpan.getStart()]; Parse endToken = tokens[nameTokenSpan.getEnd() - 1]; Parse commonParent = startToken.getCommonParent(endToken); Span nameSpan = new Span(startToken.getSpan().getStart(), endToken.getSpan().getEnd()); if (nameSpan.equals(commonParent.getSpan())) { commonParent.insert(new Parse(commonParent.getText(), nameSpan, tag, 1.0, endToken.getHeadIndex())); } else { boolean crossingKids = false; for (Parse kid : kids) { if (nameSpan.crosses(kid.getSpan())) { crossingKids = true; if (commonParent.getType().equals("NP")) { Parse[] grandKids = kids[0].getChildren(); if (grandKids.length > 1 && nameSpan.contains(grandKids[grandKids.length - 1].getSpan())) { commonParent.insert(new Parse(commonParent.getText(), commonParent.getSpan(), tag, 1.0, commonParent.getHeadIndex()));
/**
 * Tests whether the given span begins at the same offset as this span and
 * is fully contained within it.
 *
 * @param s The span to compare with this span.
 *
 * @return true if the specified span starts with this span and is contained
 *         in this span; false otherwise
 */
public boolean startsWith(Span s) {
  if (getStart() != s.getStart()) {
    return false;
  }
  return contains(s);
}
params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); "programs", "." }; Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 4, "location"), names1[0]); Assert.assertEquals(new Span(5, 7, "person"), names1[1]); Assert.assertEquals(new Span(10, 12, "location"), names1[2]); Assert.assertEquals(new Span(28, 30, "location"), names1[3]); Assert.assertEquals("location", names1[0].getType()); Assert.assertEquals("person", names1[1].getType()); Assert.assertEquals("location", names1[2].getType()); Assert.assertEquals("location", names1[3].getType()); Assert.assertEquals(new Span(0, 2, "person"), names2[0]); Assert.assertEquals(new Span(7, 15, "organization"), names2[1]); Assert.assertEquals("person", names2[0].getType()); Assert.assertEquals("organization", names2[1].getType());
/**
 * Adjoins the given node to the right frontier of this parse at the part
 * at {@code parseIndex}: a new node is created spanning from the start of
 * that part to the end of {@code node}, absorbing both (plus any punctuation
 * preceding {@code node}), and replaces the original part in {@code parts}.
 *
 * @param node the parse to adjoin.
 * @param rules the head rules used to pick the head of the new node.
 * @param parseIndex the index in {@code parts} of the child being extended.
 *
 * @return the newly created adjoined node.
 */
public Parse adjoinRoot(Parse node, HeadRules rules, int parseIndex) {
  Parse lastChild = parts.get(parseIndex);
  // New node keeps the extended child's type; head is recomputed over
  // the (lastChild, node) pair via the head rules.
  Parse adjNode = new Parse(this.text,new Span(lastChild.getSpan().getStart(), node.getSpan().getEnd()),lastChild.getType(),1, rules.getHead(new Parse[]{lastChild,node},lastChild.getType()));
  adjNode.parts.add(lastChild);
  // Punctuation collected before node travels with it into the new node.
  if (node.prevPunctSet != null) {
    adjNode.parts.addAll(node.prevPunctSet);
  }
  adjNode.parts.add(node);
  // Replace the original child in place with the adjoined node.
  parts.set(parseIndex,adjNode);
  return adjNode;
}
/**
 * Converts the parse from the tagger back into the coordinate space of the
 * original sentence, recursively rebuilding each node with its span offsets
 * remapped through {@code mIndexMap}.
 *
 * @param parseFromTagger the parse produced over the tagger's (escaped)
 *     sentence representation.
 * @return the final parse over the original sentence.
 */
Parse transformParseFromTagger(Parse parseFromTagger) {
  int start = parseFromTagger.getSpan().getStart();
  int end = parseFromTagger.getSpan().getEnd();
  // Remap the tagger-space offsets back to original-sentence offsets;
  // type, probability and head index carry over unchanged.
  Parse transformedParse = new Parse(mSentence, new Span( mIndexMap.get(start), mIndexMap.get(end)), parseFromTagger.getType(), parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
  // Recursively transform and re-attach every child node.
  Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
  for (Parse child : parseFromTaggerChildrens) {
    transformedParse.insert(transformParseFromTagger(child));
  }
  return transformedParse;
}
}
/**
 * Repairs possessive constructions in the given parse: for each POS tag
 * whose following sibling sits one level higher in the tree, wraps the run
 * of subsequent same-parent tags into a new NP node and inserts it.
 * (Note: method name spelling "Possesives" is part of the public API.)
 *
 * @param parse the parse whose possessive structures are fixed in place.
 */
public static void fixPossesives(Parse parse) {
  Parse[] tags = parse.getTagNodes();
  for (int ti = 0; ti < tags.length; ti++) {
    if (tags[ti].getType().equals("POS")) {
      // Only rewrap when the next tag attaches above the POS tag's parent.
      if (ti + 1 < tags.length && tags[ti + 1].getParent() == tags[ti].getParent().getParent()) {
        int start = tags[ti + 1].getSpan().getStart();
        int end = tags[ti + 1].getSpan().getEnd();
        // Extend the NP over every following tag that shares the same
        // parent as its predecessor; stop at the first parent change.
        for (int npi = ti + 2; npi < tags.length; npi++) {
          if (tags[npi].getParent() == tags[npi - 1].getParent()) {
            end = tags[npi].getSpan().getEnd();
          } else {
            break;
          }
        }
        // Head of the new NP is the first tag after the POS marker.
        Parse npPos = new Parse(parse.getText(), new Span(start,end), "NP", 1 , tags[ti + 1]);
        parse.insert(npPos);
      }
    }
  }
}
for (int i = span.getStart(); i < span.getEnd(); i++) { coveredIndexes.put(i, span); Span conflictingName = coveredIndexes.get(sentence.getStart()); conflictingName.getStart() < sentence.getStart()) { Span lastSentence = sentences.remove(sentences.size() - 1); sentences.add(new Span(lastSentence.getStart(), sentence.getEnd())); String sentenceText = sentence.getCoveredText( sample.getText()).toString(); tokenIndexMap.put(-(sentence.getStart() + tokens[i].getStart()), i); tokenIndexMap.put(sentence.getStart() + tokens[i].getEnd(), i + 1); if (sentence.contains(entitySpan)) { entityIdSet.remove(ann.getId()); entitySpan = entitySpan.trim(sample.getText()); Integer nameBeginIndex = tokenIndexMap.get(-entitySpan.getStart()); Integer nameEndIndex = tokenIndexMap.get(entitySpan.getEnd()); mappedFragments.add(new Span(nameBeginIndex, nameEndIndex, entity.getType())); } else { System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!");
/**
 * Obtain {@code Span}s for every non-POS constituent beneath the given
 * parse node, walking the tree depth-first.
 *
 * @param parse the parse from which to obtain the spans
 * @return an array containing every span for the parse
 */
private static Span[] getConstituencySpans(final Parse parse) {
  List<Span> spans = new ArrayList<>();
  Stack<Parse> pending = new Stack<>();
  for (Parse child : parse.getChildren()) {
    pending.push(child);
  }
  while (!pending.isEmpty()) {
    Parse current = pending.pop();
    if (current.isPosTag()) {
      // POS-tag leaves are not constituents; do not descend further.
      continue;
    }
    Span span = current.getSpan();
    spans.add(new Span(span.getStart(), span.getEnd(), current.getType()));
    for (Parse child : current.getChildren()) {
      pending.push(child);
    }
  }
  return spans.toArray(new Span[spans.size()]);
}
/**
 * Appends the given daughter (preceded by any punctuation collected before
 * it) to this parse, extends this parse's span to the daughter's end, and
 * recomputes the head using the supplied head rules.
 *
 * @param daughter the parse to attach as the new rightmost child.
 * @param rules the head rules used to recompute this parse's head.
 */
public void add(Parse daughter, HeadRules rules) {
  // Punctuation gathered before the daughter becomes part of this node.
  if (daughter.prevPunctSet != null) {
    parts.addAll(daughter.prevPunctSet);
  }
  parts.add(daughter);
  // Grow this node's span rightward to cover the new daughter.
  this.span = new Span(span.getStart(),daughter.getSpan().getEnd());
  this.head = rules.getHead(getChildren(),type);
  this.headIndex = head.headIndex;
}
String tokenString = tokens[i].getCoveredText(sentence).toString(); String escapedToken = escape(tokenString); tokenList[i] = escapedToken; int start = tokens[i].getStart(); mIndexMap.put(escapedStart, start); int end = tokens[i].getEnd(); mIndexMap.put(escapedEnd, end); mParseForTagger = new Parse(tokenizedSentence, new Span(0, tokenizedSentence.length()), "INC", 1, null); mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start, start + token.length()), opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
/**
 * Parses an annotator-note line into an {@link AnnotatorNoteAnnotation}.
 * The note text spans from the first value token through the last token
 * on the line.
 *
 * @param tokens the token spans of the line's fields.
 * @param line the raw annotation line being parsed.
 * @return the parsed annotator-note annotation.
 * @throws IOException declared by the parsing contract.
 */
@Override BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // Note text covers everything from the first value token to line end.
  Span noteSpan = new Span( tokens[START_VALUE_OFFSET].getStart(), tokens[tokens.length - 1].getEnd() );
  return new AnnotatorNoteAnnotation(tokens[ID_OFFSET].getCoveredText(line).toString(), tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString(), noteSpan.getCoveredText(line).toString());
}
}
String sentenceText = sentenceSpan.getCoveredText(text).toString(); String[] tokens = Span.spansToStrings(tokenSpans, sentenceText); int beginOffset = tokenSpans[name.getStart()].getStart() + sentenceSpan.getStart(); int endOffset = tokenSpans[name.getEnd() - 1].getEnd() + sentenceSpan.getStart(); ann.texts = textSegments.toArray(new String[textSegments.size()]); ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]); ann.type = name.getType();
/**
 * Runs the given name-finder model over the Leipzig test sentences and
 * asserts that a digest of all found names matches the expected hash.
 *
 * @param model the token name finder model under test.
 * @param expectedHash the expected digest of all detected name spans.
 * @throws Exception if reading the samples or creating the digest fails.
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);
  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    LeipzigTestSample line;
    while ((line = lines.read()) != null) {
      Span[] names = nameFinder.find(line.getText());
      for (Span name : names) {
        // NOTE(review): type + start + end are concatenated without a
        // separator, so e.g. (1, 23) and (12, 3) hash identically.
        // Harmless for a regression checksum, but adding separators would
        // change every expectedHash passed by callers — confirm before fixing.
        digest.update((name.getType() + name.getStart() + name.getEnd()).getBytes(StandardCharsets.UTF_8));
      }
    }
  }
  Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
int start = tokens[0].getStart(); int end = tokens[tokens.length - 1].getEnd(); for (Span candToken : candTokens) { Span cSpan = candToken; String ctok = sent.substring(cSpan.getStart(), cSpan.getEnd()); cSpan = new Span(cSpan.getStart() + start, cSpan.getEnd() + start); if (cSpan.contains(tokens[ti])) { if (!foundTrainingTokens) { firstTrainingToken = ti; else if (cSpan.getEnd() < tokens[ti].getEnd()) { break; else if (tokens[ti].getEnd() < cSpan.getStart()) { " token=" + text.substring(tokens[ti].getStart(), tokens[ti].getEnd())); int cStart = cSpan.getStart(); for (int i = tSpan.getStart() + 1; i < tSpan.getEnd(); i++) { String[] context = cg.getContext(ctok, i - cStart); events.add(new Event(TokenizerME.NO_SPLIT, context)); if (tSpan.getEnd() != cSpan.getEnd()) { String[] context = cg.getContext(ctok, tSpan.getEnd() - cStart); events.add(new Event(TokenizerME.SPLIT, context));
/** * Returns true if the specified span intersects with this span. * * @param s The span to compare with this span. * * @return true is the spans overlap; false otherwise. */ public boolean intersects(Span s) { int sstart = s.getStart(); //either s's start is in this or this' start is in s return this.contains(s) || s.contains(this) || getStart() <= sstart && sstart < getEnd() || sstart <= getStart() && getStart() < s.getEnd(); }