/**
 * Checks whether this Span and the Span s overlap.
 * <p>
 * The result is true exactly when this Span starts before s ends and
 * s starts before this Span ends.
 *
 * @param s the Span to test against
 * @return true if {@code start < s.getEnd()} and {@code s.getStart() < end}
 */
public boolean hits( Span s ){
    if( start >= s.getEnd() ){
        // This Span begins at or after the end of s.
        return false;
    }
    return s.getStart() < end;
}
/**
 * Appends, for every Span in the list, the part of the text addressed by
 * that Span followed by a single blank to the StringBuilder.
 *
 * @param spans the Spans selecting the text parts to append
 * @param text  the text the Spans refer to
 * @param sb    the StringBuilder receiving the output
 */
private static void handleSpans( List<Span> spans, String text, StringBuilder sb ){
    for( Span span: spans ){
        final String covered = text.substring( span.getStart(), span.getEnd() );
        sb.append( covered ).append( " " );
    }
}
/**
 * Compares this Span with another Span by position.
 * <p>
 * Note that this method takes a Span parameter and is therefore an overload,
 * not an override of {@link Object#equals(Object)}.
 *
 * @param s the Span to compare with, may be null
 * @return true if s is not null and has the same start and end as this Span
 */
public boolean equals(Span s){
    if( s == null ){
        // Nothing is equal to null; avoids a NullPointerException below.
        return false;
    }
    return this.start == s.getStart() && this.end == s.getEnd();
}
/**
 * Replaces the content covered by the Span s, from s.getStart() (included)
 * up to s.getEnd() (excluded), with the given String.
 *
 * @param s   the Span marking the range to replace
 * @param str the replacement text
 * @return the result of the position based replace call
 */
public SpanManager replace(Span s, String str){
    final int from = s.getStart();
    final int to = s.getEnd();
    return replace( from, to, str );
}
/**
 * Deletes the content covered by the Span s, from s.getStart() (included)
 * up to s.getEnd() (excluded).
 *
 * @param s the Span marking the range to delete
 * @return the result of the position based delete call
 */
public SpanManager delete(Span s){
    final int from = s.getStart();
    final int to = s.getEnd();
    return delete( from, to );
}
/**
 * Span based variant of indexOf: forwards to the position based
 * indexOf(str, start, end) using the start and end of the Span s.
 *
 * @param str the String to look for
 * @param s   the Span supplying the start and end positions
 * @return the value returned by the position based indexOf call
 */
public int indexOf(String str, Span s){
    final int from = s.getStart();
    final int to = s.getEnd();
    return indexOf( str, from, to );
}
/**
 * Collects all tags delivered by getTag into the given list.
 * <p>
 * The list is registered at the SpanManager first. The search then starts
 * at the end of an empty Span(0, 0) and continues at the end of each tag
 * found, until getTag returns null. If no tag was collected, the list is
 * removed from the SpanManager again.
 *
 * @param sm    the SpanManager to search in
 * @param spans the list receiving the tag Spans
 */
private void parseTags(SpanManager sm, List<Span> spans) {
    sm.manageList(spans);

    final Span origin = new Span(0, 0);
    for (Span tag = getTag(sm, origin.getEnd()); tag != null; tag = getTag(sm, tag.getEnd())) {
        spans.add(tag);
    }

    if (spans.isEmpty()) {
        // Nothing found, so the list does not need to be managed.
        sm.removeManagedList(spans);
    }
}
/**
 * Moves the start of the Span s past all leading '-' characters and trims
 * the result.
 * <p>
 * The bounds check is evaluated before the character is read, so the scan
 * never looks at a position at or beyond the end of the Span (the previous
 * order could read past the Span or fail at the end of the text).
 *
 * @param sm the SpanManager holding the text
 * @param s  the Span to adjust; its start is changed via setStart
 * @return the result of {@code s.setStart(start).trim(sm)}
 */
private Span removeHr(SpanManager sm, Span s) {
    final int end = s.getEnd();
    int start = s.getStart();
    // Check the bound first: charAt must not be called with start == end.
    while (start < end && sm.charAt(start) == '-') {
        start++;
    }
    return s.setStart(start).trim(sm);
}
/**
 * Returns the text of a tag without its first and last character.
 * <p>
 * A new Span reaching from tag.getStart() + 1 to tag.getEnd() - 1 is built,
 * trimmed against the SpanManager and used to read the substring.
 *
 * @param sm  the SpanManager holding the text
 * @param tag the Span covering the whole tag
 * @return the substring addressed by the trimmed inner Span
 */
private String getTagText(SpanManager sm, Span tag) {
    final int innerStart = tag.getStart() + 1;
    final int innerEnd = tag.getEnd() - 1;
    final Span inner = new Span(innerStart, innerEnd);
    return sm.substring(inner.trim(sm));
}
/**
 * Collects the Templates of all contents in ccl whose range is hit by the
 * Span s.
 * <p>
 * The contents are laid out one after another, each one starting one
 * position behind the end of the previous one. Before a content is asked
 * for its Templates, a copy of s is shifted by the negative start offset
 * of that content.
 *
 * @param s the Span selecting the range of interest
 * @return the Templates of all contents hit by s
 */
public List<Template> getTemplates(Span s){
    final List<Template> collected = new ArrayList<Template>();
    int offset = 0;
    for( Content c: ccl ){
        final Span window = new Span( offset, offset + c.length() );
        if( window.hits(s) ){
            collected.addAll( c.getTemplates( s.clone().adjust( -offset ) ) );
        }
        // The next content starts one position after this one ends.
        offset = window.getEnd() + 1;
    }
    return collected;
}
}
/**
 * Collects the Links of the given type from all contents in ccl whose range
 * is hit by the Span s.
 * <p>
 * The contents are laid out one after another, each one starting one
 * position behind the end of the previous one. Before a content is asked
 * for its Links, a copy of s is shifted by the negative start offset of
 * that content.
 *
 * @param linkType the type of Links to collect
 * @param s        the Span selecting the range of interest
 * @return the Links of all contents hit by s
 */
public List<Link> getLinks( Link.type linkType, Span s){
    final List<Link> collected = new ArrayList<Link>();
    int offset = 0;
    for( Content c: ccl ){
        final Span window = new Span( offset, offset + c.length() );
        if( window.hits(s) ){
            collected.addAll( c.getLinks( linkType, s.clone().adjust( -offset ) ) );
        }
        // The next content starts one position after this one ends.
        offset = window.getEnd() + 1;
    }
    return collected;
}
/**
 * Converts the categories of the parsed page into Link objects of type
 * Link.Type.CATEGORY and stores them in the article.
 *
 * @param article the article receiving the category links
 * @param page    the parsed page providing the categories
 */
private void setCategories(Article article, ParsedPage page) {
    final ArrayList<Link> converted = new ArrayList<Link>(10);
    for (final de.tudarmstadt.ukp.wikipedia.parser.Link parsed : page.getCategories()) {
        final Link category = new Link(
                parsed.getTarget(),
                parsed.getText(),
                parsed.getPos().getStart(),
                parsed.getPos().getEnd(),
                Link.Type.CATEGORY);
        converted.add(category);
    }
    article.setCategories(converted);
}
public ParsedPageLink(Link link) { if (link.getTarget() != null) { page = normalizePageName(link.getTarget().trim()); } else { page = StringTable.EMPTY_STRING; } if (link.getText() != null) { form = removeSuffix(removeQuotes(link.getText().trim())); } String context = link.getHomeElement().getText(); if (context != null) { Span span = link.getPos(); leftContext = context.substring(0, span.getStart()); rightContext = context.substring(span.getEnd(), context.length()); } }
// The new offset lies one position behind the end of the Span currently held in a.
int offset = a.getEnd()+1;
// a now describes the Span from offset to offset + c.length().
a = new Span( offset, offset+ c.length() );
/** * Returns the number of Equality Chars which are used to specify the level * of the Section. */ private int getSectionLevel(SpanManager sm, Span sectionNameSpan) { int begin = sectionNameSpan.getStart(); int end = sectionNameSpan.getEnd(); int level = 0; try { while ((sm.charAt(begin + level) == '=') && (sm.charAt(end - 1 - level) == '=')) { level++; } } catch (StringIndexOutOfBoundsException e) { // there is no need to do anything! logger.debug("EXCEPTION IS OK: {}", e.getLocalizedMessage()); } if (begin + level == end) { level = (level - 1) / 2; } return level; }
/**
 * Collects the format Spans of the given type from all contents in ccl
 * whose range is hit by the Span s.
 * <p>
 * The contents are laid out one after another, each one starting one
 * position behind the end of the previous one. A copy of s is shifted by
 * the negative start offset of a content before that content is queried;
 * copies of the Spans it returns are shifted back by the same offset
 * before they are added to the result.
 *
 * @param t the format type to look for
 * @param s the Span selecting the range of interest
 * @return the shifted copies of the format Spans of all contents hit by s
 */
public List<Span> getFormatSpans(FormatType t, Span s){
    final List<Span> collected = new ArrayList<Span>();
    int offset = 0;
    for( Content c: ccl ){
        final Span window = new Span( offset, offset + c.length() );
        if( window.hits(s) ){
            for( Span local: c.getFormatSpans( t, s.clone().adjust( -offset ) ) ){
                collected.add( local.clone().adjust( offset ) );
            }
        }
        // The next content starts one position after this one ends.
        offset = window.getEnd() + 1;
    }
    return collected;
}
/**
 * Converts a parsed link into a Link and adds it to the matching list.
 * <ul>
 * <li>INTERNAL links get the type jsonWikipediaType and are added to links.</li>
 * <li>EXTERNAL links get the type jsonWikipediaType and are added to externalLinks.</li>
 * <li>IMAGE links get the type Link.Type.IMAGE and are added to links.</li>
 * <li>Links of any other type are ignored.</li>
 * </ul>
 * Links without a target (null or empty) are ignored as well; a null target
 * no longer causes a NullPointerException.
 *
 * @param links             the list receiving internal and image links
 * @param externalLinks     the list receiving external links
 * @param link              the parsed link to convert
 * @param jsonWikipediaType the type assigned to internal and external links
 * @return the created Link, or null if the link was ignored
 */
private Link addLink(final List<Link> links, final List<Link> externalLinks,
        final de.tudarmstadt.ukp.wikipedia.parser.Link link, final Link.Type jsonWikipediaType) {
    final String target = link.getTarget();
    if (target == null || target.isEmpty()) {
        return null;
    }

    if (link.getType() == de.tudarmstadt.ukp.wikipedia.parser.Link.type.INTERNAL) {
        final Link internal = new Link(target, link.getText(), link.getPos().getStart(),
                link.getPos().getEnd(), jsonWikipediaType);
        links.add(internal);
        return internal;
    }

    if (link.getType() == de.tudarmstadt.ukp.wikipedia.parser.Link.type.EXTERNAL) {
        final Link external = new Link(target, link.getText(), link.getPos().getStart(),
                link.getPos().getEnd(), jsonWikipediaType);
        // NOTE(review): unlike the other branches, the list receives its own
        // instance rather than the returned one. Kept as is because callers
        // may rely on the two objects being distinct; confirm and unify.
        externalLinks.add(new Link(target, link.getText(), link.getPos().getStart(),
                link.getPos().getEnd(), jsonWikipediaType));
        return external;
    }

    if (link.getType() == de.tudarmstadt.ukp.wikipedia.parser.Link.type.IMAGE) {
        final Link image = new Link(target, link.getText(), link.getPos().getStart(),
                link.getPos().getEnd(), Link.Type.IMAGE);
        links.add(image);
        return image;
    }

    // Any other link type is not exported.
    return null;
}