/**
 * Walks the document's text blocks in order and marks qualifying list-item
 * blocks as content.
 *
 * <p>A block that is already content and carries
 * {@code DefaultLabels.VERY_LIKELY_CONTENT} sets the reference tag level.
 * Every other block is marked as content when its tag level is strictly
 * greater than that reference level, it carries both
 * {@code DefaultLabels.MIGHT_BE_CONTENT} and {@code DefaultLabels.LI}, and
 * its link density is exactly {@code 0}. A block that fails any of these
 * checks clears the reference level, so nothing further is marked until
 * another reference block is seen.
 *
 * @param doc the document whose text blocks are examined and possibly updated
 * @return {@code true} if {@code setIsContent(true)} was called on at least
 *         one block, {@code false} otherwise
 * @throws BoilerpipeProcessingException declared by the signature; nothing
 *         in this body throws it explicitly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE acts as "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;

        if (!qualifies) {
            // The run of qualifying blocks is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
} // closes the enclosing declaration, which opens outside this excerpt
if(ok && sameTagLevelOnly && prevBlock.getTagLevel() != block.getTagLevel()) { ok = false;
for (TextBlock tb : doc.getTextBlocks()) { if(tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) { tagLevel = tb.getTagLevel(); break; if (!tb.isContent()) { if(tb.getNumWords() >= 100 && tb.getTagLevel() == tagLevel) { tb.setIsContent(true); changes = true;
level = tb.getTagLevel(); .hasPrevious();) { TextBlock tb = it.previous(); final int tl = tb.getTagLevel(); if(tl < level) { break; .hasNext();) { TextBlock tb = it.next(); final int tl = tb.getTagLevel(); if(tl < level) { break;
if(ok && sameTagLevelOnly && prevBlock.getTagLevel() != block.getTagLevel()) { ok = false;
if(ok && sameTagLevelOnly && prevBlock.getTagLevel() != block.getTagLevel()) { ok = false;
level = tb.getTagLevel(); .hasPrevious();) { TextBlock tb = it.previous(); final int tl = tb.getTagLevel(); if(tl < level) { break; .hasNext();) { TextBlock tb = it.next(); final int tl = tb.getTagLevel(); if(tl < level) { break;
for (TextBlock tb : doc.getTextBlocks()) { if(tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) { tagLevel = tb.getTagLevel(); break; if (!tb.isContent()) { if(tb.getNumWords() >= 100 && tb.getTagLevel() == tagLevel) { tb.setIsContent(true); changes = true;
/**
 * Walks the document's text blocks in order and marks qualifying list-item
 * blocks as content.
 *
 * <p>A block that is already content and carries
 * {@code DefaultLabels.VERY_LIKELY_CONTENT} sets the reference tag level.
 * Every other block is marked as content when its tag level is strictly
 * greater than that reference level, it carries both
 * {@code DefaultLabels.MIGHT_BE_CONTENT} and {@code DefaultLabels.LI}, and
 * its link density is exactly {@code 0}. A block that fails any of these
 * checks clears the reference level, so nothing further is marked until
 * another reference block is seen.
 *
 * @param doc the document whose text blocks are examined and possibly updated
 * @return {@code true} if {@code setIsContent(true)} was called on at least
 *         one block, {@code false} otherwise
 * @throws BoilerpipeProcessingException declared by the signature; nothing
 *         in this body throws it explicitly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE acts as "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;

        if (!qualifies) {
            // The run of qualifying blocks is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
} // closes the enclosing declaration, which opens outside this excerpt
level = tb.getTagLevel(); .hasPrevious();) { TextBlock tb = it.previous(); final int tl = tb.getTagLevel(); if(tl < level) { break; .hasNext();) { TextBlock tb = it.next(); final int tl = tb.getTagLevel(); if(tl < level) { break;
for (TextBlock tb : doc.getTextBlocks()) { if(tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) { tagLevel = tb.getTagLevel(); break; if (!tb.isContent()) { if(tb.getNumWords() >= 100 && tb.getTagLevel() == tagLevel) { tb.setIsContent(true); changes = true;
/**
 * Walks the document's text blocks in order and marks qualifying list-item
 * blocks as content.
 *
 * <p>A block that is already content and carries
 * {@code DefaultLabels.VERY_LIKELY_CONTENT} sets the reference tag level.
 * Every other block is marked as content when its tag level is strictly
 * greater than that reference level, it carries both
 * {@code DefaultLabels.MIGHT_BE_CONTENT} and {@code DefaultLabels.LI}, and
 * its link density is exactly {@code 0}. A block that fails any of these
 * checks clears the reference level, so nothing further is marked until
 * another reference block is seen.
 *
 * @param doc the document whose text blocks are examined and possibly updated
 * @return {@code true} if {@code setIsContent(true)} was called on at least
 *         one block, {@code false} otherwise
 * @throws BoilerpipeProcessingException declared by the signature; nothing
 *         in this body throws it explicitly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE acts as "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;

        if (!qualifies) {
            // The run of qualifying blocks is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
} // closes the enclosing declaration, which opens outside this excerpt