/**
 * Removes non-content blocks from the list returned by
 * {@code doc.getTextBlocks()}, optionally sparing blocks that carry the
 * configured {@code labelToKeep}.
 *
 * <p>When {@code labelToKeep} is {@code null}, every non-content block is
 * removed. Otherwise a non-content block is removed only if it does not
 * carry {@code labelToKeep}. Content blocks are never removed.
 *
 * @param doc the document whose text blocks are filtered
 * @return {@code true} if at least one block was removed
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    boolean hasChanges = false;

    for (Iterator<TextBlock> it = textBlocks.iterator(); it.hasNext();) {
        TextBlock tb = it.next();
        if (tb.isContent()) {
            continue;
        }
        // Honor the configured label. The previous code checked for null but
        // then tested the hard-coded DefaultLabels.TITLE, so any other
        // configured label was silently ignored.
        // NOTE(review): assumes labelToKeep is a label String accepted by
        // hasLabel - confirm against the field declaration.
        if (labelToKeep == null || !tb.hasLabel(labelToKeep)) {
            // Remove through the iterator so the traversal stays valid.
            it.remove();
            hasChanges = true;
        }
    }
    return hasChanges;
}
/**
 * Removes non-content blocks from the list returned by
 * {@code doc.getTextBlocks()}, optionally sparing blocks that carry the
 * configured {@code labelToKeep}.
 *
 * <p>When {@code labelToKeep} is {@code null}, every non-content block is
 * removed. Otherwise a non-content block is removed only if it does not
 * carry {@code labelToKeep}. Content blocks are never removed.
 *
 * @param doc the document whose text blocks are filtered
 * @return {@code true} if at least one block was removed
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    boolean hasChanges = false;

    for (Iterator<TextBlock> it = textBlocks.iterator(); it.hasNext();) {
        TextBlock tb = it.next();
        if (tb.isContent()) {
            continue;
        }
        // Honor the configured label. The previous code checked for null but
        // then tested the hard-coded DefaultLabels.TITLE, so any other
        // configured label was silently ignored.
        // NOTE(review): assumes labelToKeep is a label String accepted by
        // hasLabel - confirm against the field declaration.
        if (labelToKeep == null || !tb.hasLabel(labelToKeep)) {
            // Remove through the iterator so the traversal stays valid.
            it.remove();
            hasChanges = true;
        }
    }
    return hasChanges;
}
/**
 * Removes non-content blocks from the list returned by
 * {@code doc.getTextBlocks()}, optionally sparing blocks that carry the
 * configured {@code labelToKeep}.
 *
 * <p>When {@code labelToKeep} is {@code null}, every non-content block is
 * removed. Otherwise a non-content block is removed only if it does not
 * carry {@code labelToKeep}. Content blocks are never removed.
 *
 * @param doc the document whose text blocks are filtered
 * @return {@code true} if at least one block was removed
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    boolean hasChanges = false;

    for (Iterator<TextBlock> it = textBlocks.iterator(); it.hasNext();) {
        TextBlock tb = it.next();
        if (tb.isContent()) {
            continue;
        }
        // Honor the configured label. The previous code checked for null but
        // then tested the hard-coded DefaultLabels.TITLE, so any other
        // configured label was silently ignored.
        // NOTE(review): assumes labelToKeep is a label String accepted by
        // hasLabel - confirm against the field declaration.
        if (labelToKeep == null || !tb.hasLabel(labelToKeep)) {
            // Remove through the iterator so the traversal stays valid.
            it.remove();
            hasChanges = true;
        }
    }
    return hasChanges;
}
/**
 * Scans the document's text blocks from last to first and clears the content
 * flag on trailing content blocks labelled {@code DefaultLabels.HEADING}.
 *
 * <p>Non-content blocks are skipped. The scan stops at the first content
 * block that is not labelled as a heading.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    final List<TextBlock> blocks = doc.getTextBlocks();
    // Position the cursor past the last element so previous() walks backwards.
    final ListIterator<TextBlock> cursor = blocks.listIterator(blocks.size());
    boolean modified = false;

    while (cursor.hasPrevious()) {
        final TextBlock current = cursor.previous();
        if (!current.isContent()) {
            continue;
        }
        if (!current.hasLabel(DefaultLabels.HEADING)) {
            // First content block that is not a heading ends the scan.
            break;
        }
        current.setIsContent(false);
        modified = true;
    }
    return modified;
}
/**
 * Scans the document's text blocks from last to first and clears the content
 * flag on trailing content blocks labelled {@code DefaultLabels.HEADING}.
 *
 * <p>Non-content blocks are skipped. The scan stops at the first content
 * block that is not labelled as a heading.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    final List<TextBlock> blocks = doc.getTextBlocks();
    // Position the cursor past the last element so previous() walks backwards.
    final ListIterator<TextBlock> cursor = blocks.listIterator(blocks.size());
    boolean modified = false;

    while (cursor.hasPrevious()) {
        final TextBlock current = cursor.previous();
        if (!current.isContent()) {
            continue;
        }
        if (!current.hasLabel(DefaultLabels.HEADING)) {
            // First content block that is not a heading ends the scan.
            break;
        }
        current.setIsContent(false);
        modified = true;
    }
    return modified;
}
/**
 * Scans the document's text blocks from last to first and clears the content
 * flag on trailing content blocks labelled {@code DefaultLabels.HEADING}.
 *
 * <p>Non-content blocks are skipped. The scan stops at the first content
 * block that is not labelled as a heading.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    final List<TextBlock> blocks = doc.getTextBlocks();
    // Position the cursor past the last element so previous() walks backwards.
    final ListIterator<TextBlock> cursor = blocks.listIterator(blocks.size());
    boolean modified = false;

    while (cursor.hasPrevious()) {
        final TextBlock current = cursor.previous();
        if (!current.isContent()) {
            continue;
        }
        if (!current.hasLabel(DefaultLabels.HEADING)) {
            // First content block that is not a heading ends the scan.
            break;
        }
        current.setIsContent(false);
        modified = true;
    }
    return modified;
}
/**
 * Promotes list-item blocks to content when they follow a content block
 * labelled {@code DefaultLabels.VERY_LIKELY_CONTENT} and sit at a deeper tag
 * level than that block.
 *
 * <p>A block is promoted only if its tag level exceeds the remembered level,
 * it carries both {@code MIGHT_BE_CONTENT} and {@code LI}, and its link
 * density equals zero. Any other block resets the remembered level to
 * {@code Integer.MAX_VALUE}, so no promotion happens until the next
 * very-likely-content block is seen.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was marked as content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    // MAX_VALUE means "no anchor": no tag level can exceed it.
    int anchorLevel = Integer.MAX_VALUE;
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            anchorLevel = block.getTagLevel();
            continue;
        }

        final boolean nestedListItem = block.getTagLevel() > anchorLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;

        if (nestedListItem) {
            block.setIsContent(true);
            modified = true;
        } else {
            anchorLevel = Integer.MAX_VALUE;
        }
    }
    return modified;
}
}
/**
 * Promotes list-item blocks to content when they follow a content block
 * labelled {@code DefaultLabels.VERY_LIKELY_CONTENT} and sit at a deeper tag
 * level than that block.
 *
 * <p>A block is promoted only if its tag level exceeds the remembered level,
 * it carries both {@code MIGHT_BE_CONTENT} and {@code LI}, and its link
 * density equals zero. Any other block resets the remembered level to
 * {@code Integer.MAX_VALUE}, so no promotion happens until the next
 * very-likely-content block is seen.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was marked as content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    // MAX_VALUE means "no anchor": no tag level can exceed it.
    int anchorLevel = Integer.MAX_VALUE;
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            anchorLevel = block.getTagLevel();
            continue;
        }

        final boolean nestedListItem = block.getTagLevel() > anchorLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;

        if (nestedListItem) {
            block.setIsContent(true);
            modified = true;
        } else {
            anchorLevel = Integer.MAX_VALUE;
        }
    }
    return modified;
}
}
/**
 * Clears the content flag on every block from the end-of-text marker onward,
 * once enough words have been accumulated.
 *
 * <p>Words are counted via {@code getNumFullTextWords} for content blocks
 * only. The cut-off is triggered by the first block that is labelled
 * {@code DefaultLabels.INDICATES_END_OF_TEXT} while the running word count is
 * at least {@code minNumWords}. That block and all following blocks are set
 * to non-content.
 *
 * <p>The returned flag becomes {@code true} for any block processed after the
 * cut-off, regardless of whether that block was content beforehand.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if the cut-off was reached
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    boolean pastEnd = false;
    int wordCount = 0;

    for (final TextBlock current : doc.getTextBlocks()) {
        final boolean marksEnd = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // The marker block's own words count toward the threshold check below.
        if (current.isContent()) {
            wordCount += getNumFullTextWords(current);
        }

        if (!pastEnd && marksEnd && wordCount >= minNumWords) {
            pastEnd = true;
        }

        if (pastEnd) {
            current.setIsContent(false);
            modified = true;
        }
    }
    return modified;
}
}
/**
 * Clears the content flag on every block from the end-of-text marker onward,
 * once enough words have been accumulated.
 *
 * <p>Words are counted via {@code getNumFullTextWords} for content blocks
 * only. The cut-off is triggered by the first block that is labelled
 * {@code DefaultLabels.INDICATES_END_OF_TEXT} while the running word count is
 * at least {@code minNumWords}. That block and all following blocks are set
 * to non-content.
 *
 * <p>The returned flag becomes {@code true} for any block processed after the
 * cut-off, regardless of whether that block was content beforehand.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if the cut-off was reached
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    boolean pastEnd = false;
    int wordCount = 0;

    for (final TextBlock current : doc.getTextBlocks()) {
        final boolean marksEnd = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // The marker block's own words count toward the threshold check below.
        if (current.isContent()) {
            wordCount += getNumFullTextWords(current);
        }

        if (!pastEnd && marksEnd && wordCount >= minNumWords) {
            pastEnd = true;
        }

        if (pastEnd) {
            current.setIsContent(false);
            modified = true;
        }
    }
    return modified;
}
}
/**
 * Clears the content flag on every block from the end-of-text marker onward,
 * once enough words have been accumulated.
 *
 * <p>Words are counted via {@code getNumFullTextWords} for content blocks
 * only. The cut-off is triggered by the first block that is labelled
 * {@code DefaultLabels.INDICATES_END_OF_TEXT} while the running word count is
 * at least {@code minNumWords}. That block and all following blocks are set
 * to non-content.
 *
 * <p>The returned flag becomes {@code true} for any block processed after the
 * cut-off, regardless of whether that block was content beforehand.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if the cut-off was reached
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    boolean pastEnd = false;
    int wordCount = 0;

    for (final TextBlock current : doc.getTextBlocks()) {
        final boolean marksEnd = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // The marker block's own words count toward the threshold check below.
        if (current.isContent()) {
            wordCount += getNumFullTextWords(current);
        }

        if (!pastEnd && marksEnd && wordCount >= minNumWords) {
            pastEnd = true;
        }

        if (pastEnd) {
            current.setIsContent(false);
            modified = true;
        }
    }
    return modified;
}
}
/**
 * Marks content blocks as non-content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already non-content are left untouched. Label checks
 * for a block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(false);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks content blocks as non-content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already non-content are left untouched. Label checks
 * for a block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(false);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks non-content blocks as content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already content are left untouched. Label checks for a
 * block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(true);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks content blocks as non-content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already non-content are left untouched. Label checks
 * for a block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(false);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks non-content blocks as content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already content are left untouched. Label checks for a
 * block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(true);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks content blocks as non-content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already non-content are left untouched. Label checks
 * for a block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to non-content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(false);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks non-content blocks as content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already content are left untouched. Label checks for a
 * block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(true);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
/**
 * Marks non-content blocks as content when they carry at least one of the
 * configured {@code labels}.
 *
 * <p>Blocks that are already content are left untouched. Label checks for a
 * block stop at the first match.
 *
 * @param doc the document whose text blocks are examined
 * @return {@code true} if at least one block was switched to content
 * @throws BoilerpipeProcessingException declared by the signature; this body
 *         does not throw it directly
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            continue;
        }
        for (final String candidate : labels) {
            if (block.hasLabel(candidate)) {
                block.setIsContent(true);
                modified = true;
                // One matching label is enough; move on to the next block.
                break;
            }
        }
    }
    return modified;
}
}
if(tb.isContent() && tb.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) { tagLevel = tb.getTagLevel(); break;