/**
 * Accepts a text block only when it has a link density of exactly zero and
 * more than six words.
 *
 * @param tb the text block to test
 * @return {@code true} if both requirements hold, {@code false} otherwise
 */
public boolean meetsCondition(TextBlock tb) {
    // Any non-zero link density disqualifies the block before the word count is read.
    if (tb.getLinkDensity() != 0) {
        return false;
    }
    return tb.getNumWords() > 6;
}
});
/**
 * Accepts a text block only when it has a link density of exactly zero and
 * more than six words.
 *
 * @param tb the text block to test
 * @return {@code true} if both requirements hold, {@code false} otherwise
 */
public boolean meetsCondition(TextBlock tb) {
    // Any non-zero link density disqualifies the block before the word count is read.
    if (tb.getLinkDensity() != 0) {
        return false;
    }
    return tb.getNumWords() > 6;
}
});
/**
 * Accepts a text block only when it has a link density of exactly zero and
 * more than six words.
 *
 * @param tb the text block to test
 * @return {@code true} if both requirements hold, {@code false} otherwise
 */
public boolean meetsCondition(TextBlock tb) {
    // Any non-zero link density disqualifies the block before the word count is read.
    if (tb.getLinkDensity() != 0) {
        return false;
    }
    return tb.getNumWords() > 6;
}
});
/**
 * Decides whether {@code curr} is content, using word counts and link
 * densities of the block itself and of its two neighbours, and stores the
 * decision on {@code curr}.
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
    final boolean contentFlag;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        // A block containing links that is followed by a long block.
        contentFlag = true;
    } else if (curr.getNumWords() > 19) {
        // A long block on its own.
        contentFlag = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Link-free neighbours with a non-trivial successor: any one of the
        // three blocks being long enough is sufficient.
        contentFlag = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        contentFlag = false;
    }
    return curr.setIsContent(contentFlag);
}
};
/**
 * Decides whether {@code curr} is content, using word counts and link
 * densities of the block itself and of its two neighbours, and stores the
 * decision on {@code curr}.
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
    final boolean contentFlag;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        // A block containing links that is followed by a long block.
        contentFlag = true;
    } else if (curr.getNumWords() > 19) {
        // A long block on its own.
        contentFlag = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Link-free neighbours with a non-trivial successor: any one of the
        // three blocks being long enough is sufficient.
        contentFlag = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        contentFlag = false;
    }
    return curr.setIsContent(contentFlag);
}
};
/**
 * Decides whether {@code curr} is content, using word counts and link
 * densities of the block itself and of its two neighbours, and stores the
 * decision on {@code curr}.
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
    final boolean contentFlag;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        // A block containing links that is followed by a long block.
        contentFlag = true;
    } else if (curr.getNumWords() > 19) {
        // A long block on its own.
        contentFlag = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Link-free neighbours with a non-trivial successor: any one of the
        // three blocks being long enough is sufficient.
        contentFlag = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        contentFlag = false;
    }
    return curr.setIsContent(contentFlag);
}
};
/**
 * Decides whether {@code curr} is content, using word counts and link
 * densities of the block itself and of its two neighbours, and stores the
 * decision on {@code curr}.
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr, final TextBlock next) {
    final boolean contentFlag;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        // A block containing links that is followed by a long block.
        contentFlag = true;
    } else if (curr.getNumWords() > 19) {
        // A long block on its own.
        contentFlag = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Link-free neighbours with a non-trivial successor: any one of the
        // three blocks being long enough is sufficient.
        contentFlag = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        contentFlag = false;
    }
    return curr.setIsContent(contentFlag);
}
};
/**
 * Fuses each text block into its predecessor when the predecessor is content,
 * the block's link density is below 0.56 and the block is not labelled
 * {@code STRICTLY_NOT_CONTENT}. Passes over the list are repeated until a
 * pass performs no merge.
 *
 * @param doc the document whose text-block list is modified in place
 * @return {@code false} if the document has fewer than two blocks;
 *         otherwise {@code true}, whether or not a merge took place
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    if (textBlocks.size() < 2) {
        return false;
    }

    boolean changes;
    do {
        changes = false;
        // Every pass restarts at index 1, so the predecessor must restart at
        // index 0 as well. Carrying it over from the previous pass would
        // compare (and possibly merge) the second block with a non-adjacent
        // block from the end of the list.
        TextBlock prevBlock = textBlocks.get(0);
        for (ListIterator<TextBlock> it = textBlocks.listIterator(1); it.hasNext();) {
            TextBlock block = it.next();
            if (prevBlock.isContent()
                    && block.getLinkDensity() < 0.56
                    && !block.hasLabel(DefaultLabels.STRICTLY_NOT_CONTENT)) {
                prevBlock.mergeNext(block);
                // Remove through the iterator so iteration stays valid.
                it.remove();
                changes = true;
            } else {
                prevBlock = block;
            }
        }
    } while (changes);

    // Kept as-is for callers: reports true even when no merge happened.
    return true;
}
/**
 * Fuses each text block into its predecessor when the predecessor is content,
 * the block's link density is below 0.56 and the block is not labelled
 * {@code STRICTLY_NOT_CONTENT}. Passes over the list are repeated until a
 * pass performs no merge.
 *
 * @param doc the document whose text-block list is modified in place
 * @return {@code false} if the document has fewer than two blocks;
 *         otherwise {@code true}, whether or not a merge took place
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    if (textBlocks.size() < 2) {
        return false;
    }

    boolean changes;
    do {
        changes = false;
        // Every pass restarts at index 1, so the predecessor must restart at
        // index 0 as well. Carrying it over from the previous pass would
        // compare (and possibly merge) the second block with a non-adjacent
        // block from the end of the list.
        TextBlock prevBlock = textBlocks.get(0);
        for (ListIterator<TextBlock> it = textBlocks.listIterator(1); it.hasNext();) {
            TextBlock block = it.next();
            if (prevBlock.isContent()
                    && block.getLinkDensity() < 0.56
                    && !block.hasLabel(DefaultLabels.STRICTLY_NOT_CONTENT)) {
                prevBlock.mergeNext(block);
                // Remove through the iterator so iteration stays valid.
                it.remove();
                changes = true;
            } else {
                prevBlock = block;
            }
        }
    } while (changes);

    // Kept as-is for callers: reports true even when no merge happened.
    return true;
}
/**
 * Fuses each text block into its predecessor when the predecessor is content,
 * the block's link density is below 0.56 and the block is not labelled
 * {@code STRICTLY_NOT_CONTENT}. Passes over the list are repeated until a
 * pass performs no merge.
 *
 * @param doc the document whose text-block list is modified in place
 * @return {@code false} if the document has fewer than two blocks;
 *         otherwise {@code true}, whether or not a merge took place
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    List<TextBlock> textBlocks = doc.getTextBlocks();
    if (textBlocks.size() < 2) {
        return false;
    }

    boolean changes;
    do {
        changes = false;
        // Every pass restarts at index 1, so the predecessor must restart at
        // index 0 as well. Carrying it over from the previous pass would
        // compare (and possibly merge) the second block with a non-adjacent
        // block from the end of the list.
        TextBlock prevBlock = textBlocks.get(0);
        for (ListIterator<TextBlock> it = textBlocks.listIterator(1); it.hasNext();) {
            TextBlock block = it.next();
            if (prevBlock.isContent()
                    && block.getLinkDensity() < 0.56
                    && !block.hasLabel(DefaultLabels.STRICTLY_NOT_CONTENT)) {
                prevBlock.mergeNext(block);
                // Remove through the iterator so iteration stays valid.
                it.remove();
                changes = true;
            } else {
                prevBlock = block;
            }
        }
    } while (changes);

    // Kept as-is for callers: reports true even when no merge happened.
    return true;
}
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getNumWords() <= 16) { if (next.getNumWords() <= 15) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getTextDensity() <= 9) { if (next.getTextDensity() <= 10) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getNumWords() <= 16) { if (next.getNumWords() <= 15) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getNumWords() <= 16) { if (next.getNumWords() <= 15) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getTextDensity() <= 9) { if (next.getTextDensity() <= 10) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getNumWords() <= 16) { if (next.getNumWords() <= 15) {
final boolean isContent; if (curr.getLinkDensity() <= 0.333333) { if (prev.getLinkDensity() <= 0.555556) { if (curr.getTextDensity() <= 9) { if (next.getTextDensity() <= 10) {
/**
 * Marks blocks as content when they follow a content block labelled
 * {@code VERY_LIKELY_CONTENT}, sit at a greater tag level than that block,
 * carry both the {@code MIGHT_BE_CONTENT} and {@code LI} labels, and have a
 * link density of exactly zero.
 *
 * @param doc the document whose text blocks are inspected in order
 * @return {@code true} if at least one block was marked as content
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE means "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            // This block becomes the reference for the blocks that follow it.
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;
        if (!qualifies) {
            // The run is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
}
/**
 * Marks blocks as content when they follow a content block labelled
 * {@code VERY_LIKELY_CONTENT}, sit at a greater tag level than that block,
 * carry both the {@code MIGHT_BE_CONTENT} and {@code LI} labels, and have a
 * link density of exactly zero.
 *
 * @param doc the document whose text blocks are inspected in order
 * @return {@code true} if at least one block was marked as content
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE means "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            // This block becomes the reference for the blocks that follow it.
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;
        if (!qualifies) {
            // The run is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
}
/**
 * Marks blocks as content when they follow a content block labelled
 * {@code VERY_LIKELY_CONTENT}, sit at a greater tag level than that block,
 * carry both the {@code MIGHT_BE_CONTENT} and {@code LI} labels, and have a
 * link density of exactly zero.
 *
 * @param doc the document whose text blocks are inspected in order
 * @return {@code true} if at least one block was marked as content
 * @throws BoilerpipeProcessingException declared by the signature; not
 *         thrown directly by this method
 */
public boolean process(final TextDocument doc) throws BoilerpipeProcessingException {
    boolean modified = false;
    // Integer.MAX_VALUE means "no reference block": no tag level can exceed it.
    int referenceLevel = Integer.MAX_VALUE;

    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent() && block.hasLabel(DefaultLabels.VERY_LIKELY_CONTENT)) {
            // This block becomes the reference for the blocks that follow it.
            referenceLevel = block.getTagLevel();
            continue;
        }

        final boolean qualifies = block.getTagLevel() > referenceLevel
                && block.hasLabel(DefaultLabels.MIGHT_BE_CONTENT)
                && block.hasLabel(DefaultLabels.LI)
                && block.getLinkDensity() == 0;
        if (!qualifies) {
            // The run is broken; forget the reference level.
            referenceLevel = Integer.MAX_VALUE;
            continue;
        }

        block.setIsContent(true);
        modified = true;
    }

    return modified;
}
}