/**
 * Marks every block from the end-of-text cutoff onwards as non-content.
 *
 * <p>Blocks are visited in document order while the full-text word count of
 * the content blocks seen so far (the current block included) is accumulated.
 * The cutoff is the first block that carries
 * {@link DefaultLabels#INDICATES_END_OF_TEXT} at a point where that running
 * count has reached {@code minNumWords}. That block and all blocks after it
 * get {@code setIsContent(false)}.
 *
 * @param doc the document whose text blocks are examined and updated in place
 * @return {@code true} if the cutoff was reached (so at least one block was
 *         passed to {@code setIsContent(false)}), {@code false} otherwise;
 *         note that this is reported even when the affected blocks were
 *         already non-content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    int contentWords = 0;
    // Latches to true at the cutoff block and stays true for the rest of the scan.
    boolean cutoffReached = false;

    for (TextBlock tb : doc.getTextBlocks()) {
        final boolean marksEnd = tb.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // The current block is counted before the threshold is tested.
        if (tb.isContent()) {
            contentWords += getNumFullTextWords(tb);
        }

        cutoffReached = cutoffReached || (marksEnd && contentWords >= minNumWords);

        if (cutoffReached) {
            tb.setIsContent(false);
        }
    }

    // A change is reported exactly when the cutoff was reached.
    return cutoffReached;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Runs the fixed sequence of filters below over the document, in order.
 *
 * <p>Every stage is always executed: a stage reporting a change never causes
 * later stages to be skipped. The individual results are OR-ed together.
 *
 * @param doc the document handed to each stage in turn
 * @return {@code true} if at least one stage returned {@code true}
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    // One statement per stage; "|=" keeps the non-short-circuit behaviour.
    boolean modified = TerminatingBlocksFinder.INSTANCE.process(doc);
    // The title is read only after the first stage has run.
    modified |= new DocumentTitleMatchClassifier(doc.getTitle()).process(doc);
    modified |= NumWordsRulesClassifier.INSTANCE.process(doc);
    modified |= IgnoreBlocksAfterContentFilter.DEFAULT_INSTANCE.process(doc);
    modified |= BlockProximityFusion.MAX_DISTANCE_1.process(doc);
    modified |= BoilerplateBlockFilter.INSTANCE.process(doc);
    modified |= BlockProximityFusion.MAX_DISTANCE_1_CONTENT_ONLY.process(doc);
    modified |= KeepLargestFulltextBlockFilter.INSTANCE.process(doc);
    modified |= ExpandTitleToContentFilter.INSTANCE.process(doc);
    return modified;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Runs the fixed sequence of filters below over the document, in order.
 *
 * <p>Every stage is always executed: a stage reporting a change never causes
 * later stages to be skipped. The individual results are OR-ed together.
 *
 * @param doc the document handed to each stage in turn
 * @return {@code true} if at least one stage returned {@code true}
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    // One statement per stage; "|=" keeps the non-short-circuit behaviour.
    boolean modified = TerminatingBlocksFinder.INSTANCE.process(doc);
    // The title is read only after the first stage has run.
    modified |= new DocumentTitleMatchClassifier(doc.getTitle()).process(doc);
    modified |= NumWordsRulesClassifier.INSTANCE.process(doc);
    modified |= IgnoreBlocksAfterContentFilter.DEFAULT_INSTANCE.process(doc);
    modified |= TrailingHeadlineToBoilerplateFilter.INSTANCE.process(doc);
    modified |= BlockProximityFusion.MAX_DISTANCE_1.process(doc);
    modified |= BoilerplateBlockFilter.INSTANCE_KEEP_TITLE.process(doc);
    modified |= BlockProximityFusion.MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL.process(doc);
    modified |= KeepLargestBlockFilter.INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS.process(doc);
    modified |= ExpandTitleToContentFilter.INSTANCE.process(doc);
    modified |= LargeBlockSameTagLevelToContentFilter.INSTANCE.process(doc);
    modified |= ListAtEndFilter.INSTANCE.process(doc);
    return modified;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Applies a fixed chain of document filters, one after another.
 *
 * <p>No stage is ever skipped, whatever the earlier stages returned; the
 * overall result is the logical OR of all stage results.
 *
 * @param doc the document passed to every stage
 * @return {@code true} if any stage returned {@code true}, else {@code false}
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    // Stages run strictly top to bottom; "|=" never short-circuits.
    boolean anyChange = TerminatingBlocksFinder.INSTANCE.process(doc);
    // doc.getTitle() is evaluated after the first stage, as part of stage two.
    anyChange |= new DocumentTitleMatchClassifier(doc.getTitle()).process(doc);
    anyChange |= NumWordsRulesClassifier.INSTANCE.process(doc);
    anyChange |= IgnoreBlocksAfterContentFilter.DEFAULT_INSTANCE.process(doc);
    anyChange |= TrailingHeadlineToBoilerplateFilter.INSTANCE.process(doc);
    anyChange |= BlockProximityFusion.MAX_DISTANCE_1.process(doc);
    anyChange |= BoilerplateBlockFilter.INSTANCE_KEEP_TITLE.process(doc);
    anyChange |= BlockProximityFusion.MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL.process(doc);
    anyChange |= KeepLargestBlockFilter.INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS.process(doc);
    anyChange |= ExpandTitleToContentFilter.INSTANCE.process(doc);
    anyChange |= LargeBlockSameTagLevelToContentFilter.INSTANCE.process(doc);
    anyChange |= ListAtEndFilter.INSTANCE.process(doc);
    return anyChange;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Marks every block from the end-of-text cutoff onwards as non-content.
 *
 * <p>Blocks are visited in document order while the full-text word count of
 * the content blocks seen so far (the current block included) is accumulated.
 * The cutoff is the first block that carries
 * {@link DefaultLabels#INDICATES_END_OF_TEXT} at a point where that running
 * count has reached {@code minNumWords}. That block and all blocks after it
 * get {@code setIsContent(false)}.
 *
 * @param doc the document whose text blocks are examined and updated in place
 * @return {@code true} if the cutoff was reached (so at least one block was
 *         passed to {@code setIsContent(false)}), {@code false} otherwise;
 *         note that this is reported even when the affected blocks were
 *         already non-content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    int contentWords = 0;
    // Latches to true at the cutoff block and stays true for the rest of the scan.
    boolean cutoffReached = false;

    for (TextBlock tb : doc.getTextBlocks()) {
        final boolean marksEnd = tb.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // The current block is counted before the threshold is tested.
        if (tb.isContent()) {
            contentWords += getNumFullTextWords(tb);
        }

        cutoffReached = cutoffReached || (marksEnd && contentWords >= minNumWords);

        if (cutoffReached) {
            tb.setIsContent(false);
        }
    }

    // A change is reported exactly when the cutoff was reached.
    return cutoffReached;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Pushes the document through a fixed, ordered list of filters.
 *
 * <p>All stages run unconditionally. The method reports whether any of them
 * returned {@code true}.
 *
 * @param doc the document given to each stage
 * @return the logical OR of the results of all stages
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    // Accumulate with "|=" so that each stage is evaluated regardless of the
    // results gathered so far.
    boolean changed = TerminatingBlocksFinder.INSTANCE.process(doc);
    // The title lookup happens here, i.e. after the first stage has finished.
    changed |= new DocumentTitleMatchClassifier(doc.getTitle()).process(doc);
    changed |= NumWordsRulesClassifier.INSTANCE.process(doc);
    changed |= IgnoreBlocksAfterContentFilter.DEFAULT_INSTANCE.process(doc);
    changed |= TrailingHeadlineToBoilerplateFilter.INSTANCE.process(doc);
    changed |= BlockProximityFusion.MAX_DISTANCE_1.process(doc);
    changed |= BoilerplateBlockFilter.INSTANCE_KEEP_TITLE.process(doc);
    changed |= BlockProximityFusion.MAX_DISTANCE_1_CONTENT_ONLY_SAME_TAGLEVEL.process(doc);
    changed |= KeepLargestBlockFilter.INSTANCE_EXPAND_TO_SAME_TAGLEVEL_MIN_WORDS.process(doc);
    changed |= ExpandTitleToContentFilter.INSTANCE.process(doc);
    changed |= LargeBlockSameTagLevelToContentFilter.INSTANCE.process(doc);
    changed |= ListAtEndFilter.INSTANCE.process(doc);
    return changed;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Turns off the content flag on all blocks at and after the end-of-text cutoff.
 *
 * <p>While walking the blocks in order, the method sums
 * {@code getNumFullTextWords} over the content blocks encountered so far,
 * including the current one. Once a block labelled
 * {@link DefaultLabels#INDICATES_END_OF_TEXT} is seen with that sum at or
 * above {@code minNumWords}, that block and every later block receive
 * {@code setIsContent(false)}.
 *
 * @param doc the document whose text blocks are scanned and modified in place
 * @return {@code true} if the cutoff condition was met at some block,
 *         {@code false} if it never was; the result does not depend on
 *         whether the affected blocks were content beforehand
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    int wordsSoFar = 0;
    // Sticky flag: set at the cutoff block, never cleared afterwards.
    boolean pastEnd = false;

    for (TextBlock current : doc.getTextBlocks()) {
        final boolean hasEndLabel = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // Count the current block first so it contributes to its own threshold test.
        if (current.isContent()) {
            wordsSoFar += getNumFullTextWords(current);
        }

        pastEnd = pastEnd || (hasEndLabel && wordsSoFar >= minNumWords);

        if (pastEnd) {
            current.setIsContent(false);
        }
    }

    // Reaching the cutoff is what counts as "changed".
    return pastEnd;
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Turns off the content flag on all blocks at and after the end-of-text cutoff.
 *
 * <p>While walking the blocks in order, the method sums
 * {@code getNumFullTextWords} over the content blocks encountered so far,
 * including the current one. Once a block labelled
 * {@link DefaultLabels#INDICATES_END_OF_TEXT} is seen with that sum at or
 * above {@code minNumWords}, that block and every later block receive
 * {@code setIsContent(false)}.
 *
 * @param doc the document whose text blocks are scanned and modified in place
 * @return {@code true} if the cutoff condition was met at some block,
 *         {@code false} if it never was; the result does not depend on
 *         whether the affected blocks were content beforehand
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this method throws it directly
 */
public boolean process(TextDocument doc) throws BoilerpipeProcessingException {
    int wordsSoFar = 0;
    // Sticky flag: set at the cutoff block, never cleared afterwards.
    boolean pastEnd = false;

    for (TextBlock current : doc.getTextBlocks()) {
        final boolean hasEndLabel = current.hasLabel(DefaultLabels.INDICATES_END_OF_TEXT);

        // Count the current block first so it contributes to its own threshold test.
        if (current.isContent()) {
            wordsSoFar += getNumFullTextWords(current);
        }

        pastEnd = pastEnd || (hasEndLabel && wordsSoFar >= minNumWords);

        if (pastEnd) {
            current.setIsContent(false);
        }
    }

    // Reaching the cutoff is what counts as "changed".
    return pastEnd;
}
} // closes the enclosing class (its header lies outside this excerpt)