/**
 * Flags every text block of the document as content.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to content,
 *         {@code false} if every block was already content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            // Already content: nothing to change for this block.
            continue;
        }
        block.setIsContent(true);
        modified = true;
    }
    return modified;
}
}
for (TextBlock block : td.getTextBlocks()) { if (block.isContent()) { BitSet bs = block.getContainedTextElements(); if (bs != null) { validCharacterRuns.or(bs); for (TextBlock block : td.getTextBlocks()) { if (block.isContent()) { delegate.startElement(XHTMLContentHandler.XHTML, "p", "p", emptyAttrs); char[] chars = block.getText().toCharArray(); delegate.characters(chars, 0, chars.length); delegate.endElement(XHTMLContentHandler.XHTML, "p", "p");
/**
 * Decides from word counts and link densities of a block and its two
 * neighbours whether {@code curr} is content, and stores that decision on
 * {@code curr}.
 *
 * <p>The block is treated as content when any of the following holds:
 * <ul>
 * <li>{@code curr} has a link density above 0 and {@code next} has more than
 * 11 words;</li>
 * <li>{@code curr} has more than 19 words;</li>
 * <li>{@code next} has more than 6 words, both {@code next} and {@code prev}
 * have a link density of exactly 0, and additionally {@code curr} has more
 * than 6 words, or {@code prev} more than 7, or {@code next} more than 19.</li>
 * </ul>
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr,
        final TextBlock next) {
    final boolean isContent;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        isContent = true;
    } else if (curr.getNumWords() > 19) {
        isContent = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Neighbours qualify; one of the three length thresholds must hold too.
        isContent = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        isContent = false;
    }
    return curr.setIsContent(isContent);
}
};
/** * Computes statistics on a given {@link TextDocument}. * * @param doc The {@link TextDocument}. * @param contentOnly if true then o */ public TextDocumentStatistics(final TextDocument doc, final boolean contentOnly) { for (TextBlock tb : doc.getTextBlocks()) { if (contentOnly && !tb.isContent()) { continue; } numWords += tb.getNumWords(); numBlocks++; } }
/**
 * Removes the content flag from content blocks that have fewer than
 * {@code minWords} words.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to non-content,
 *         {@code false} otherwise
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        // Non-content blocks are left untouched; the word count is only
        // consulted for blocks currently flagged as content.
        if (block.isContent() && block.getNumWords() < minWords) {
            block.setIsContent(false);
            modified = true;
        }
    }
    return modified;
}
}
/**
 * Returns the block's word count if its text density reaches the given
 * minimum, and 0 otherwise.
 *
 * @param tb the text block to inspect
 * @param minTextDensity the threshold the block's text density must be
 *        greater than or equal to
 * @return {@code tb.getNumWords()} when the threshold is met, else {@code 0}
 */
protected static int getNumFullTextWords(final TextBlock tb,
        float minTextDensity) {
    return tb.getTextDensity() >= minTextDensity ? tb.getNumWords() : 0;
}
/**
 * Creates a new {@link TextDocument} constructed with the given maximum
 * length.
 *
 * @param maxLength value handed to the {@link TextDocument} constructor; the
 *        usage below treats it as a character limit
 * @return the newly created document
 */
public static Document createTextDocument(int maxLength) {
    return new TextDocument(maxLength);
}

// ... (code elided in the original snippet)

// Usage: each text field gets its own document with its own limit. These
// statements are on separate lines so the trailing comments no longer
// swallow the statements that follow them.
Document document = createTextDocument(5); // limit to 5 chars
textField1.setDocument(document);
document = createTextDocument(10); // limit to 10 chars
textField2.setDocument(document);
/**
 * Produces the content text of this {@link TextDocument}.
 *
 * @return The string obtained from {@code getText(true, false)}.
 */
public String getContent() {
    final String contentText = getText(true, false);
    return contentText;
}
/**
 * Clears the content flag on every text block of the document.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to non-content,
 *         {@code false} if no block was flagged as content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            // Already non-content: nothing to change for this block.
            continue;
        }
        block.setIsContent(false);
        modified = true;
    }
    return modified;
}
}
/** * Computes statistics on a given {@link TextDocument}. * * @param doc The {@link TextDocument}. * @param contentOnly if true then o */ public TextDocumentStatistics(final TextDocument doc, final boolean contentOnly) { for (TextBlock tb : doc.getTextBlocks()) { if (contentOnly && !tb.isContent()) { continue; } numWords += tb.getNumWords(); numBlocks++; } }
/**
 * Decides from word counts and link densities of a block and its two
 * neighbours whether {@code curr} is content, and stores that decision on
 * {@code curr}.
 *
 * <p>The block is treated as content when any of the following holds:
 * <ul>
 * <li>{@code curr} has a link density above 0 and {@code next} has more than
 * 11 words;</li>
 * <li>{@code curr} has more than 19 words;</li>
 * <li>{@code next} has more than 6 words, both {@code next} and {@code prev}
 * have a link density of exactly 0, and additionally {@code curr} has more
 * than 6 words, or {@code prev} more than 7, or {@code next} more than 19.</li>
 * </ul>
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr,
        final TextBlock next) {
    final boolean isContent;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        isContent = true;
    } else if (curr.getNumWords() > 19) {
        isContent = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Neighbours qualify; one of the three length thresholds must hold too.
        isContent = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        isContent = false;
    }
    return curr.setIsContent(isContent);
}
};
/**
 * Returns the block's word count if its text density reaches the given
 * minimum, and 0 otherwise.
 *
 * @param tb the text block to inspect
 * @param minTextDensity the threshold the block's text density must be
 *        greater than or equal to
 * @return {@code tb.getNumWords()} when the threshold is met, else {@code 0}
 */
protected static int getNumFullTextWords(final TextBlock tb,
        float minTextDensity) {
    return tb.getTextDensity() >= minTextDensity ? tb.getNumWords() : 0;
}
/**
 * Flags every text block of the document as content.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to content,
 *         {@code false} if every block was already content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            // Already content: nothing to change for this block.
            continue;
        }
        block.setIsContent(true);
        modified = true;
    }
    return modified;
}
}
/** * Computes statistics on a given {@link TextDocument}. * * @param doc The {@link TextDocument}. * @param contentOnly if true then o */ public TextDocumentStatistics(final TextDocument doc, final boolean contentOnly) { for (TextBlock tb : doc.getTextBlocks()) { if (contentOnly && !tb.isContent()) { continue; } numWords += tb.getNumWords(); numBlocks++; } }
/**
 * Decides from word counts and link densities of a block and its two
 * neighbours whether {@code curr} is content, and stores that decision on
 * {@code curr}.
 *
 * <p>The block is treated as content when any of the following holds:
 * <ul>
 * <li>{@code curr} has a link density above 0 and {@code next} has more than
 * 11 words;</li>
 * <li>{@code curr} has more than 19 words;</li>
 * <li>{@code next} has more than 6 words, both {@code next} and {@code prev}
 * have a link density of exactly 0, and additionally {@code curr} has more
 * than 6 words, or {@code prev} more than 7, or {@code next} more than 19.</li>
 * </ul>
 *
 * @param prev the block preceding {@code curr}
 * @param curr the block being classified
 * @param next the block following {@code curr}
 * @return the value returned by {@code curr.setIsContent(...)}
 */
protected boolean classify(final TextBlock prev, final TextBlock curr,
        final TextBlock next) {
    final boolean isContent;
    if (curr.getLinkDensity() > 0 && next.getNumWords() > 11) {
        isContent = true;
    } else if (curr.getNumWords() > 19) {
        isContent = true;
    } else if (next.getNumWords() > 6
            && next.getLinkDensity() == 0
            && prev.getLinkDensity() == 0) {
        // Neighbours qualify; one of the three length thresholds must hold too.
        isContent = curr.getNumWords() > 6
                || prev.getNumWords() > 7
                || next.getNumWords() > 19;
    } else {
        isContent = false;
    }
    return curr.setIsContent(isContent);
}
};
/**
 * Returns the block's word count if its text density reaches the given
 * minimum, and 0 otherwise.
 *
 * @param tb the text block to inspect
 * @param minTextDensity the threshold the block's text density must be
 *        greater than or equal to
 * @return {@code tb.getNumWords()} when the threshold is met, else {@code 0}
 */
protected static int getNumFullTextWords(final TextBlock tb,
        float minTextDensity) {
    return tb.getTextDensity() >= minTextDensity ? tb.getNumWords() : 0;
}
/**
 * Clears the content flag on every text block of the document.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to non-content,
 *         {@code false} if no block was flagged as content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            // Already non-content: nothing to change for this block.
            continue;
        }
        block.setIsContent(false);
        modified = true;
    }
    return modified;
}
}
/**
 * Flags every text block of the document as content.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to content,
 *         {@code false} if every block was already content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            // Already content: nothing to change for this block.
            continue;
        }
        block.setIsContent(true);
        modified = true;
    }
    return modified;
}
}
/**
 * Clears the content flag on every text block of the document.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to non-content,
 *         {@code false} if no block was flagged as content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (!block.isContent()) {
            // Already non-content: nothing to change for this block.
            continue;
        }
        block.setIsContent(false);
        modified = true;
    }
    return modified;
}
}
/**
 * Flags every text block of the document as content.
 *
 * @param doc the document whose text blocks are visited
 * @return {@code true} if at least one block was switched to content,
 *         {@code false} if every block was already content
 * @throws BoilerpipeProcessingException declared by the signature; nothing in
 *         this body throws it explicitly
 */
public boolean process(final TextDocument doc)
        throws BoilerpipeProcessingException {
    boolean modified = false;
    for (final TextBlock block : doc.getTextBlocks()) {
        if (block.isContent()) {
            // Already content: nothing to change for this block.
            continue;
        }
        block.setIsContent(true);
        modified = true;
    }
    return modified;
}
}