/**
 * Decides, from document statistics gathered before and after running a
 * {@link BoilerpipeExtractor}, whether the extraction result should be regarded as
 * (too) low in quality.
 *
 * <p>The result is {@code true} as soon as one of the following holds, checked in this order:
 * <ul>
 *   <li>{@code dsBefore.getNumWords()} is below 90,</li>
 *   <li>{@code dsAfter.getNumWords()} is below 70,</li>
 *   <li>{@code dsAfter.avgNumWords()} is below 25.</li>
 * </ul>
 *
 * <p>Works well with {@link DefaultExtractor}, {@link ArticleExtractor} and others.
 *
 * @param dsBefore statistics of the document before extraction
 * @param dsAfter statistics of the document after extraction
 * @return {@code true} if low quality is to be expected, {@code false} otherwise
 */
public boolean isLowQuality(final TextDocumentStatistics dsBefore,
        final TextDocumentStatistics dsAfter) {
    // Short-circuit evaluation keeps the checks in their original order.
    return dsBefore.getNumWords() < 90
            || dsAfter.getNumWords() < 70
            || dsAfter.avgNumWords() < 25;
}
/**
 * Judges whether the output of a {@link BoilerpipeExtractor} is likely to be of (too)
 * low quality, based on the document statistics taken before and after extraction.
 *
 * <p>Low quality is reported when {@code dsBefore.getNumWords()} is less than 90, when
 * {@code dsAfter.getNumWords()} is less than 70, or when {@code dsAfter.avgNumWords()}
 * is less than 25.
 *
 * <p>Works well with {@link DefaultExtractor}, {@link ArticleExtractor} and others.
 *
 * @param dsBefore statistics of the document before extraction
 * @param dsAfter statistics of the document after extraction
 * @return {@code true} if low quality is to be expected
 */
public boolean isLowQuality(final TextDocumentStatistics dsBefore,
        final TextDocumentStatistics dsAfter) {
    // Word-count guard: evaluated first, and dsAfter is only consulted if dsBefore passes.
    final boolean wordCountTooSmall =
            dsBefore.getNumWords() < 90 || dsAfter.getNumWords() < 70;

    // The average is only queried when the word-count guard did not already trigger.
    return wordCountTooSmall || dsAfter.avgNumWords() < 25;
}
/**
 * Tells whether the extraction produced by a {@link BoilerpipeExtractor} can be
 * regarded as (too) low in quality, given the document statistics from before and
 * after the extractor was applied.
 *
 * <p>Three thresholds are tested one after another; failing any of them yields
 * {@code true}: fewer than 90 words before extraction, fewer than 70 words after
 * extraction, or a {@code dsAfter.avgNumWords()} value below 25.
 *
 * <p>Works well with {@link DefaultExtractor}, {@link ArticleExtractor} and others.
 *
 * @param dsBefore statistics of the document before extraction
 * @param dsAfter statistics of the document after extraction
 * @return {@code true} if low quality is to be expected
 */
public boolean isLowQuality(final TextDocumentStatistics dsBefore,
        final TextDocumentStatistics dsAfter) {
    // Input document had too few words.
    if (dsBefore.getNumWords() < 90) {
        return true;
    }

    // Too few words survived the extraction.
    if (dsAfter.getNumWords() < 70) {
        return true;
    }

    // Final criterion: the post-extraction average word figure.
    return dsAfter.avgNumWords() < 25;
}
/**
 * Evaluates whether the quality of a {@link BoilerpipeExtractor} run should be
 * considered (too) low, using the document statistics recorded before and after the
 * extraction.
 *
 * <p>Any one of these conditions marks the extraction as low quality:
 * {@code dsBefore.getNumWords() < 90}, {@code dsAfter.getNumWords() < 70}, or
 * {@code dsAfter.avgNumWords() < 25}.
 *
 * <p>Works well with {@link DefaultExtractor}, {@link ArticleExtractor} and others.
 *
 * @param dsBefore statistics of the document before extraction
 * @param dsAfter statistics of the document after extraction
 * @return {@code true} if low quality is to be expected, {@code false} otherwise
 */
public boolean isLowQuality(final TextDocumentStatistics dsBefore,
        final TextDocumentStatistics dsAfter) {
    // All three checks share one outcome, so they are combined into a single
    // short-circuiting condition evaluated in the original order.
    if (dsBefore.getNumWords() < 90
            || dsAfter.getNumWords() < 70
            || dsAfter.avgNumWords() < 25) {
        return true;
    }
    return false;
}