/**
 * Builds the context generator handed out by this object.
 *
 * <p>The generator is a {@link DefaultLanguageDetectorContextGenerator} created with the
 * integer arguments {@code 1} and {@code 3} (presumably the minimum and maximum n-gram
 * length; confirm against the constructor's documentation), followed by the shared
 * instances of the emoji, URL, Twitter, number and shrink normalizers, in that order.
 *
 * @return a newly constructed context generator; a fresh object is created on every call
 */
public LanguageDetectorContextGenerator getContextGenerator() {
  final int lowerBound = 1;
  final int upperBound = 3;

  return new DefaultLanguageDetectorContextGenerator(
      lowerBound,
      upperBound,
      EmojiCharSequenceNormalizer.getInstance(),
      UrlCharSequenceNormalizer.getInstance(),
      TwitterCharSequenceNormalizer.getInstance(),
      NumberCharSequenceNormalizer.getInstance(),
      ShrinkCharSequenceNormalizer.getInstance());
}
/**
 * Verifies the features produced for the document {@code "abcde fghijk"} by a
 * {@link DefaultLanguageDetectorContextGenerator} constructed with {@code (1, 3)}.
 *
 * <p>The test expects exactly 33 features and checks that a handful of two- and
 * three-character substrings are among them, including ones that span the space
 * between the two words.
 *
 * @throws Exception declared so that any failure raised while extracting features
 *     fails the test
 */
@Test
public void extractContext() throws Exception {
  LanguageDetectorContextGenerator generator = new DefaultLanguageDetectorContextGenerator(1, 3);
  String text = "abcde fghijk";

  Collection<String> ngrams = Arrays.asList(generator.getContext(text));

  // The input has 12 characters; 33 is consistent with 12 + 11 + 10 substrings
  // of length 1, 2 and 3 respectively.
  Assert.assertEquals(33, ngrams.size());

  String[] expectedSamples = {"ab", "abc", "e f", " fg"};
  for (String sample : expectedSamples) {
    Assert.assertTrue(ngrams.contains(sample));
  }
}
}
/**
 * Creates the context generator supplied by this object.
 *
 * <p>Each call constructs a new {@link DefaultLanguageDetectorContextGenerator} from the
 * integers {@code 1} and {@code 3} (likely the n-gram length range; verify against the
 * constructor) and the singleton emoji, URL, Twitter, number and shrink normalizers,
 * passed in exactly that order.
 *
 * @return the newly created context generator
 */
public LanguageDetectorContextGenerator getContextGenerator() {
  final int first = 1;
  final int second = 3;

  LanguageDetectorContextGenerator generator =
      new DefaultLanguageDetectorContextGenerator(
          first,
          second,
          EmojiCharSequenceNormalizer.getInstance(),
          UrlCharSequenceNormalizer.getInstance(),
          TwitterCharSequenceNormalizer.getInstance(),
          NumberCharSequenceNormalizer.getInstance(),
          ShrinkCharSequenceNormalizer.getInstance());
  return generator;
}
/**
 * Returns a freshly built context generator.
 *
 * <p>The result is a {@link DefaultLanguageDetectorContextGenerator} constructed with
 * {@code 1} and {@code 3} as its leading integer arguments (assumed to bound the n-gram
 * size; confirm with the constructor's contract), followed by the emoji, URL, Twitter,
 * number and shrink normalizer instances in that order.
 *
 * @return a new context generator instance
 */
public LanguageDetectorContextGenerator getContextGenerator() {
  final int from = 1;
  final int to = 3;

  return new DefaultLanguageDetectorContextGenerator(
      from, to,
      EmojiCharSequenceNormalizer.getInstance(),
      UrlCharSequenceNormalizer.getInstance(),
      TwitterCharSequenceNormalizer.getInstance(),
      NumberCharSequenceNormalizer.getInstance(),
      ShrinkCharSequenceNormalizer.getInstance());
}