/**
 * Builds a GosenTokenizerFactory from its configuration arguments.
 *
 * <p>The following keys are read from {@code args}:
 * <ul>
 *   <li>{@code compositePOS} - stored in {@code compositePosFile}</li>
 *   <li>{@code dictionaryDir} - stored in {@code dirVal}</li>
 *   <li>{@code tokenizeUnknownKatakana} - read as a boolean, with {@code false}
 *       passed as the fallback value</li>
 * </ul>
 *
 * @param args configuration key/value pairs; the map is handed to the superclass
 *             constructor first and must contain nothing but recognized keys
 *             (presumably the inherited {@code get}/{@code getBoolean} helpers
 *             remove each key they read - confirm against the superclass)
 * @throws IllegalArgumentException if {@code args} is not empty after all
 *         recognized keys have been read
 */
public GosenTokenizerFactory(Map<String,String> args) {
  super(args);

  compositePosFile = get(args, "compositePOS");
  dirVal = get(args, "dictionaryDir");
  tokenizeUnknownKatakana = getBoolean(args, "tokenizeUnknownKatakana", false);

  // Nothing left over means every supplied key was recognized.
  if (args.isEmpty()) {
    return;
  }
  throw new IllegalArgumentException("Unknown parameters: " + args);
}
/**
 * Checks that the part-of-speech stop filter removes tokens whose tag appears
 * in the stop-tags resource.
 *
 * <p>The sentence is tokenized with a Gosen tokenizer, then passed through a
 * filter configured with the single tag {@code 動詞-自立}. The expected output
 * contains every token of the sentence except {@code 超える}.
 *
 * @throws IOException if building or consuming the token stream fails
 */
@Test
public void testBasics() throws IOException {
  // Stop-tags resource content: one comment line followed by the tag to drop.
  String tags = "# verb-main:\n" + "動詞-自立\n";

  // A plain HashMap is used rather than double-brace initialization, which
  // would create an anonymous HashMap subclass just to hold one entry.
  Map<String,String> tokenizerArgs = new HashMap<String,String>();
  tokenizerArgs.put("dictionaryDir", SenTestUtil.IPADIC_DIR);

  GosenTokenizerFactory tokenizerFactory = new GosenTokenizerFactory(tokenizerArgs);
  tokenizerFactory.inform(new StringMockResourceLoader(""));
  Tokenizer tokenizer = tokenizerFactory.create();
  tokenizer.setReader(new StringReader("私は制限スピードを超える。"));

  Map<String,String> args = new HashMap<String,String>();
  args.put("luceneMatchVersion", Version.LATEST.toString());
  args.put("tags", "stoptags.txt");

  GosenPartOfSpeechStopFilterFactory factory = new GosenPartOfSpeechStopFilterFactory(args);
  // The mock loader is constructed with the tag list defined above.
  factory.inform(new StringMockResourceLoader(tags));
  TokenStream ts = factory.create(tokenizer);

  assertTokenStreamContents(ts,
      new String[] { "私", "は", "制限", "スピード", "を", "。" }
  );
}
// Reads the private "dictionaryDir" field of GosenTokenizerFactory via reflection and
// compares it, after each fresh construction + inform(loader), against three expected
// values in turn: dicDir.getName(), dicDir.getAbsolutePath(), and notExistsPath.
// The very first factory instance is informed and then replaced before any assertion.
// NOTE(review): nothing in this excerpt changes `args` between the constructions, yet the
// expected value differs each time - presumably the statements that update the
// dictionaryDir argument (and the enclosing method header) are elided here; confirm
// against the full test before relying on this sequence.
GosenTokenizerFactory factory = new GosenTokenizerFactory(args); factory.inform(loader); Field field = GosenTokenizerFactory.class.getDeclaredField("dictionaryDir"); field.setAccessible(true); factory = new GosenTokenizerFactory(args); factory.inform(loader); assertEquals("dictionaryDir is incorrect.", dicDir.getName(), field.get(factory)); factory = new GosenTokenizerFactory(args); factory.inform(loader); assertEquals("dictionaryDir is incorrect.", dicDir.getAbsolutePath(), field.get(factory)); factory = new GosenTokenizerFactory(args); factory.inform(loader); assertEquals("dictionaryDir is incorrect.", notExistsPath, field.get(factory));
/**
 * Checks that constructing the factory with an unrecognized argument is
 * rejected with an {@link IllegalArgumentException} whose message contains
 * {@code "Unknown parameters"}.
 *
 * @throws Exception declared for any unexpected failure during the test
 */
@Test
public void testBogusArgments() throws Exception {
  // A plain HashMap is used rather than double-brace initialization, which
  // would create an anonymous HashMap subclass just to hold one entry.
  HashMap<String, String> bogusArgs = new HashMap<String, String>();
  bogusArgs.put("bogusArg", "bogusValue");

  try {
    new GosenTokenizerFactory(bogusArgs);
    // Reaching this line means the constructor accepted the unknown key.
    fail();
  } catch (IllegalArgumentException expected) {
    assertTrue(expected.getMessage().contains("Unknown parameters"));
  }
}
// Closes the enclosing test class, whose header lies outside this excerpt.
}
/**
 * Checks that the part-of-speech keep filter retains only tokens whose tag
 * appears in the tags resource.
 *
 * <p>The sentence is tokenized with a Gosen tokenizer, then passed through a
 * filter configured with the single tag {@code 動詞-自立}. The expected output
 * is the single token {@code 超える}.
 *
 * @throws IOException if building or consuming the token stream fails
 */
@Test
public void testBasics() throws IOException {
  // Tags resource content: one comment line followed by the tag to keep.
  String tags = "# verb-main:\n" + "動詞-自立\n";

  // A plain HashMap is used rather than double-brace initialization, which
  // would create an anonymous HashMap subclass just to hold one entry.
  Map<String,String> tokenizerArgs = new HashMap<String,String>();
  tokenizerArgs.put("dictionaryDir", SenTestUtil.IPADIC_DIR);

  GosenTokenizerFactory tokenizerFactory = new GosenTokenizerFactory(tokenizerArgs);
  tokenizerFactory.inform(new StringMockResourceLoader(""));
  Tokenizer tokenizer = tokenizerFactory.create();
  tokenizer.setReader(new StringReader("私は制限スピードを超える。"));

  Map<String,String> args = new HashMap<String,String>();
  args.put("luceneMatchVersion", Version.LATEST.toString());
  args.put("tags", "stoptags.txt");

  GosenPartOfSpeechKeepFilterFactory factory = new GosenPartOfSpeechKeepFilterFactory(args);
  // The mock loader is constructed with the tag list defined above.
  factory.inform(new StringMockResourceLoader(tags));
  TokenStream ts = factory.create(tokenizer);

  assertTokenStreamContents(ts,
      new String[] { "超える" }
  );
}
/** * Create a new GosenTokenizerFactory * @param args */ public GosenTokenizerFactory(Map<String,String> args) { super(args); compositePosFile = get(args, "compositePOS"); dirVal = get(args, "dictionaryDir"); tokenizeUnknownKatakana = getBoolean(args, "tokenizeUnknownKatakana", false); if (!args.isEmpty()){ throw new IllegalArgumentException("Unknown parameters: " + args); } }