/**
 * Loads the default classifier that ships inside the jar file.
 * <p>
 * This only works when the code itself was loaded from a jar file that
 * contains a serialized classifier. Loading is delegated to
 * {@code loadClassifierNoExceptions} with {@code DEFAULT_CLASSIFIER} as the path.
 */
public void loadDefaultClassifier() {
  this.loadClassifierNoExceptions(DEFAULT_CLASSIFIER);
}
/**
 * Loads the default classifier that ships inside the jar file, passing the
 * supplied properties along to the loader.
 * <p>
 * This only works when the code itself was loaded from a jar file that
 * contains a serialized classifier. Loading is delegated to
 * {@code loadClassifierNoExceptions} with {@code DEFAULT_CLASSIFIER} as the path.
 *
 * @param props properties handed to {@code loadClassifierNoExceptions}
 */
public void loadDefaultClassifier(Properties props) {
  this.loadClassifierNoExceptions(DEFAULT_CLASSIFIER, props);
}
/**
 * Creates a fresh {@code CRFClassifier} and loads it from the given path via
 * {@code loadClassifierNoExceptions}.
 *
 * @param loadPath location handed to {@code loadClassifierNoExceptions}
 * @param <INN> token type handled by the returned classifier
 * @return the newly constructed classifier after the load call has returned
 */
public static <INN extends CoreMap> CRFClassifier<INN> getClassifierNoExceptions(String loadPath) {
  final CRFClassifier<INN> classifier = new CRFClassifier<>();
  classifier.loadClassifierNoExceptions(loadPath);
  return classifier;
}
public static void main(String[] args) throws Exception { System.setOut(new PrintStream(System.out, true, "utf-8")); Properties props = new Properties(); props.setProperty("sighanCorporaDict", basedir); // props.setProperty("NormalizationTable", "data/norm.simp.utf8"); // props.setProperty("normTableEncoding", "UTF-8"); // below is needed because CTBSegDocumentIteratorFactory accesses it props.setProperty("serDictionary", basedir + "/dict-chris6.ser.gz"); if (args.length > 0) { props.setProperty("testFile", args[0]); } props.setProperty("inputEncoding", "UTF-8"); props.setProperty("sighanPostProcessing", "true"); CRFClassifier<CoreLabel> segmenter = new CRFClassifier<>(props); segmenter.loadClassifierNoExceptions(basedir + "/ctb.gz", props); for (String filename : args) { segmenter.classifyAndWriteAnswers(filename); } String sample = "我住在美国。"; List<String> segmented = segmenter.segmentString(sample); System.out.println(segmented); }
crf.loadClassifierNoExceptions(loadPath, props); } else if (loadTextPath != null) { log.info("Warning: this is now only tested for Chinese Segmenter"); crf.loadClassifierNoExceptions(crf.flags.loadJarClassifier, props); } else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) { Timing timing = new Timing();
/**
 * Loads the default classifier that ships inside the jar file.
 * <p>
 * This only works when the code itself was loaded from a jar file that
 * contains a serialized classifier. Loading is delegated to
 * {@code loadClassifierNoExceptions} with {@code DEFAULT_CLASSIFIER} as the path.
 */
public void loadDefaultClassifier() {
  this.loadClassifierNoExceptions(DEFAULT_CLASSIFIER);
}
/**
 * Loads the default classifier that ships inside the jar file, passing the
 * supplied properties along to the loader.
 * <p>
 * This only works when the code itself was loaded from a jar file that
 * contains a serialized classifier. Loading is delegated to
 * {@code loadClassifierNoExceptions} with {@code DEFAULT_CLASSIFIER} as the path.
 *
 * @param props properties handed to {@code loadClassifierNoExceptions}
 */
public void loadDefaultClassifier(Properties props) {
  this.loadClassifierNoExceptions(DEFAULT_CLASSIFIER, props);
}
/**
 * Creates a fresh {@code CRFClassifier} and loads it from the given path via
 * {@code loadClassifierNoExceptions}.
 *
 * @param loadPath location handed to {@code loadClassifierNoExceptions}
 * @return the newly constructed classifier after the load call has returned
 */
public static CRFClassifier getClassifierNoExceptions(String loadPath) {
  // NOTE(review): declared without type arguments; confirm whether CRFClassifier
  // is generic in this version of the code before parameterizing it.
  final CRFClassifier classifier = new CRFClassifier();
  classifier.loadClassifierNoExceptions(loadPath);
  return classifier;
}
/**
 * Creates a fresh {@code CRFClassifier} and loads it from the given path via
 * {@code loadClassifierNoExceptions}.
 *
 * @param loadPath location handed to {@code loadClassifierNoExceptions}
 * @param <INN> token type handled by the returned classifier
 * @return the newly constructed classifier after the load call has returned
 */
public static <INN extends CoreMap> CRFClassifier<INN> getClassifierNoExceptions(String loadPath) {
  final CRFClassifier<INN> classifier = new CRFClassifier<>();
  classifier.loadClassifierNoExceptions(loadPath);
  return classifier;
}
/**
 * Creates a fresh {@code CRFClassifier} over {@code CoreLabel} tokens and
 * loads it from the given path via {@code loadClassifierNoExceptions}.
 *
 * @param loadPath location handed to {@code loadClassifierNoExceptions}
 * @return the newly constructed classifier after the load call has returned
 */
public static CRFClassifier<CoreLabel> getClassifierNoExceptions(String loadPath) {
  final CRFClassifier<CoreLabel> classifier = new CRFClassifier<CoreLabel>();
  classifier.loadClassifierNoExceptions(loadPath);
  return classifier;
}
public ChineseTokenizer( String basedir ){ Properties props = new Properties(); props.setProperty("sighanCorporaDict", basedir); props.setProperty("serDictionary", basedir + "/dict-chris6.ser.gz"); props.setProperty("inputEncoding", "UTF-8"); props.setProperty("sighanPostProcessing", "true"); segmenter = new CRFClassifier<>(props); segmenter.loadClassifierNoExceptions(basedir + "/ctb.gz", props); loadConversionMap(); }
public ChineseTokenizer( String basedir ){ Properties props = new Properties(); props.setProperty("sighanCorporaDict", basedir); props.setProperty("serDictionary", basedir + "/dict-chris6.ser.gz"); props.setProperty("inputEncoding", "UTF-8"); props.setProperty("sighanPostProcessing", "true"); segmenter = new CRFClassifier<>(props); segmenter.loadClassifierNoExceptions(basedir + "/ctb.gz", props); loadConversionMap(); }
public static void main(String[] args) throws Exception{ args=new String[]{"chi-sen.deseg"}; Properties props = new Properties(); //props.setProperty("sighanCorporaDict", "data"); // String dir="/home/users0/anders/storage/backuped/demos/SRLDemos/models/chi/stanford-chinese-segmenter-2008-05-21"; String dir="/home/users0/anders/storage/scratch/anders/stanford-segmenter-2013-06-20/"; props.setProperty("sighanCorporaDict", dir+"/data"); // props.setProperty("NormalizationTable", "data/norm.simp.utf8"); // props.setProperty("normTableEncoding", "UTF-8"); // below is needed because CTBSegDocumentIteratorFactory accesses it //props.setProperty("serDictionary","data/dict-chris6.ser.gz"); props.setProperty("serDictionary",dir+"/data/dict-chris6.ser.gz"); //props.setProperty("testFile", args[0]); props.setProperty("inputEncoding", "UTF-8"); props.setProperty("sighanPostProcessing", "true"); CRFClassifier<CoreLabel> classifier = new CRFClassifier<CoreLabel>(props); //classifier.loadClassifierNoExceptions("data/ctb.gz", props); classifier.loadClassifierNoExceptions(dir+"/data/ctb.gz", props); // flags must be re-set after data is loaded classifier.flags.setProperties(props); //classifier.writeAnswers(classifier.test(args[0])); //classifier.testAndWriteAnswers(args[0]); //ObjectBank<List<CoreLabel>> documents = classifier.makeObjectBank(args[0]); List<String> forms=classifier.segmentString("上海浦东近年来颁布实行了涉及经济、贸易、建设、规划、科技、文教等领域的七十一件法规性文件,确保了浦东开发的有序进行。"); for(String form:forms) System.out.println(form); }
classifier.loadClassifierNoExceptions(ctbFile.toString(), props);
public StanfordTagger(File file) throws Exception { crf = new CRFClassifier<CoreLabel>(new SeqClassifierFlags()); crf.loadClassifierNoExceptions(file); }
crf.loadClassifierNoExceptions(loadPath, props); } else if (loadTextPath != null) { System.err.println("Warning: this is now only tested for Chinese Segmenter");
crf.loadClassifierNoExceptions(loadPath, props); } else if (loadTextPath != null) { log.info("Warning: this is now only tested for Chinese Segmenter"); crf.loadClassifierNoExceptions(crf.flags.loadJarClassifier, props); } else if (crf.flags.trainFile != null || crf.flags.trainFileList != null) { Timing timing = new Timing();
crf.loadClassifierNoExceptions(loadPath, props); } else if (loadTextPath != null) { System.err.println("Warning: this is now only tested for Chinese Segmenter");