/**
 * Builds a Detector via {@code createDetector()} and applies the given smoothing parameter.
 *
 * @param alpha smoothing parameter (default value = 0.5)
 * @return the newly created Detector, with {@code setAlpha(alpha)} already applied
 * @throws LangDetectException propagated from {@code createDetector()}
 */
public static Detector create(double alpha) throws LangDetectException {
    final Detector configured = createDetector();
    configured.setAlpha(alpha);
    return configured;
}
/**
 * Feeds the text to a fresh detector from {@code DetectorFactory.create()} and returns
 * the probabilities it reports.
 *
 * @param text the text appended to the detector
 * @return the result of {@code Detector.getProbabilities()} for that text
 * @throws LangDetectException propagated from detector creation or from the detector calls
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final List<Language> candidates = languageDetector.getProbabilities();
    return candidates;
}
/**
 * Determines the language of the content supplied by a reader.
 *
 * A detector is created with alpha 0.5, the reader is appended to it, and the decision is
 * delegated to {@code detectLanguage}. Any {@link LangDetectException} or {@link IOException}
 * is logged at debug level and reported as {@code YLanguage.Undetermined}.
 *
 * @param text reader handed to {@code Detector.append}; it is not closed here
 * @param possibleLanguages languages passed through to {@code detectLanguage}
 * @return the language chosen by {@code detectLanguage}, or {@code YLanguage.Undetermined} on failure
 */
public static YLanguage getLanguage(Reader text, Set<YLanguage> possibleLanguages) {
    YLanguage outcome;
    try {
        final Detector langDetector = DetectorFactory.create(0.5f);
        langDetector.append(text);
        outcome = detectLanguage(possibleLanguages, langDetector);
    } catch (LangDetectException | IOException e) {
        log.debug("Couldn't determine content language", e);
        outcome = YLanguage.Undetermined;
    }
    return outcome;
}
/**
 * Creates a language identifier: clears {@code DetectorFactory} and then loads into it
 * whatever {@code loadProfiles("profiles", "profiles.cfg")} returns.
 *
 * @throws LangDetectException if anything goes wrong while loading; the message is prefixed
 *         with "Error in Initialization: " and the original exception is attached as the cause
 */
public LanguageIdentifier() throws LangDetectException {
    DetectorFactory.clear();
    try {
        DetectorFactory.loadProfile(loadProfiles("profiles", "profiles.cfg"));
    } catch (Exception e) {
        // The error code is left null as before; only the message and the cause describe the failure.
        LangDetectException failure =
                new LangDetectException(null, "Error in Initialization: " + e.getMessage());
        // Keep the original stack trace reachable instead of flattening it to a message.
        // NOTE(review): assumes the (ErrorCode, String) constructor does not set a cause itself — confirm.
        failure.initCause(e);
        throw failure;
    }
}
/**
 * Loads profiles from the directory given by the "directory" option and, when a "seed"
 * option is present, passes it to {@code DetectorFactory.setSeed}.
 *
 * On a {@link LangDetectException} the message is printed to standard error.
 *
 * @return {@code false} if loading succeeded, {@code true} if it failed
 */
private boolean loadProfile() {
    final String profileDirectory = get("directory") + "/";
    boolean failed;
    try {
        DetectorFactory.loadProfile(profileDirectory);
        final Long seed = getLong("seed");
        if (seed != null) {
            DetectorFactory.setSeed(seed);
        }
        failed = false;
    } catch (LangDetectException e) {
        System.err.println("ERROR: " + e.getMessage());
        failed = true;
    }
    return failed;
}
/**
 * Command line entry point.
 *
 * Registers the "-d", "-a", "-s" and "-l" options, parses the arguments, and runs the first
 * matching mode among "--genprofile", "--genprofile-text", "--detectlang" and "--batchtest".
 * When none of these is present, nothing is executed.
 *
 * @param args command line arguments
 */
public static void main(String[] args) {
    final Command cli = new Command();
    // presumably (switch, option name, default value) — confirm against addOpt
    cli.addOpt("-d", "directory", "./");
    cli.addOpt("-a", "alpha", "" + DEFAULT_ALPHA);
    cli.addOpt("-s", "seed", null);
    cli.addOpt("-l", "lang", null);
    cli.parse(args);

    // Modes are mutually exclusive and checked in this fixed order.
    if (cli.hasOpt("--genprofile")) {
        cli.generateProfile();
        return;
    }
    if (cli.hasOpt("--genprofile-text")) {
        cli.generateProfileFromText();
        return;
    }
    if (cli.hasOpt("--detectlang")) {
        cli.detectLang();
        return;
    }
    if (cli.hasOpt("--batchtest")) {
        cli.batchTest();
    }
}
/**
 * Detects the language of the target text and returns the first entry of
 * {@link #getProbabilities()}.
 *
 * @return the {@code lang} of the first candidate, or {@code UNKNOWN_LANG} when there are no candidates
 * @throws LangDetectException
 *  code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public String detect() throws LangDetectException {
    final ArrayList<Language> ranked = getProbabilities();
    return ranked.isEmpty() ? UNKNOWN_LANG : ranked.get(0).lang;
}
/**
 * Creates a Detector backed by the shared factory instance.
 *
 * @return a new Detector constructed from {@code instance_}
 * @throws LangDetectException with {@code ErrorCode.NeedLoadProfileError} when the factory's
 *         language list is empty
 */
static private Detector createDetector() throws LangDetectException {
    // An empty language list means no profile has been added yet.
    if (instance_.langlist.isEmpty()) {
        throw new LangDetectException(ErrorCode.NeedLoadProfileError, "need to load profiles");
    }
    return new Detector(instance_);
}
/**
 * Load profiles from a list of JSON-encoded language profiles.
 * This method must be called once before language detection.
 *
 * Each string is decoded into a {@link LangProfile} and registered through
 * {@code addProfile} with its position in the list and the total number of profiles.
 *
 * @param json_profiles JSON strings, one per language profile; at least two are required
 * @throws LangDetectException fewer than two profiles were given
 *  (error code = {@link ErrorCode#NeedLoadProfileError})
 *  or a profile's format is wrong (error code = {@link ErrorCode#FormatError}, with the
 *  underlying {@code JSONException} attached as the cause)
 */
public static void loadProfile(List<String> json_profiles) throws LangDetectException {
    int index = 0;
    int langsize = json_profiles.size();
    // NOTE(review): the message says "more than 2" although exactly 2 profiles are accepted;
    // the wording is left unchanged in case callers match on it.
    if (langsize < 2) {
        throw new LangDetectException(ErrorCode.NeedLoadProfileError, "Need more than 2 profiles");
    }

    for (String json : json_profiles) {
        try {
            LangProfile profile = JSON.decode(json, LangProfile.class);
            addProfile(profile, index, langsize);
            ++index;
        } catch (JSONException e) {
            LangDetectException failure =
                    new LangDetectException(ErrorCode.FormatError, "profile format error");
            // Preserve the parser's diagnostics instead of discarding them.
            // NOTE(review): assumes the (ErrorCode, String) constructor does not set a cause itself — confirm.
            failure.initCause(e);
            throw failure;
        }
    }
}
/**
 * Loads profiles from the directory at the given path.
 * This method must be called once before language detection.
 *
 * The path is wrapped in a {@link File} and handed to the {@code File}-based overload.
 *
 * @param profileDirectory profile directory path
 * @throws LangDetectException  Can't open profiles(error code = {@link ErrorCode#FileLoadError})
 *                              or profile's format is wrong (error code = {@link ErrorCode#FormatError})
 */
public static void loadProfile(String profileDirectory) throws LangDetectException {
    final File directory = new File(profileDirectory);
    loadProfile(directory);
}
/**
 * Returns the language candidates produced by {@code sortProbability} for the current
 * probabilities, running {@code detectBlock()} first when {@code langprob} is still null.
 *
 * @return possible languages list (whose probabilities are over PROB_THRESHOLD, in descending order of probability)
 * @throws LangDetectException
 *  code = ErrorCode.CantDetectError : Can't detect because of no valid features in text
 */
public ArrayList<Language> getProbabilities() throws LangDetectException {
    // Detection is lazy: it only runs when no probabilities have been computed yet.
    if (langprob == null) {
        detectBlock();
    }
    return sortProbability(langprob);
}
/**
 * Creates a Detector by delegating to {@code createDetector()}.
 *
 * @return the newly created Detector
 * @throws LangDetectException propagated from {@code createDetector()}
 */
static public Detector create() throws LangDetectException {
    final Detector created = createDetector();
    return created;
}
/**
 * Returns the string form of {@code language}.
 */
@Override
public String toString() {
    final String text = language.toString();
    return text;
}
} // closes the enclosing class, whose header lies outside this chunk
/**
 * Builds the list of language candidates whose probability exceeds {@code PROB_THRESHOLD},
 * in descending order of probability.
 *
 * @param prob probabilities indexed in parallel with {@code langlist}
 * @return language candidates in descending order of probability; among equal probabilities
 *         the entry with the lower index comes first
 */
private ArrayList<Language> sortProbability(double[] prob) {
    ArrayList<Language> ranked = new ArrayList<Language>();
    for (int langIndex = 0; langIndex < prob.length; ++langIndex) {
        double p = prob[langIndex];
        if (!(p > PROB_THRESHOLD)) {
            continue;
        }
        // Walk past every entry that is not strictly smaller, so ties keep insertion order.
        int slot = 0;
        while (slot < ranked.size() && !(ranked.get(slot).prob < p)) {
            ++slot;
        }
        ranked.add(slot, new Language(langlist.get(langIndex), p));
    }
    return ranked;
}
/**
 * Determines the language of the given text.
 *
 * A detector is created with alpha 0.5, the text is appended to it, and the decision is
 * delegated to {@code detectLanguage}. A {@link LangDetectException} is logged at debug
 * level and reported as {@code YLanguage.Undetermined}.
 *
 * @param text text handed to {@code Detector.append}
 * @param possibleLanguages languages passed through to {@code detectLanguage}
 * @return the language chosen by {@code detectLanguage}, or {@code YLanguage.Undetermined} on failure
 */
public static YLanguage getLanguage(String text, Set<YLanguage> possibleLanguages) {
    YLanguage outcome;
    try {
        final Detector langDetector = DetectorFactory.create(0.5f);
        langDetector.append(text);
        outcome = detectLanguage(possibleLanguages, langDetector);
    } catch (LangDetectException e) {
        log.debug("Couldn't determine content language", e);
        outcome = YLanguage.Undetermined;
    }
    return outcome;
}
/**
 * Picks a language from the detector's candidates, taken in the order returned by
 * {@code getProbabilities()}.
 *
 * The first candidate is returned whose probability exceeds {@code MINIMUM_PROBABILITY},
 * or whose probability exceeds {@code MINIMUM_PROBABILITY_FROM_SET} while its language is
 * contained in {@code possibleLanguages}.
 *
 * @param possibleLanguages languages that qualify under the lower threshold
 * @param detector detector whose probabilities are examined
 * @return the first qualifying language, or {@code YLanguage.Undetermined} if none qualifies
 * @throws LangDetectException propagated from {@code Detector.getProbabilities()}
 */
private static YLanguage detectLanguage(Set<YLanguage> possibleLanguages, Detector detector) throws LangDetectException {
    for (Language candidate : detector.getProbabilities()) {
        final YLanguage mapped = YLanguage.byCode(candidate.lang);
        final boolean confident = candidate.prob > MINIMUM_PROBABILITY;
        // Short-circuit keeps the original order: the set is consulted only below the high threshold.
        if (confident
                || (candidate.prob > MINIMUM_PROBABILITY_FROM_SET && possibleLanguages.contains(mapped))) {
            return mapped;
        }
    }
    return YLanguage.Undetermined;
}
} // closes the enclosing class, whose header lies outside this chunk
/**
 * Wraps the detector's probabilities in a {@code DetectedLanguages}.
 *
 * @param shuyoDetector detector whose {@code getProbabilities()} result is wrapped
 * @return a new {@code DetectedLanguages} built from those probabilities
 * @throws LanguageDetectorException if a {@link LangDetectException} is raised; the original
 *         exception is passed along as the cause
 */
public DetectedLanguages doDetect(Detector shuyoDetector) throws LanguageDetectorException {
    final DetectedLanguages detected;
    try {
        detected = new DetectedLanguages(shuyoDetector.getProbabilities());
    } catch (LangDetectException e) {
        throw new LanguageDetectorException(
                "Cannot detect language(s).", e);
    }
    return detected;
}