/**
 * Creates a disambiguator from a phrase analyzer and an SR metric.
 * The disambiguator's language is the one reported by the metric.
 *
 * @param phraseAnalyzer analyzer kept by this disambiguator
 * @param metric         SR metric kept by this disambiguator; also supplies the language
 */
public SimilarityDisambiguator(PhraseAnalyzer phraseAnalyzer, SRMetric metric) {
    this.metric = metric;
    this.language = metric.getLanguage();
    this.phraseAnalyzer = phraseAnalyzer;
}
/**
 * Reports the language of the wrapped {@code delegate}.
 *
 * @return whatever {@code delegate.getLanguage()} returns
 */
@Override
public Language getLanguage() {
    return delegate.getLanguage();
}
/**
 * Creates a wikifier from an SR metric and the DAOs it works with.
 * The phrase DAO is taken from the supplied phrase analyzer and the
 * language is taken from the metric.
 *
 * @param metric      SR metric kept by this wikifier; also supplies the language
 * @param pa          anchor-text phrase analyzer whose DAO ({@code pa.getDao()}) is kept
 * @param lpd         local page DAO
 * @param rpd         raw page DAO
 * @param lld         local link DAO
 * @param linkProbDao link probability DAO
 */
public MilneWittenWikifier(SRMetric metric,
                           AnchorTextPhraseAnalyzer pa,
                           LocalPageDao lpd,
                           RawPageDao rpd,
                           LocalLinkDao lld,
                           LinkProbabilityDao linkProbDao) {
    // Data access objects.
    this.lpd = lpd;
    this.rpd = rpd;
    this.lld = lld;
    this.linkProbDao = linkProbDao;
    this.phraseDao = pa.getDao();

    // Metric and the language derived from it.
    this.metric = metric;
    this.language = metric.getLanguage();
}
/**
 * Creates a wikifier from its collaborators, builds a {@code PhraseTokenizer}
 * over the link probability DAO, and then runs
 * {@code learnMinLinkProbability()}.
 *
 * @param identityWikifier wikifier kept as {@code identityWikifier}
 * @param rawPageDao       raw page DAO
 * @param linkDao          local link DAO
 * @param linkProbDao      link probability DAO; also passed to the phrase tokenizer
 * @param phraseDao        phrase analyzer DAO
 * @param metric           SR metric kept by this wikifier; also supplies the language
 * @throws DaoException declared by this constructor
 *         (NOTE(review): presumably propagated from learnMinLinkProbability() - confirm)
 */
public WebSailWikifier(Wikifier identityWikifier,
                       RawPageDao rawPageDao,
                       LocalLinkDao linkDao,
                       LinkProbabilityDao linkProbDao,
                       PhraseAnalyzerDao phraseDao,
                       SRMetric metric) throws DaoException {
    // Plain collaborators.
    this.identityWikifier = identityWikifier;
    this.rawPageDao = rawPageDao;
    this.linkDao = linkDao;
    this.linkProbDao = linkProbDao;
    this.phraseDao = phraseDao;

    // Metric and the language derived from it.
    this.metric = metric;
    this.language = metric.getLanguage();

    this.phraseTokenizer = new PhraseTokenizer(linkProbDao);

    // Runs last, after every field above has been assigned.
    learnMinLinkProbability();
}
/**
 * Trains the similarity normalizer on the known similarities of a dataset.
 *
 * <p>Does nothing when the current normalizer is an {@code IdentityNormalizer}.
 * While training runs, the {@code similarityNormalizer} field is replaced by a
 * new {@code IdentityNormalizer}; the normalizer being trained is put back in a
 * {@code finally} block, so it is restored even if training fails.
 *
 * @param metric  metric whose similarity results are observed by the normalizer
 * @param dataset source of the known similarities; must have the metric's language
 * @throws IllegalArgumentException if the dataset and the metric report different languages
 */
public void trainSimilarity(final SRMetric metric, Dataset dataset) {
    if (similarityNormalizer instanceof IdentityNormalizer) {
        return;
    }
    if (!dataset.getLanguage().equals(metric.getLanguage())) {
        String message = "SR metric has language " + metric.getLanguage()
                + " but dataset has language " + dataset.getLanguage();
        throw new IllegalArgumentException(message);
    }

    // NOTE(review): presumably the identity normalizer is swapped in so the
    // metric yields un-normalized scores while observations are collected - confirm.
    final Normalizer pending = similarityNormalizer;
    similarityNormalizer = new IdentityNormalizer();
    try {
        pending.reset();
        ParallelForEach.loop(dataset.getData(), new Procedure<KnownSim>() {
            public void call(KnownSim known) throws IOException, DaoException {
                known.maybeSwap();
                SRResult result = metric.similarity(known.phrase1, known.phrase2, false);
                // A missing result is recorded as NaN.
                double observed;
                if (result == null) {
                    observed = Double.NaN;
                } else {
                    observed = result.getScore();
                }
                pending.observe(observed, known.similarity);
            }
        }, 100);
        pending.observationsFinished();
        LOG.info("trained similarity normalizer: " + pending.dump());
    } finally {
        similarityNormalizer = pending;
    }
}
// Build the path "<mostSimilarConcepts config value>/<language code>.txt" for the
// metric's language, then load the ids from that path into validIds via readIds.
String path = String.format("%s/%s.txt", config.getString("mostSimilarConcepts"), metric.getLanguage().getLangCode()); validIds = readIds(path);
return; if (!dataset.getLanguage().equals(metric.getLanguage())) { throw new IllegalArgumentException("SR metric has language " + metric.getLanguage() + " but dataset has language " + dataset.getLanguage());
/**
 * Creates a disambiguator that uses the given metric.
 *
 * <p>The language is taken from the metric. The page count is fetched once
 * here from {@code pageDao}, using a filter on that language, the
 * {@code ARTICLE} namespace, and with the redirect and disambig flags set
 * to {@code false}.
 *
 * @param pageDao  page DAO kept by this disambiguator and used to count pages
 * @param analyzer phrase analyzer kept by this disambiguator
 * @param metric   SR metric kept by this disambiguator; also supplies the language
 * @throws DaoException declared by this constructor
 *         (NOTE(review): presumably raised by the page count lookup - confirm)
 */
public MilneWittenDisambiguator(LocalPageDao pageDao,
                                PhraseAnalyzer analyzer,
                                SRMetric metric) throws DaoException {
    this.metric = metric;
    this.analyzer = analyzer;
    this.pageDao = pageDao;

    // Must be assigned before the filter below reads it.
    this.language = metric.getLanguage();

    this.numPages = pageDao.getCount(
            new DaoFilter()
                    .setLanguages(language)
                    .setNameSpaces(NameSpace.ARTICLE)
                    .setRedirect(false)
                    .setDisambig(false));
}