/**
 * Constructs a <code>DoubleLSA</code> object from explicitly supplied model files.
 *
 * <p>Every argument is handed over unchanged, and in the same order, to
 * <code>init</code>, which performs the actual set-up.
 *
 * @param UtFile     first file forwarded to <code>init</code>
 *                   (presumably the Ut matrix -- confirm against <code>init</code>)
 * @param SFile      second file forwarded to <code>init</code>
 *                   (presumably the S matrix -- confirm against <code>init</code>)
 * @param rowFile    third file forwarded to <code>init</code>
 * @param colFile    fourth file forwarded to <code>init</code>
 * @param dfFile     fifth file forwarded to <code>init</code>
 * @param dim        dimension value forwarded to <code>init</code>
 * @param rescaleIdf flag forwarded to <code>init</code>
 * @param normalize  flag forwarded to <code>init</code>
 * @throws IOException propagated from <code>init</code>
 */
public DoubleLSA(File UtFile,
                 File SFile,
                 File rowFile,
                 File colFile,
                 File dfFile,
                 int dim,
                 boolean rescaleIdf,
                 boolean normalize) throws IOException {
    init(UtFile, SFile, rowFile, colFile, dfFile, dim, rescaleIdf, normalize);
} // end constructor
/**
 * Compares two bags of words and returns the cosine of their pseudo-documents.
 *
 * <p>Each bag is first passed through <code>mapDocument</code> and the result
 * through <code>mapPseudoDocument</code>. The returned value is the dot product
 * of the two pseudo-documents divided by the square root of the product of
 * their dot products with themselves.
 *
 * <p>NOTE(review): there is no guard for a zero denominator; if either
 * pseudo-document has a zero self dot product the result is presumably
 * <code>NaN</code> -- confirm whether callers expect that.
 *
 * @param bow1 the first bag of words
 * @param bow2 the second bag of words
 * @return the cosine computed as described above
 */
public double compare(BOW bow1, BOW bow2) {
    // Both documents are mapped before either is projected, as before.
    DoubleVector firstDocument = mapDocument(bow1);
    DoubleVector secondDocument = mapDocument(bow2);

    DoubleVector firstPseudo = mapPseudoDocument(firstDocument);
    DoubleVector secondPseudo = mapPseudoDocument(secondDocument);

    double numerator = firstPseudo.dot(secondPseudo);
    double denominator = Math.sqrt(firstPseudo.dot(firstPseudo) * secondPseudo.dot(secondPseudo));
    return numerator / denominator;
} // end compare
BOW bow2 = new BOW(s[1].toLowerCase().replaceAll("category:", "_").split("[_ ]")); DoubleVector d1 = mapDocument(bow1); DoubleVector d2 = mapDocument(bow2); DoubleVector pd1 = mapPseudoDocument(d1); DoubleVector pd2 = mapPseudoDocument(d2); long begin = System.nanoTime(); ScoreTermMap map = new ScoreTermMap(query, 20); DoubleVector vec1 = mapTerm(query); Iterator<String> it = terms(); while (it.hasNext()) { term = it.next(); DoubleVector vec2 = mapTerm(term); double cos = vec1.dot(vec2) / Math.sqrt(vec1.dot(vec1) * vec2.dot(vec2)); map.put(cos, term);
/**
 * Command-line entry point.
 *
 * <p>Configures logging from the file named by the <code>log-config</code>
 * system property (falling back to <code>log-config.txt</code>), checks that
 * exactly five arguments were supplied, builds a <code>DoubleLSA</code> from
 * the files <code>args[0]</code> + <code>-Ut</code>, <code>-S</code>,
 * <code>-row</code>, <code>-col</code> and <code>-df</code>, and then runs
 * <code>interactive()</code> on it. The elapsed wall-clock time, measured from
 * just before logging is configured, is logged at the end.
 *
 * <p>If the argument count is not five, the help text is logged and the JVM
 * exits with status 1.
 *
 * @param args <code>args[0]</code> file-name prefix, <code>args[1]</code> a double
 *             (threshold), <code>args[2]</code> an int (size), <code>args[3]</code>
 *             an int (dimension), <code>args[4]</code> a boolean (rescale idf)
 * @throws Exception any failure raised while parsing the arguments or while
 *                   building or running the model
 */
public static void main(String[] args) throws Exception {
    String logConfig = System.getProperty("log-config", "log-config.txt");

    long startMillis = System.currentTimeMillis();
    PropertyConfigurator.configure(logConfig);

    if (args.length != 5) {
        logger.info(getHelp());
        System.exit(1);
    }

    String root = args[0];
    File utFile = new File(root + "-Ut");
    File sFile = new File(root + "-S");
    File rowFile = new File(root + "-row");
    File colFile = new File(root + "-col");
    File dfFile = new File(root + "-df");

    // All four values are parsed in argument order so that a malformed value
    // fails at the same point as before; threshold and size are not used
    // again in this method.
    double threshold = Double.parseDouble(args[1]);
    int size = Integer.parseInt(args[2]);
    int dim = Integer.parseInt(args[3]);
    boolean rescaleIdf = Boolean.parseBoolean(args[4]);

    DoubleLSA model = new DoubleLSA(utFile, sFile, rowFile, colFile, dfFile, dim, rescaleIdf);
    model.interactive();

    long elapsedMillis = System.currentTimeMillis() - startMillis;
    logger.info("term similarity calculated in " + elapsedMillis + " ms");
} // end main
/**
 * Returns a document in the VSM.
 *
 * <p>Convenience overload: forwards to the two-argument
 * <code>mapDocument</code>, passing <code>true</code> as the second argument.
 *
 * @param bow the bag of words to map
 * @return the vector produced by <code>mapDocument(bow, true)</code>
 */
public DoubleVector mapDocument(BOW bow) {
    DoubleVector document = mapDocument(bow, true);
    return document;
} // end mapDocument
private void createIdf(Vocabulary voc, int l) { long begin = System.currentTimeMillis(); logger.info("creating idf matrix..."); Iidf = new double[voc.entrySet().size()]; //logger.debug("Iidf.size: " + Iidf.length); // iterates over the types Iterator it = voc.entrySet().iterator(); while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); String term = (String) me.getKey(); Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue(); int index = termIndex.get(term); Iidf[index] = log2((double) l / tf.get()); //logger.info(index + ": " + l + "/"+ tf.get() + " = " + Iidf[index]); } // end while //for (int i=0;i<Iidf.length;i++) // logger.info(i + " " + Iidf[i]); long end = System.currentTimeMillis(); logger.info("took " + (end - begin) + " ms"); } // end createIdf
/** * Constructs a <code>DoubleLSA</code> object. */ public DoubleLSA(String root, int dim, boolean rescaleIdf, boolean normalize) throws IOException { //logger.info("reading ls model..."); this.dim = dim; File Ut = new File(root + "-Ut"); File Sk = new File(root + "-S"); File r = new File(root + "-row"); File c = new File(root + "-col"); File df = new File(root + "-df"); init(Ut, Sk, r, c, df, dim, rescaleIdf, normalize); } // end loadLSM