/**
 * Constructs a <code>FloatLSA</code> object from five explicitly supplied files.
 *
 * <p>All arguments are handed to <code>init</code> unchanged and in the same
 * order; this constructor performs no other work.
 *
 * @param UtFile     first file passed to <code>init</code>
 *                   (NOTE(review): presumably the U^t matrix - confirm against init)
 * @param SFile      second file passed to <code>init</code>
 *                   (NOTE(review): presumably the singular values - confirm)
 * @param rowFile    third file passed to <code>init</code>
 * @param colFile    fourth file passed to <code>init</code>
 * @param dfFile     fifth file passed to <code>init</code>
 * @param dim        dimension value passed to <code>init</code>
 * @param rescaleIdf flag passed to <code>init</code>
 * @param normalize  flag passed to <code>init</code>
 * @throws IOException if <code>init</code> throws it
 */
public FloatLSA(
        File UtFile,
        File SFile,
        File rowFile,
        File colFile,
        File dfFile,
        int dim,
        boolean rescaleIdf,
        boolean normalize) throws IOException {
    this.init(UtFile, SFile, rowFile, colFile, dfFile, dim, rescaleIdf, normalize);
} // end constructor
/**
 * Compares two bags of words and returns a cosine-style score.
 *
 * <p>Each argument is passed through <code>mapDocument</code> and the result
 * through <code>mapPseudoDocument</code>. The returned value is the dot product
 * of the two resulting vectors divided by the square root of the product of
 * their dot products with themselves. No guard is applied for a zero
 * denominator.
 *
 * @param bow1 the first bag of words
 * @param bow2 the second bag of words
 * @return the quotient described above, narrowed to <code>float</code>
 */
public float compare(BOW bow1, BOW bow2) {
    // Both documents are mapped before either is projected, as in the original call order.
    FloatVector firstDoc = mapDocument(bow1);
    FloatVector secondDoc = mapDocument(bow2);
    FloatVector firstPseudo = mapPseudoDocument(firstDoc);
    FloatVector secondPseudo = mapPseudoDocument(secondDoc);

    // The numerator is evaluated before the two self dot products.
    double numerator = firstPseudo.dot(secondPseudo);
    double denominator = Math.sqrt(firstPseudo.dot(firstPseudo) * secondPseudo.dot(secondPseudo));
    return (float) (numerator / denominator);
}
logger.info("time required " + df.format(end - begin) + " ns"); begin = System.nanoTime(); FloatVector d1 = mapDocument(bow1); FloatVector d2 = mapDocument(bow2); FloatVector pd1 = mapPseudoDocument(d1); FloatVector pd2 = mapPseudoDocument(d2); long begin = System.nanoTime(); ScoreTermMap map = new ScoreTermMap(query, 20); FloatVector vec1 = mapTerm(query); Iterator<String> it = terms(); while (it.hasNext()) { term = it.next(); FloatVector vec2 = mapTerm(term); float cos = (float) (vec1.dot(vec2) / Math.sqrt(vec1.dot(vec1) * vec2.dot(vec2))); map.put(cos, term);
/**
 * Command-line entry point.
 *
 * <p>Expects exactly five arguments; otherwise the help text is logged and the
 * JVM exits with status 1. Argument 0 is a path prefix to which the suffixes
 * <code>-Ut</code>, <code>-S</code>, <code>-row</code>, <code>-col</code> and
 * <code>-df</code> are appended. Arguments 1 and 2 are parsed as a float and an
 * int, argument 3 as the dimension and argument 4 as the idf-rescaling flag.
 * The logging configuration file is taken from the <code>log-config</code>
 * system property, defaulting to <code>log-config.txt</code>.
 *
 * @param args the command-line arguments
 * @throws Exception if any step of the run fails
 */
public static void main(String[] args) throws Exception {
    // Use the default configuration file name when the property is not set.
    String logConfig = System.getProperty("log-config", "log-config.txt");

    long startMillis = System.currentTimeMillis();
    PropertyConfigurator.configure(logConfig);

    if (args.length != 5) {
        logger.info(getHelp());
        System.exit(1);
    }

    String root = args[0];
    File utFile = new File(root + "-Ut");
    File sFile = new File(root + "-S");
    File rowFile = new File(root + "-row");
    File colFile = new File(root + "-col");
    File dfFile = new File(root + "-df");

    // threshold and size are parsed (so malformed values still fail here)
    // but are not used further in this method.
    float threshold = Float.parseFloat(args[1]);
    int size = Integer.parseInt(args[2]);
    int dim = Integer.parseInt(args[3]);
    boolean rescaleIdf = Boolean.parseBoolean(args[4]);

    FloatLSA lsi = new FloatLSA(utFile, sFile, rowFile, colFile, dfFile, dim, rescaleIdf);
    lsi.interactive();

    long elapsedMillis = System.currentTimeMillis() - startMillis;
    logger.info("term similarity calculated in " + elapsedMillis + " ms");
} // end main
/**
 * Returns a document in the VSM.
 *
 * <p>Convenience overload: forwards to the two-argument
 * <code>mapDocument</code> with <code>true</code> as the second argument.
 *
 * @param bow the bag of words to map
 * @return the vector produced by the two-argument <code>mapDocument</code>
 */
public FloatVector mapDocument(BOW bow) {
    FloatVector mapped = mapDocument(bow, true);
    return mapped;
} // end mapDocument
private void createIdf(Vocabulary voc, int l) { long begin = System.currentTimeMillis(); logger.info("creating idf matrix..."); Iidf = new float[voc.entrySet().size()]; //logger.debug("Iidf.size: " + Iidf.length); // iterates over the types Iterator it = voc.entrySet().iterator(); while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); String term = (String) me.getKey(); Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue(); int index = termIndex.get(term); Iidf[index] = log2((float) l / tf.get()); //logger.info(index + ": " + l + "/"+ tf.get() + " = " + Iidf[index]); } // end while //for (int i=0;i<Iidf.length;i++) // logger.info(i + " " + Iidf[i]); long end = System.currentTimeMillis(); logger.info("took " + (end - begin) + " ms"); } // end createIdf
/**
 * Constructs a <code>FloatLSA</code> object from a common path prefix.
 *
 * <p>Stores <code>dim</code> in the instance field, then calls
 * <code>init</code> with five files whose paths are <code>root</code> followed
 * by <code>-Ut</code>, <code>-S</code>, <code>-row</code>, <code>-col</code>
 * and <code>-df</code>, together with the remaining arguments.
 *
 * @param root       the path prefix shared by the five files
 * @param dim        the dimension value, stored and passed to <code>init</code>
 * @param rescaleIdf flag passed to <code>init</code>
 * @param normalize  flag passed to <code>init</code>
 * @throws IOException if <code>init</code> throws it
 */
public FloatLSA(String root, int dim, boolean rescaleIdf, boolean normalize) throws IOException {
    this.dim = dim;

    // Arguments are evaluated left to right, so the files are created in the
    // same order as before: -Ut, -S, -row, -col, -df.
    init(
        new File(root + "-Ut"),
        new File(root + "-S"),
        new File(root + "-row"),
        new File(root + "-col"),
        new File(root + "-df"),
        dim,
        rescaleIdf,
        normalize);
} // end constructor