/**
 * Looks up the position of a term in the VSM.
 *
 * @param term the term to look up
 * @return the value produced by {@code termIndex.get(term)}
 * @throws TermNotFoundException declared by the signature; this method contains no
 *         throw statement of its own. NOTE(review): confirm whether the underlying
 *         lookup throws or instead reports an unknown term with -1.
 */
public int termIndex(String term) throws TermNotFoundException {
    final int position = termIndex.get(term);
    return position;
} // end termIndex
/**
 * Returns the idf weight stored for the specified term.
 *
 * @param term the term to look up
 * @return the idf of the term if it is present in the index; 0 otherwise
 *         (the index lookup reports an absent term with -1)
 */
public float getIdf(String term) {
    final int index = termIndex.get(term);
    // An unindexed term yields a neutral weight of 0, not -1.
    return (index == -1) ? 0 : Iidf[index];
} // end getIdf
/**
 * Returns the idf weight stored for the specified term.
 *
 * @param term the term to look up
 * @return the idf of the term if it is present in the index; 0 otherwise
 *         (the index lookup reports an absent term with -1)
 */
public float getIdf(String term) {
    final int index = termIndex.get(term);
    // An unindexed term yields a neutral weight of 0, not -1.
    return (index == -1) ? 0 : Iidf[index];
} // end getIdf
/**
 * Returns the idf weight stored for the specified term.
 *
 * @param term the term to look up
 * @return the idf of the term if it is present in the index; 0 otherwise
 *         (the index lookup reports an absent term with -1)
 */
public double getIdf(String term) {
    final int index = termIndex.get(term);
    // An unindexed term yields a neutral weight of 0, not -1.
    return (index == -1) ? 0 : Iidf[index];
} // end getIdf
/**
 * Looks up the position of a term in the VSM.
 *
 * @param term the term to look up
 * @return the value produced by {@code termIndex.get(term)}
 * @throws TermNotFoundException declared by the signature; this method contains no
 *         throw statement of its own. NOTE(review): confirm whether the underlying
 *         lookup throws or instead reports an unknown term with -1.
 */
public int termIndex(String term) throws TermNotFoundException {
    final int position = termIndex.get(term);
    return position;
} // end termIndex
/**
 * Looks up the position of a term in the VSM.
 *
 * @param term the term to look up
 * @return the value produced by {@code termIndex.get(term)}
 * @throws TermNotFoundException declared by the signature; this method contains no
 *         throw statement of its own. NOTE(review): confirm whether the underlying
 *         lookup throws or instead reports an unknown term with -1.
 */
public int termIndex(String term) throws TermNotFoundException {
    final int position = termIndex.get(term);
    return position;
} // end termIndex
/** * */ public double[] getVector(String term) { int i = termIndex.get(term); logger.debug(term + " " + i); if (i == -1) { return null; } //return Uk.viewRow(i); return Uk[i]; } // end getVector
/** * */ public float[] getVector(String term) { int i = termIndex.get(term); logger.debug(term + " " + i); if (i == -1) { return null; } //return Uk.viewRow(i); return Uk[i]; } // end getVector
/** * */ public float[] getVector(String term) { int i = termIndex.get(term); logger.debug(term + " " + i); if (i == -1) { return null; } //return Uk.viewRow(i); return Uk[i]; } // end getVector
public Node[] mapDocument(Map<String, Double> map) { //logger.debug("mapDocument"); SortedSet<Node> nodes = new TreeSet<Node>(); String term; int index; //double tf; double w; Iterator<String> it = map.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next(); index = termIndex.get(term); if (index != -1) { w = map.get(term); //logger.info("\t"+term + "\t" + index + "\ttf= " + bow.augmentedFrequency(term) + "\tidf=" + Iidf[index] + " (" + tfIdf + ")"); nodes.add(new Node(index, w)); } } return nodes.toArray(new Node[nodes.size()]); }
/**
 * Maps a single term into the VSM.
 *
 * @param term the term to map
 * @return a {@code DenseVector} built from the term's row of {@code Uk}
 * @throws TermNotFoundException if the index lookup yields -1 for the term
 */
public Vector mapTerm(String term) throws TermNotFoundException {
    final int row = termIndex.get(term);
    if (row != -1) {
        return new DenseVector(Uk[row]);
    }
    throw new TermNotFoundException(term);
} // end mapTerm
/**
 * Maps a single term into the VSM.
 *
 * @param term the term to map
 * @return a {@code FloatVector} built from the term's row of {@code Uk}
 * @throws TermNotFoundException if the index lookup yields -1 for the term
 */
public FloatVector mapTerm(String term) throws TermNotFoundException {
    final int row = termIndex.get(term);
    if (row != -1) {
        return new FloatVector(Uk[row]);
    }
    throw new TermNotFoundException(term);
} // end mapTerm
/**
 * Maps a single term into the VSM.
 *
 * @param term the term to map
 * @return a {@code DoubleVector} built from the term's row of {@code Uk}
 * @throws TermNotFoundException if the index lookup yields -1 for the term
 */
public DoubleVector mapTerm(String term) throws TermNotFoundException {
    final int row = termIndex.get(term);
    if (row != -1) {
        return new DoubleVector(Uk[row]);
    }
    throw new TermNotFoundException(term);
} // end mapTerm
/**
 * Maps a single term into the VSM as an array of nodes.
 *
 * <p>The result holds one {@code Node(j, Uk[i][j])} per column {@code j} of the
 * term's row {@code i} in {@code Uk}.
 *
 * @param term the term to map
 * @return one node per component of the term's row
 * @throws TermNotFoundException if the index lookup yields -1 for the term
 */
public Node[] mapTerm(String term) throws TermNotFoundException {
    final int row = termIndex.get(term);
    if (row == -1) {
        throw new TermNotFoundException(term);
    }

    final int width = Uk[row].length;
    final Node[] nodes = new Node[width];
    for (int col = 0; col < width; col++) {
        nodes[col] = new Node(col, Uk[row][col]);
    }
    return nodes;
} // end mapTerm
private void createIdf(Vocabulary voc, int l) { long begin = System.currentTimeMillis(); logger.info("creating idf matrix..."); Iidf = new float[voc.entrySet().size()]; //logger.debug("Iidf.size: " + Iidf.length); // iterates over the types Iterator it = voc.entrySet().iterator(); while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); String term = (String) me.getKey(); Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue(); int index = termIndex.get(term); Iidf[index] = log2((float) l / tf.get()); //logger.info(index + ": " + l + "/"+ tf.get() + " = " + Iidf[index]); } // end while //for (int i=0;i<Iidf.length;i++) // logger.info(i + " " + Iidf[i]); long end = System.currentTimeMillis(); logger.info("took " + (end - begin) + " ms"); } // end createIdf
private void createIdf(Vocabulary voc, int l) { long begin = System.currentTimeMillis(); logger.info("creating idf vector..."); Iidf = new float[voc.entrySet().size()]; //logger.debug("Iidf.size: " + Iidf.length); // iterates over the types Iterator it = voc.entrySet().iterator(); while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); String term = (String) me.getKey(); Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue(); int index = termIndex.get(term); Iidf[index] = (float) log2((double) l / tf.get()); //logger.info(index + ": " + l + "/"+ tf.get() + " = " + Iidf[index]); } // end while //for (int i=0;i<Iidf.length;i++) // logger.info(i + " " + Iidf[i]); long end = System.currentTimeMillis(); logger.info("took " + (end - begin) + " ms"); } // end createIdf
private void createIdf(Vocabulary voc, int l) { long begin = System.currentTimeMillis(); logger.info("creating idf matrix..."); Iidf = new double[voc.entrySet().size()]; //logger.debug("Iidf.size: " + Iidf.length); // iterates over the types Iterator it = voc.entrySet().iterator(); while (it.hasNext()) { Map.Entry me = (Map.Entry) it.next(); String term = (String) me.getKey(); Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue(); int index = termIndex.get(term); Iidf[index] = log2((double) l / tf.get()); //logger.info(index + ": " + l + "/"+ tf.get() + " = " + Iidf[index]); } // end while //for (int i=0;i<Iidf.length;i++) // logger.info(i + " " + Iidf[i]); long end = System.currentTimeMillis(); logger.info("took " + (end - begin) + " ms"); } // end createIdf
/** * Returns a document in the VSM. */ public Node[] mapDocument(BOW bow) { //logger.debug("mapDocument"); SortedSet<Node> nodes = new TreeSet<Node>(); String term; int index; //double tf; double tfIdf; Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next(); index = termIndex.get(term); if (index != -1) { //tfIdf = bow.augmentedFrequency(term) * Iidf[index]; //tfIdf = bow.logarithmicFrequency(term) * Iidf[index]; tfIdf = bow.tf(term) * Iidf[index]; //logger.info("\t"+term + "\t" + index + "\ttf= " + bow.augmentedFrequency(term) + "\tidf=" + Iidf[index] + " (" + tfIdf + ")"); nodes.add(new Node(index, tfIdf)); } //else { // logger.debug(i+"\t"+term+"\t"+index); //} } return nodes.toArray(new Node[nodes.size()]); }
/** * Returns a document in the VSM. */ public Vector mapDocument(BOW bow) { //logger.info("lsm.mapDocument"); SparseVector vector = new SparseVector(); Iterator<String> it = bow.termSet().iterator(); String term = null; int index = 0; //int tf = 0; float tfIdf; for (int i = 0; it.hasNext(); i++) { //logger.info(i + " " + t[i]); term = it.next(); index = termIndex.get(term); if (index != -1) { //tf = bow.getFrequency(term); //tfIdf = (float) (log2(tf)) * Iidf[index]; //tfIdf = (float) bow.augmentedFrequency(term) * Iidf[index]; tfIdf = (float) bow.tf(term) * Iidf[index]; //logger.info(term + " ==> " + index + ", tf.idf = " + tf + "(" + (log2(tf)) + ") * " + Iidf[index] + " = " + tfIdf); vector.add(index, tfIdf); } } // end for return vector; } // end map
/** * Returns a document in the VSM. */ public Vector mapDocument(BOW bow, boolean b) { //logger.info("lsm.mapDocument " + b); SparseVector vector = new SparseVector(); Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { //logger.info(i + " " + t[i]); String term = it.next(); int index = termIndex.get(term); if (index != -1) { int tf = bow.getFrequency(term); float tfIdf = (float) (log2(tf)); if (b) { tfIdf *= Iidf[index]; } //logger.info(term + " ==> " + index + ", tf.idf = " + tf + "(" + (log2(tf)) + ") * " + Iidf[index] + " = " + tfIdf); vector.add(index, tfIdf); } } // end for return vector; } // end map