/** * Returns a document in the VSM. */ public Node[] mapDocument(BOW bow) { //logger.debug("mapDocument"); SortedSet<Node> nodes = new TreeSet<Node>(); String term; int index; //double tf; double tfIdf; Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next(); index = termIndex.get(term); if (index != -1) { //tfIdf = bow.augmentedFrequency(term) * Iidf[index]; //tfIdf = bow.logarithmicFrequency(term) * Iidf[index]; tfIdf = bow.tf(term) * Iidf[index]; //logger.info("\t"+term + "\t" + index + "\ttf= " + bow.augmentedFrequency(term) + "\tidf=" + Iidf[index] + " (" + tfIdf + ")"); nodes.add(new Node(index, tfIdf)); } //else { // logger.debug(i+"\t"+term+"\t"+index); //} } return nodes.toArray(new Node[nodes.size()]); }
int tf; double tfIdf; Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next();
/** * Returns a document in the VSM. */ public Vector mapDocument(BOW bow) { //logger.info("lsm.mapDocument"); SparseVector vector = new SparseVector(); Iterator<String> it = bow.termSet().iterator(); String term = null; int index = 0; //int tf = 0; float tfIdf; for (int i = 0; it.hasNext(); i++) { //logger.info(i + " " + t[i]); term = it.next(); index = termIndex.get(term); if (index != -1) { //tf = bow.getFrequency(term); //tfIdf = (float) (log2(tf)) * Iidf[index]; //tfIdf = (float) bow.augmentedFrequency(term) * Iidf[index]; tfIdf = (float) bow.tf(term) * Iidf[index]; //logger.info(term + " ==> " + index + ", tf.idf = " + tf + "(" + (log2(tf)) + ") * " + Iidf[index] + " = " + tfIdf); vector.add(index, tfIdf); } } // end for return vector; } // end map
/** * Returns a document in the VSM. */ public Vector mapDocument(BOW bow, boolean b) { //logger.info("lsm.mapDocument " + b); SparseVector vector = new SparseVector(); Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { //logger.info(i + " " + t[i]); String term = it.next(); int index = termIndex.get(term); if (index != -1) { int tf = bow.getFrequency(term); float tfIdf = (float) (log2(tf)); if (b) { tfIdf *= Iidf[index]; } //logger.info(term + " ==> " + index + ", tf.idf = " + tf + "(" + (log2(tf)) + ") * " + Iidf[index] + " = " + tfIdf); vector.add(index, tfIdf); } } // end for return vector; } // end map
double tfIdf; int current = 0; Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next();
float tfIdf; int current = 0; Iterator<String> it = bow.termSet().iterator(); for (int i = 0; it.hasNext(); i++) { term = it.next();