// Excerpt: passes bow to lsm.mapDocument, feeds that result to lsm.mapPseudoDocument, then prints s[i] to pw.
Vector d = lsm.mapDocument(bow); Vector pd = lsm.mapPseudoDocument(d); pw.print(s[i]);
/** * */ public float compare(BOW bow1, BOW bow2) { Vector d1 = lsm.mapDocument(bow1); Vector d2 = lsm.mapDocument(bow2); Vector pd1 = lsm.mapPseudoDocument(d1); Vector pd2 = lsm.mapPseudoDocument(d2); //float cosVSM = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d2) * d2.dotProduct(d2)); //float dotLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(d1.dotProduct(pd2) * d2.dotProduct(pd2)); float dotLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2)); return dotLSM; } // end compare
// Excerpt: passes bow to lsm.mapDocument and calls normalize() on the returned vector.
Vector vec = lsm.mapDocument(bow); vec.normalize();
/**
 * Builds one pseudo-document vector from the contexts of all given examples.
 *
 * For every entry, the fields at
 * {@code OneExamplePerSenseExtractor.LEFT_CONTEXT_INDEX} and
 * {@code OneExamplePerSenseExtractor.RIGHT_CONTEXT_INDEX} are lower-cased,
 * split with {@code spacePattern} and added to a single bag-of-words. The bag
 * is then passed through {@code lsm.mapDocument} and
 * {@code lsm.mapPseudoDocument}, and {@code normalize()} is called on both
 * resulting vectors.
 *
 * @param list the examples, one string array per example
 * @return the pseudo-document vector after {@code normalize()} has been called on it
 */
Vector createVector(List<String[]> list) {
    BOW bow = new BOW();
    for (String[] example : list) {
        String[] leftTokens =
                spacePattern.split(example[OneExamplePerSenseExtractor.LEFT_CONTEXT_INDEX].toLowerCase());
        String[] rightTokens =
                spacePattern.split(example[OneExamplePerSenseExtractor.RIGHT_CONTEXT_INDEX].toLowerCase());
        bow.addAll(leftTokens);
        bow.addAll(rightTokens);
    }

    Vector documentVector = lsm.mapDocument(bow);
    Vector pseudoVector = lsm.mapPseudoDocument(documentVector);
    documentVector.normalize();
    pseudoVector.normalize();
    return pseudoVector;
}
@Override public void processLine(String line) { //To change body of implemented methods use File | Settings | File Templates. String[] tokens = spacePattern.split(line); if (tokens.length < 2) { return; } try { BOW bow = new BOW(); for (int i = 1; i < tokens.length; i++) { //logger.debug(i + "\t'" + tokens[i] + "'\t" + tokens[0]); bow.add(tokens[i].toLowerCase()); } Vector d = lsm.mapDocument(bow); Vector pd = lsm.mapPseudoDocument(d); d.normalize(); pd.normalize(); synchronized (this) { vectorWriter.print(tokens[0]); //vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION); //vectorWriter.print(bow.toSingleLine()); vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION); vectorWriter.print(pd.toString()); vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION); vectorWriter.println(d.toString()); } } catch (Exception e) { logger.error("Error processing page " + tokens[0]); } }
@Override public void contentPage(String text, String title, int wikiID) { try { WikiMarkupParser wikiMarkupParser = WikiMarkupParser.getInstance(); //logger.debug(title + "\t" + wikiID); String[] prefixes = {filePrefix, imagePrefix}; ParsedPage parsedPage = wikiMarkupParser.parsePage(text, prefixes); String page = tokenizedText(parsedPage, title); BOW bow = new BOW(page.toLowerCase()); Vector d = lsm.mapDocument(bow); Vector pd = lsm.mapPseudoDocument(d); d.normalize(); pd.normalize(); synchronized (this) { vectorWriter.print(title); vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION); vectorWriter.print(pd.toString()); vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION); vectorWriter.println(d.toString()); } } catch (Exception e) { logger.error("Error processing page " + title + " (" + wikiID + ")"); } }
/**
 * Creates an example for a page from the contexts of its occurrences.
 *
 * The number of entries in {@code list} is stored in {@code freq} and added
 * to {@code totalFreq}. For every entry, the fields at
 * {@code LEFT_CONTEXT_INDEX} and {@code RIGHT_CONTEXT_INDEX} are lower-cased,
 * split with {@code spacePattern} and added to {@code bow}; an entry that
 * raises an exception is logged and skipped. {@code bowVector} is then
 * obtained from {@code lsm.mapDocument} and {@code lsVector} from
 * {@code lsm.mapPseudoDocument}.
 *
 * When {@code normalized} is set, {@code bowVector} has {@code normalize()}
 * called on it before it is passed to {@code lsm.mapPseudoDocument}, and
 * {@code normalize()} is called on {@code lsVector} as well.
 *
 * @param page the page this example belongs to
 * @param list the occurrences, one string array per occurrence
 */
Example(String page, List<String[]> list) {
    this.page = page;
    freq = list.size();
    totalFreq += freq;

    bow = new BOW();
    for (String[] s : list) {
        try {
            String[] leftContext = spacePattern.split(s[LEFT_CONTEXT_INDEX].toLowerCase());
            String[] rightContext = spacePattern.split(s[RIGHT_CONTEXT_INDEX].toLowerCase());
            bow.addAll(leftContext);
            bow.addAll(rightContext);
        } catch (Exception e) {
            // Best effort: a malformed entry is reported (with its cause)
            // and the remaining entries are still processed.
            logger.error("Error reading contexts for page " + page, e);
        }
    }

    bowVector = lsm.mapDocument(bow);
    if (normalized) {
        bowVector.normalize();
    }

    lsVector = lsm.mapPseudoDocument(bowVector);
    if (normalized) {
        lsVector.normalize();
    }
}
// Excerpt: passes the same bow1 to enLsm.mapDocument and itLsm.mapDocument, calls normalize() on each result and prints the vectors to log; enM1 is defined outside this excerpt.
Vector enD1 = enLsm.mapDocument(bow1); enD1.normalize(); log.println("enD1:" + enD1); log.println("enM1:" + enM1); Vector itD1 = itLsm.mapDocument(bow1); itD1.normalize(); log.println("itD1:" + itD1);
/** * dot[0] is cosine * dot[1] is lsa */ public float[] compare2(BOW bow1, BOW bow2) { Vector d1 = lsm.mapDocument(bow1); Vector d2 = lsm.mapDocument(bow2); Vector pd1 = lsm.mapPseudoDocument(d1); Vector pd2 = lsm.mapPseudoDocument(d2); float[] dot = new float[2]; dot[0] = d1.dotProduct(d2) / (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2)); //float dotLSM = pd1.dotProduct(pd2) / (float) Math.sqrt(d1.dotProduct(pd2) * d2.dotProduct(pd2)); dot[1] = pd1.dotProduct(pd2) / (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2)); return dot; } // end compare2
// Excerpt: passes the same bow1 to enLsm.mapDocument and itLsm.mapDocument, calls normalize() on each result and prints the vectors to log; enM1 is defined outside this excerpt.
Vector enD1 = enLsm.mapDocument(bow1); enD1.normalize(); log.println("enD1:" + enD1); log.println("enM1:" + enM1); Vector itD1 = itLsm.mapDocument(bow1); itD1.normalize(); log.println("itD1:" + itD1);
// Excerpt: prints the pair indices, s1[0], s2[0] and bow2 to log, then passes bow1 and bow2 to lsm.mapDocument and prints each returned vector (no normalize() call here).
log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2); Vector d1 = lsm.mapDocument(bow1); log.println("d1:" + d1); Vector d2 = lsm.mapDocument(bow2); log.println("d2:" + d2);
// Excerpt: passes bow1 and bow2 to an unqualified mapDocument (declared outside this excerpt) and keeps the two resulting vectors.
Vector d1 = mapDocument(bow1); Vector d2 = mapDocument(bow2);
// Excerpt: logs bow1, passes bow1 and bow2 to lsm.mapDocument, calls normalize() on each returned vector and prints both vectors (and the pair indices with bow2) to log.
logger.info(bow1); Vector d1 = lsm.mapDocument(bow1); d1.normalize(); log.println("d1:" + d1); log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2); Vector d2 = lsm.mapDocument(bow2); d2.normalize(); log.println("d2:" + d2);
// Excerpt: passes bow1 and bow2 to lsm.mapDocument and keeps the two resulting vectors.
Vector d1 = lsm.mapDocument(bow1); Vector d2 = lsm.mapDocument(bow2);
public Category[] classify(String text) { String tokenizedText = tokenizer.tokenizedString(text); BOW bow = new BOW(tokenizedText.toLowerCase()); Vector d = lsm.mapDocument(bow); Vector pd = lsm.mapPseudoDocument(d); d.normalize(); pd.normalize(); Node[] nd = d.toNodeArray(); Node[] npd = pd.toNodeArray(); //logger.debug(Node.toString(nd)); //logger.debug(Node.toString(npd)); Category[] categoryArray = new Category[pageCategoryMap.size()]; Iterator<String> it = pageCategoryMap.keySet().iterator(); for (int i = 0; it.hasNext(); i++) { String page = it.next(); Entry entry = pageCategoryMap.get(page); Node[][] nodes = entry.getNodes(); Node[] cd = nodes[1]; Node[] cpd = nodes[0]; double cbow = Node.dot(nd, cd); double clsa = Node.dot(npd, cpd); double combo = (cbow + clsa) / 2; categoryArray[i] = new Category(entry.getCategory(), cbow, clsa); //logger.debug(i + "\t" + categories[i] + "\t" + cbow + "\t" + clsa + "\t" + combo); } Arrays.sort(categoryArray); return categoryArray; }
private Node[][] mapInstance(String[] s) { Tokenizer tokenizer = HardTokenizer.getInstance(); BOW bow = new BOW(); String[] left = tokenizer.stringArray(s[2].toLowerCase()); bow.addAll(left); if (s.length == 5) { String[] right = tokenizer.stringArray(s[4].toLowerCase()); bow.addAll(right); } logger.debug(bow); Vector bowVector = lsm.mapDocument(bow); Vector lsVector = lsm.mapPseudoDocument(bowVector); if (normalized) { bowVector.normalize(); lsVector.normalize(); } logger.debug("bow\t" + bowVector); //logger.debug("lsi\t" + lsVector); Node[][] nodes = new Node[2][]; nodes[0] = bowVector.toNodeArray(); nodes[1] = lsVector.toNodeArray(); return nodes; }