eu.fbk.utils.lsa.LSM java code examples

/**
 * Creates an extractor whose LSM model is read from the {@code X-*} files under {@code lsaRoot}.
 *
 * @param numThreads forwarded to the superclass constructor
 * @param numPages forwarded to the superclass constructor
 * @param lsaRoot directory containing the model files; a file separator is appended when it is missing
 * @param dim dimension handed to the {@code LSM} constructor
 * @param normalized flag handed to the {@code LSM} constructor
 * @throws IOException declared by this constructor (presumably raised while the model is read)
 */
public VectorExtractor(int numThreads, int numPages, String lsaRoot, int dim, boolean normalized) throws IOException {
  super(numThreads, numPages);
  // Make sure the root ends with a separator so the file names can simply be appended.
  final String root = lsaRoot.endsWith(File.separator) ? lsaRoot : lsaRoot + File.separator;
  logger.info("reading lsm model from " + root + " (" + dim + ")...");
  final File utFile = new File(root + "X-Ut");
  final File sFile = new File(root + "X-S");
  final File rowFile = new File(root + "X-row");
  final File colFile = new File(root + "X-col");
  final File dfFile = new File(root + "X-df");
  // IDF rescaling is always switched on for this extractor.
  final boolean rescaleIdf = true;
  lsm = new LSM(utFile, sFile, rowFile, colFile, dfFile, dim, rescaleIdf, normalized);
}

/**
 * Compares two bags of words in the latent semantic space.
 *
 * <p>Each bag of words is mapped with {@code lsm.mapDocument} and the result is projected with
 * {@code lsm.mapPseudoDocument}; the returned score is the cosine between the two projections.
 *
 * @param bow1 the first bag of words
 * @param bow2 the second bag of words
 * @return the cosine between the two pseudo-documents, or {@code 0} when the product of their
 *     squared norms is zero (the plain formula would otherwise evaluate 0/0 and return NaN)
 */
public float compare(BOW bow1, BOW bow2) {
  Vector d1 = lsm.mapDocument(bow1);
  Vector d2 = lsm.mapDocument(bow2);
  Vector pd1 = lsm.mapPseudoDocument(d1);
  Vector pd2 = lsm.mapPseudoDocument(d2);
  float norm = (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2));
  if (norm == 0f) {
    // At least one pseudo-document has no length, so there is no direction to compare.
    return 0f;
  }
  return pd1.dotProduct(pd2) / norm;
} // end compare

// Map the bag of words into the VSM and normalize the resulting vector.
Vector vec = lsm.mapDocument(bow);
vec.normalize();
// Project the (already normalized) VSM vector into the latent semantic space and normalize it too.
Vector pseudoVec = lsm.mapPseudoDocument(vec);
pseudoVec.normalize();
// Build one record: pseudo-document vector, HORIZONTAL_TABULATION separator, VSM vector.
sb = new StringBuilder();
sb.append(pseudoVec.toString());
sb.append(CharacterTable.HORIZONTAL_TABULATION);
// NOTE(review): the meaning of the int argument of Vector.toString(int) is not visible here — confirm.
sb.append(vec.toString(lsm.getDimension()));
set.add(sb.toString());

/**
 * Command-line entry point: loads an LSM model and runs its interactive mode.
 *
 * <p>Exactly five arguments are required: the common prefix of the model files
 * ({@code -Ut}, {@code -S}, {@code -row}, {@code -col} and {@code -df} are appended to it),
 * a threshold, a size, the dimension and the rescale-idf flag. With any other argument count
 * the help text is logged and the JVM exits with status 1.
 *
 * <p>The logging configuration is read from the {@code log-config} system property and
 * defaults to {@code log-config.txt}.
 *
 * @param args the command-line arguments described above
 * @throws Exception if an argument cannot be parsed or the model cannot be created
 */
public static void main(String[] args) throws Exception {
  String logConfig = System.getProperty("log-config");
  if (logConfig == null) {
    logConfig = "log-config.txt";
  }
  long begin = System.currentTimeMillis();
  PropertyConfigurator.configure(logConfig);
  if (args.length != 5) {
    logger.info(getHelp());
    System.exit(1);
  }
  File utFile = new File(args[0] + "-Ut");
  File sFile = new File(args[0] + "-S");
  File rowFile = new File(args[0] + "-row");
  File colFile = new File(args[0] + "-col");
  File dfFile = new File(args[0] + "-df");
  // threshold and size are not used below; they are still parsed so that malformed
  // values keep failing with a NumberFormatException exactly as before.
  double threshold = Double.parseDouble(args[1]);
  int size = Integer.parseInt(args[2]);
  int dim = Integer.parseInt(args[3]);
  boolean rescaleIdf = Boolean.parseBoolean(args[4]);
  // Named "model" rather than "LSM" so the local variable no longer shadows the class name.
  LSM model = new LSM(utFile, sFile, rowFile, colFile, dfFile, dim, rescaleIdf);
  model.interactive();
  long end = System.currentTimeMillis();
  logger.info("term similarity calculated in " + (end - begin) + " ms");
} // end main

// Load the model and wrap it in an LSSimilarity built with the given size.
LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
LSSimilarity lss = new LSSimilarity(lsm, size);
    // Trace the pair being processed (indices, first fields of s1/s2 and the second bag of words).
    log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2);
    // VSM vectors of both bags of words, each written to the log.
    Vector d1 = lsm.mapDocument(bow1);
    log.println("d1:" + d1);
    Vector d2 = lsm.mapDocument(bow2);
    log.println("d2:" + d2);
    // Latent-semantic-space projections of the two VSM vectors, also written to the log.
    Vector pd1 = lsm.mapPseudoDocument(d1);
    log.println("pd1:" + pd1);
    Vector pd2 = lsm.mapPseudoDocument(d2);
    log.println("pd2:" + pd2);

// Map both bags of words into the VSM, then project them into the latent semantic space.
Vector d1 = lsm.mapDocument(bow1);
Vector d2 = lsm.mapDocument(bow2);
Vector pd1 = lsm.mapPseudoDocument(d1);
Vector pd2 = lsm.mapPseudoDocument(d2);
  // Start of a timed scan over all terms known to the model.
  long begin = System.nanoTime();
  // NOTE(review): ScoreTermMap presumably keeps the `size` best-scoring terms for the query — confirm.
  ScoreTermMap map = new ScoreTermMap(query, size);
  Vector vec1 = lsm.mapTerm(query);
  Iterator<String> it = lsm.terms();
  while (it.hasNext()) {
    term = it.next();
    Vector vec2 = lsm.mapTerm(term);
    // Cosine between the query vector and the current term vector.
    float cos = vec1.dotProduct(vec2) / (float) Math
        .sqrt(vec1.dotProduct(vec1) * vec2.dotProduct(vec2));

/**
 * Returns a document in the VSM.
 *
 * <p>Every term of {@code bow.termSet()} whose index in {@code termIndex} is not {@code -1}
 * contributes one component: {@code log2} of its frequency, multiplied by {@code Iidf[index]}
 * when {@code b} is set.
 *
 * @param bow the bag of words to map
 * @param b whether each weight is multiplied by the term's {@code Iidf} entry
 * @return the sparse vector holding one weight per known term
 */
public Vector mapDocument(BOW bow, boolean b) {
  SparseVector document = new SparseVector();
  Iterator<String> terms = bow.termSet().iterator();
  while (terms.hasNext()) {
    String term = terms.next();
    int index = termIndex.get(term);
    if (index == -1) {
      // Term is not part of the model: it contributes nothing.
      continue;
    }
    int frequency = bow.getFrequency(term);
    float weight = (float) (log2(frequency));
    if (b) {
      weight *= Iidf[index];
    }
    document.add(index, weight);
  }
  return document;
} // end map

/**
 * Compares two terms by the cosine of their vectors as returned by {@code lsm.mapTerm}.
 *
 * @param term1 the first term
 * @param term2 the second term
 * @return the dot product of the two term vectors divided by the product of their norms
 * @throws TermNotFoundException declared by this method (presumably raised by {@code mapTerm}
 *     for a term unknown to the model)
 */
public float compare(String term1, String term2) throws TermNotFoundException {
  Vector first = lsm.mapTerm(term1);
  Vector second = lsm.mapTerm(term2);
  float dot = first.dotProduct(second);
  float norm = (float) Math.sqrt(first.dotProduct(first) * second.dotProduct(second));
  return dot / norm;
} // end compare

// Load the model and wrap it in an LSSimilarity built with the given size.
LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
LSSimilarity lss = new LSSimilarity(lsm, size);
  logger.info(bow1);
  // First document: VSM vector, normalized, then written to the log.
  Vector d1 = lsm.mapDocument(bow1);
  d1.normalize();
  log.println("d1:" + d1);
  // Projection of the normalized VSM vector into the latent semantic space, normalized as well.
  Vector pd1 = lsm.mapPseudoDocument(d1);
  pd1.normalize();
  log.println("pd1:" + pd1);
    // Trace the pair being processed, then repeat the same steps for the second document.
    log.println(i + ":" + j + "(" + s1[0] + ":" + s2[0] + ") bow2:" + bow2);
    Vector d2 = lsm.mapDocument(bow2);
    d2.normalize();
    log.println("d2:" + d2);
    Vector pd2 = lsm.mapPseudoDocument(d2);
    pd2.normalize();
    log.println("pd2:" + pd2);

// Map both bags of words into the VSM, then project them into the latent semantic space.
Vector d1 = mapDocument(bow1);
Vector d2 = mapDocument(bow2);
Vector pd1 = mapPseudoDocument(d1);
Vector pd2 = mapPseudoDocument(d2);
  // Start of a timed scan over all terms known to the model.
  long begin = System.nanoTime();
  // NOTE(review): the literal 20 is presumably the number of best-scoring terms kept — confirm in ScoreTermMap.
  ScoreTermMap map = new ScoreTermMap(query, 20);
  Vector vec1 = mapTerm(query);
  Iterator<String> it = terms();
  while (it.hasNext()) {
    term = it.next();
    Vector vec2 = mapTerm(term);
    // Cosine between the query vector and the current term vector, recorded under that term.
    double cos = vec1.dotProduct(vec2) / Math.sqrt(vec1.dotProduct(vec1) * vec2.dotProduct(vec2));
    map.put(cos, term);

/**
 * Returns a document in the VSM.
 *
 * <p>Every element delivered by {@code bow.iterator()} whose index in {@code termIndex} is not
 * {@code -1} adds one component: {@code log2} of its frequency multiplied by
 * {@code Iidf[index]}.
 *
 * @param bow the multiset of terms to map
 * @return the sparse vector built from the known terms
 */
public Vector mapDocument(MultiSet<String> bow) {
  SparseVector document = new SparseVector();
  Iterator<String> terms = bow.iterator();
  while (terms.hasNext()) {
    String term = terms.next();
    int index = termIndex.get(term);
    if (index == -1) {
      // Term is not part of the model: it contributes nothing.
      continue;
    }
    int frequency = bow.getFrequency(term);
    float weight = (float) (log2(frequency)) * Iidf[index];
    document.add(index, weight);
  }
  return document;
} // end map

/**
 * Compares two bags of words both in the VSM and in the latent semantic space.
 *
 * <p>{@code dot[0]} is the cosine between the VSM vectors ({@code lsm.mapDocument});
 * {@code dot[1]} is the cosine between their projections ({@code lsm.mapPseudoDocument}).
 * A score whose norm product is zero is reported as {@code 0}; the plain formula would
 * evaluate 0/0 and produce NaN.
 *
 * @param bow1 the first bag of words
 * @param bow2 the second bag of words
 * @return a two-element array: the VSM cosine followed by the LSA cosine
 */
public float[] compare2(BOW bow1, BOW bow2) {
  Vector d1 = lsm.mapDocument(bow1);
  Vector d2 = lsm.mapDocument(bow2);
  Vector pd1 = lsm.mapPseudoDocument(d1);
  Vector pd2 = lsm.mapPseudoDocument(d2);
  float[] dot = new float[2];
  float vsmNorm = (float) Math.sqrt(d1.dotProduct(d1) * d2.dotProduct(d2));
  // Guard against a zero-length vector: no direction means no similarity.
  dot[0] = (vsmNorm == 0f) ? 0f : d1.dotProduct(d2) / vsmNorm;
  float lsaNorm = (float) Math.sqrt(pd1.dotProduct(pd1) * pd2.dotProduct(pd2));
  dot[1] = (lsaNorm == 0f) ? 0f : pd1.dotProduct(pd2) / lsaNorm;
  return dot;
} // end compare2

/**
 * Creates an extractor whose LSM model is read from the {@code X-*} files under {@code lsaRoot}.
 *
 * <p>The model is always created with dimension 100 and IDF rescaling enabled.
 *
 * @param numThreads forwarded to the superclass constructor
 * @param numPages forwarded to the superclass constructor
 * @param locale forwarded to the superclass constructor
 * @param lsaRoot directory containing the model files; a file separator is appended when it is missing
 * @throws IOException declared by this constructor (presumably raised while the model is read)
 */
public WikipediaVectorExtractor(int numThreads, int numPages, Locale locale, String lsaRoot) throws IOException {
  super(numThreads, numPages, locale);
  // Make sure the root ends with a separator so the file names can simply be appended.
  final String root = lsaRoot.endsWith(File.separator) ? lsaRoot : lsaRoot + File.separator;
  final int dim = 100;
  final boolean rescaleIdf = true;
  lsm = new LSM(
      new File(root + "X-Ut"),
      new File(root + "X-S"),
      new File(root + "X-row"),
      new File(root + "X-col"),
      new File(root + "X-df"),
      dim,
      rescaleIdf);
}

// VSM vector of the bag of words and its projection into the latent semantic space.
Vector d = lsm.mapDocument(bow);
Vector pd = lsm.mapPseudoDocument(d);
// Write the i-th element of s followed by a single space to pw.
pw.print(s[i]);
pw.print(" ");

  /**
   * Command-line entry point of NgramComparator.
   *
   * <p>Exactly six arguments are required: the common prefix of the model files
   * ({@code -Ut}, {@code -S}, {@code -row}, {@code -col} and {@code -df} are appended to it),
   * a threshold, a size, the dimension, the rescale-idf flag and a final argument that is
   * handed to the {@code NgramComparator} constructor. With any other argument count the
   * usage line is printed and the JVM exits with status 1.
   *
   * @param args the command-line arguments described above
   * @throws Exception if an argument cannot be parsed or the model cannot be created
   */
  public static void main(String[] args) throws Exception {
    // The logging configuration comes from -Dlog-config, falling back to log-config.txt.
    PropertyConfigurator.configure(System.getProperty("log-config", "log-config.txt"));

    if (args.length != 6) {
      System.out.println(
          "Usage: java -mx512M eu.fbk.utils.lsa.util.NgramComparator input threshold size dim idf file");
      System.exit(1);
    }

    final String prefix = args[0];
    File utFile = new File(prefix + "-Ut");
    File sFile = new File(prefix + "-S");
    File rowFile = new File(prefix + "-row");
    File colFile = new File(prefix + "-col");
    File dfFile = new File(prefix + "-df");
    // The threshold is parsed (so a malformed value still fails) but not used afterwards.
    double threshold = Double.parseDouble(args[1]);
    int size = Integer.parseInt(args[2]);
    int dim = Integer.parseInt(args[3]);
    boolean rescaleIdf = Boolean.parseBoolean(args[4]);

    LSM model = new LSM(utFile, sFile, rowFile, colFile, dfFile, dim, rescaleIdf);
    LSSimilarity similarity = new LSSimilarity(model, size);

    // All the work happens in the constructor; the instance itself is not kept.
    new NgramComparator(args[5], similarity);
  } // end main
} // end NgramComparator

/**
 * Builds one latent-semantic-space vector from the contexts of all given examples.
 *
 * <p>For every example the left and right context fields are lower-cased, split with
 * {@code spacePattern} and added to a single bag of words. That bag is mapped with
 * {@code lsm.mapDocument} and projected with {@code lsm.mapPseudoDocument}.
 *
 * @param list the examples; each array is indexed with the
 *     {@code OneExamplePerSenseExtractor} context constants
 * @return the normalized pseudo-document vector
 */
Vector createVector(List<String[]> list) {
  BOW contexts = new BOW();
  for (String[] example : list) {
    String left = example[OneExamplePerSenseExtractor.LEFT_CONTEXT_INDEX].toLowerCase();
    String right = example[OneExamplePerSenseExtractor.RIGHT_CONTEXT_INDEX].toLowerCase();
    contexts.addAll(spacePattern.split(left));
    contexts.addAll(spacePattern.split(right));
  }
  Vector document = lsm.mapDocument(contexts);
  Vector pseudoDocument = lsm.mapPseudoDocument(document);
  // Both vectors are normalized after the projection, in the same order as before.
  document.normalize();
  pseudoDocument.normalize();
  return pseudoDocument;
}

// The fourth command-line argument names a file.
File doc = new File(args[3]);
// Load the model and wrap it in an LSSimilarity built with the given size.
LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
LSSimilarity lss = new LSSimilarity(lsm, size);
// The fifth command-line argument is parsed as a double.
// NOTE(review): the role of qt is not visible in this excerpt — confirm at the call site.
double qt = Double.parseDouble(args[4]);

/**
 * Turns one input line into a pair of vectors and writes them to {@code vectorWriter}.
 *
 * <p>The line is split with {@code spacePattern}. The first token is used as the record
 * identifier; the remaining tokens are lower-cased and collected into a bag of words. Lines
 * with fewer than two tokens are ignored. The output record is the identifier, the normalized
 * pseudo-document vector and the normalized VSM vector, separated by
 * {@code CharacterTable.HORIZONTAL_TABULATION}.
 *
 * <p>Any exception raised while processing is logged, together with its cause, and not
 * rethrown.
 *
 * @param line the input line
 */
@Override
public void processLine(String line) {
  String[] tokens = spacePattern.split(line);
  if (tokens.length < 2) {
    // Nothing to vectorize: the line has no tokens after the identifier.
    return;
  }
  try {
    BOW bow = new BOW();
    for (int i = 1; i < tokens.length; i++) {
      bow.add(tokens[i].toLowerCase());
    }
    Vector d = lsm.mapDocument(bow);
    Vector pd = lsm.mapPseudoDocument(d);
    d.normalize();
    pd.normalize();
    // The record is written under the lock so its parts stay together on one line.
    synchronized (this) {
      vectorWriter.print(tokens[0]);
      vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION);
      vectorWriter.print(pd.toString());
      vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION);
      vectorWriter.println(d.toString());
    }
  } catch (Exception e) {
    // Hand the exception to the logger so the cause and stack trace are not lost.
    logger.error("Error processing page " + tokens[0], e);
  }
}

// The fifth command-line argument selects whether the model is created with IDF rescaling.
boolean rescaleIdf = Boolean.parseBoolean(args[4]);
// Load the model and wrap it in an LSSimilarity built with the given size.
LSM lsm = new LSM(Ut, Sk, r, c, df, dim, rescaleIdf);
LSSimilarity lss = new LSSimilarity(lsm, size);

/**
 * Turns one content page into a pair of vectors and writes them to {@code vectorWriter}.
 *
 * <p>The markup is parsed with {@code WikiMarkupParser}, tokenized with
 * {@code tokenizedText}, lower-cased and wrapped in a bag of words. The output record is the
 * title, the normalized pseudo-document vector and the normalized VSM vector, separated by
 * {@code CharacterTable.HORIZONTAL_TABULATION}.
 *
 * <p>Any exception raised while processing is logged, together with its cause, and not
 * rethrown.
 *
 * @param text the page markup
 * @param title the page title
 * @param wikiID the page identifier, used only in the error message
 */
@Override
public void contentPage(String text, String title, int wikiID) {
  try {
    WikiMarkupParser wikiMarkupParser = WikiMarkupParser.getInstance();
    String[] prefixes = {filePrefix, imagePrefix};
    ParsedPage parsedPage = wikiMarkupParser.parsePage(text, prefixes);
    String page = tokenizedText(parsedPage, title);
    BOW bow = new BOW(page.toLowerCase());
    Vector d = lsm.mapDocument(bow);
    Vector pd = lsm.mapPseudoDocument(d);
    d.normalize();
    pd.normalize();
    // The record is written under the lock so its parts stay together on one line.
    synchronized (this) {
      vectorWriter.print(title);
      vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION);
      vectorWriter.print(pd.toString());
      vectorWriter.print(CharacterTable.HORIZONTAL_TABULATION);
      vectorWriter.println(d.toString());
    }
  } catch (Exception e) {
    // Hand the exception to the logger so the cause and stack trace are not lost.
    logger.error("Error processing page " + title + " (" + wikiID + ")", e);
  }
}

Javadoc

Maps text into the latent semantic space.

This class is equivalent to LSM but uses Vector instead of Node.

Most used methods

<init>
mapDocument
Returns a document in the VSM.
mapPseudoDocument
Returns a document in the latent semantic space.
getDimension
getHelp
Returns the command-line help text.
interactive
log2
mapTerm
Returns a term in the VSM
terms

Popular in Java

Reactive rest calls using spring rest template
compareTo (BigDecimal)
onRequestPermissionsResult (Fragment)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
SSLHandshakeException (javax.net.ssl)
The exception that is thrown when a handshake could not be completed successfully.
JFrame (javax.swing)
JPanel (javax.swing)
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource.Typically used to track the location of proble
Top Vim plugins

How to use LSM in eu.fbk.utils.lsa

Best Java code snippets using eu.fbk.utils.lsa.LSM (Showing top 20 results out of 315)

How to use LSM in eu.fbk.utils.lsa