eu.fbk.utils.lsa.Index java code examples

/**
 * Looks up the position of the given term in the VSM.
 *
 * @param term the term to look up
 * @return the value produced by {@code termIndex.get(term)}
 * @throws TermNotFoundException declared by the signature
 */
public int termIndex(String term) throws TermNotFoundException {
  // NOTE(review): nothing in this body throws TermNotFoundException explicitly;
  // confirm what a caller receives for a term that is not indexed.
  final int position = termIndex.get(term);
  return position;
} // end termIndex

/**
 * Iterates over the items held by the term index.
 *
 * @return an iterator over {@code termIndex.itemSet()}
 */
public Iterator<String> terms() {
  final Iterator<String> termIterator = termIndex.itemSet().iterator();
  return termIterator;
} // end terms

/**
 * Reports how many entries the term index holds.
 *
 * @return the value of {@code termIndex.size()}
 */
public int termCount() {
  final int count = termIndex.size();
  return count;
} // end termCount

// Load the term index from rowFile, decoding the file as UTF-8.
termIndex = new Index();
termIndex.read(new InputStreamReader(new FileInputStream(rowFile), "UTF-8"));
// Load the document index from colFile, decoding the file as UTF-8.
documentIndex = new Index();
documentIndex.read(new InputStreamReader(new FileInputStream(colFile), "UTF-8"));
// NOTE(review): the streams opened above are not closed in this excerpt;
// confirm whether Index.read closes the reader it is given.
// The size of the document index's item set is stored in documentNumber and logged.
int l = documentNumber = documentIndex.itemSet().size();
logger.info(l + " documents");

// NOTE(review): this is an elided excerpt; the block opened by `if (saveMatrix)`
// is not closed within it and intermediate statements are missing.
// File whose name is matrixName followed by "-df".
File dfFile = new File(matrixName + "-df");
// Start from fresh term and document indexes.
termIndex = new Index();
documentIndex = new Index();
if (saveMatrix) {
  matrixWriter = new SparseBinaryMatrixFileWriter(matrixFile);
    // Progress line: line count, elapsed seconds between start1 and end,
    // vocabulary size, term index size and totalKW.
    logger.info(
        lineCount + "\t" + ((double) (end.getTime() - start1.getTime()) / 1000) + " total s (" + end
            + "), voc size:" + corpusVocabulary.size() + ", term index size:" + termIndex.size()
            + ", totalKW: " + totalKW);
  // Write both indexes as UTF-8 text to rowFile and colFile.
  termIndex.write(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(rowFile), "UTF-8")));
  documentIndex.write(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(colFile), "UTF-8")));

// Look up array[0] in the document index (get, so presumably no new entry is created — confirm).
int documentID = documentIndex.get(array[0]);
  Vocabulary.TermFrequency tf = (Vocabulary.TermFrequency) me.getValue();
  // The value returned by termIndex.add(term) is recorded at position j of indexes.
  int ti = termIndex.add(term);
  indexes[j] = ti;

/**
 * Writes the term index, the document index and the corpus vocabulary to
 * their files and closes the matrix writer.
 *
 * <p>The two indexes are written as UTF-8, because elsewhere {@code rowFile}
 * and {@code colFile} are read back through an explicit UTF-8 decoder; a plain
 * {@code FileWriter} would use the platform default charset instead. Every
 * writer created here is closed before the method returns, including when the
 * corresponding {@code write} call fails.
 *
 * @throws IOException if a file cannot be opened or written, or if the matrix
 *                     writer cannot be closed
 */
public void close() throws IOException {
  // Fully qualified java.io names keep this method independent of the file's import list.
  java.io.Writer rowWriter = new java.io.BufferedWriter(
      new java.io.OutputStreamWriter(new java.io.FileOutputStream(rowFile), "UTF-8"));
  try {
    termIndex.write(rowWriter);
  } finally {
    // Closing flushes any buffered output; closing an already closed writer has no effect.
    rowWriter.close();
  }

  java.io.Writer colWriter = new java.io.BufferedWriter(
      new java.io.OutputStreamWriter(new java.io.FileOutputStream(colFile), "UTF-8"));
  try {
    documentIndex.write(colWriter);
  } finally {
    colWriter.close();
  }

  matrixWriter.close();

  // NOTE(review): the df file keeps the platform default charset of FileWriter,
  // because no reader of this file is visible here; confirm the expected encoding.
  FileWriter dfWriter = new FileWriter(dfFile);
  try {
    corpusVocabulary.write(dfWriter);
  } finally {
    dfWriter.close();
  }
} // end close

/**
 * Constructs a reader.
 */
public TermDocumentMatrixBuilder(String matrixName, File stopwordFile, File keywordFile) throws IOException {
  totalKW = 0;
  keywordSet = new TermSet();
  keywordSet.read(new FileReader(keywordFile));
  logger.info("keyword to be indexed: " + keywordSet.size());
  stopwordSet = new TermSet();
  stopwordSet.read(new FileReader(stopwordFile));
  logger.info(stopwordFile + "(" + stopwordSet.size() + ")");
  lengthFreq = new int[101];
  columnCount = 0;
  matrixFile = new File(matrixName + "-matrix");
  rowFile = new File(matrixName + "-row");
  colFile = new File(matrixName + "-col");
  dfFile = new File(matrixName + "-df");
  termIndex = new Index();
  documentIndex = new Index();
  matrixWriter = new SparseBinaryMatrixFileWriter(matrixFile);
  corpusVocabulary = new Vocabulary();
} // end constructor

// Register array[0] in the document index and keep the index that add returns.
int documentID = documentIndex.add(array[0]);
  // Same for the term: the index returned by add is recorded at position j.
  indexes[j] = termIndex.add(term);

// Load the term index from rowFile, decoding the file as UTF-8.
termIndex = new Index();
termIndex.read(new InputStreamReader(new FileInputStream(rowFile), "UTF-8"));
// Load the document index from colFile, decoding the file as UTF-8.
documentIndex = new Index();
documentIndex.read(new InputStreamReader(new FileInputStream(colFile), "UTF-8"));
// NOTE(review): the streams opened above are not closed in this excerpt;
// confirm whether Index.read closes the reader it is given.
// The size of the document index's item set is stored in documentNumber and logged.
int l = documentNumber = documentIndex.itemSet().size();
logger.info(l + " documents");

// Load the term index from rowFile, decoding the file as UTF-8.
termIndex = new Index();
termIndex.read(new InputStreamReader(new FileInputStream(rowFile), "UTF-8"));
// Load the document index from colFile, decoding the file as UTF-8.
documentIndex = new Index();
documentIndex.read(new InputStreamReader(new FileInputStream(colFile), "UTF-8"));
// NOTE(review): the streams opened above are not closed in this excerpt;
// confirm whether Index.read closes the reader it is given.
// The size of the document index's item set is stored in documentNumber and logged.
int l = documentNumber = documentIndex.itemSet().size();
logger.info(l + " documents");

/**
 * Returns the idf stored for the specified term.
 *
 * @param term the term to look up
 * @return the entry of {@code Iidf} at the term's index, or 0 when
 *         {@code termIndex.get(term)} yields -1
 */
public float getIdf(String term) {
  final int position = termIndex.get(term);
  if (position != -1) {
    return Iidf[position];
  }
  // -1 from the index lookup: report 0.
  return 0;
} // end getIdf

/**
 * Iterates over the items held by the document index.
 *
 * @return an iterator over {@code documentIndex.itemSet()}
 */
public Iterator<String> documents() {
  final Iterator<String> documentIterator = documentIndex.itemSet().iterator();
  return documentIterator;
} // end documents

/**
 * Reports how many entries the term index holds.
 *
 * @return the value of {@code termIndex.size()}
 */
public int termCount() {
  final int count = termIndex.size();
  return count;
} // end termCount

/**
 * Returns the idf stored for the specified term.
 *
 * @param term the term to look up
 * @return the entry of {@code Iidf} at the term's index, or 0 when
 *         {@code termIndex.get(term)} yields -1
 */
public double getIdf(String term) {
  final int position = termIndex.get(term);
  if (position != -1) {
    return Iidf[position];
  }
  // -1 from the index lookup: report 0.
  return 0;
} // end getIdf

/**
 * Iterates over the items held by the document index.
 *
 * @return an iterator over {@code documentIndex.itemSet()}
 */
public Iterator<String> documents() {
  final Iterator<String> documentIterator = documentIndex.itemSet().iterator();
  return documentIterator;
} // end documents

/**
 * Reports how many entries the term index holds.
 *
 * @return the value of {@code termIndex.size()}
 */
public int termCount() {
  final int count = termIndex.size();
  return count;
} // end termCount

/**
 * Returns the idf stored for the specified term.
 *
 * @param term the term to look up
 * @return the entry of {@code Iidf} at the term's index, or 0 when
 *         {@code termIndex.get(term)} yields -1
 */
public float getIdf(String term) {
  final int position = termIndex.get(term);
  if (position != -1) {
    return Iidf[position];
  }
  // -1 from the index lookup: report 0.
  return 0;
} // end getIdf

/**
 * Iterates over the items held by the term index.
 *
 * @return an iterator over {@code termIndex.itemSet()}
 */
public Iterator<String> terms() {
  final Iterator<String> termIterator = termIndex.itemSet().iterator();
  return termIterator;
} // end terms

/**
 * Looks up the position of the given term in the VSM.
 *
 * @param term the term to look up
 * @return the value produced by {@code termIndex.get(term)}
 * @throws TermNotFoundException declared by the signature
 */
public int termIndex(String term) throws TermNotFoundException {
  // NOTE(review): nothing in this body throws TermNotFoundException explicitly;
  // confirm what a caller receives for a term that is not indexed.
  final int position = termIndex.get(term);
  return position;
} // end termIndex

Javadoc

This class maps items into indexes. The items are stored in alphabetical order.

(index \t item)+

Most used methods

<init>
Constructs a Index object.
add
Returns the index of the specified item and adds the item to the termIndex if it is not present yet.
get
Returns the index of the specified item; the snippets above treat a result of -1 as "item not present".
itemSet
read
Reads the feature termIndex from the specified input stream. This method processes input in terms of
size
write
Writes the feature termIndex into the specified output stream in a format suitable for loading into

Popular in Java

Parsing JSON documents to java classes using gson
getSupportFragmentManager (FragmentActivity)
runOnUiThread (Activity)
findViewById (Activity)
HashMap (java.util)
HashMap is an implementation of Map. All optional operations are supported. All elements are permitte
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in t
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
BasicDataSource (org.apache.commons.dbcp)
Basic implementation of javax.sql.DataSource that is configured via JavaBeans properties. This is no
Join (org.hibernate.mapping)
Top Vim plugins

How to use Index in eu.fbk.utils.lsa

Best Java code snippets using eu.fbk.utils.lsa.Index (Showing top 20 results out of 315)

How to use
Index
in
eu.fbk.utils.lsa