/** {@inheritDoc} */
public int getFrequency() {
	// The total frequency is the sum over all non-null child entries.
	int total = 0;
	for (LexiconEntry child : children) {
		if (child != null) {
			total += child.getFrequency();
		}
	}
	return total;
}
/** * {@inheritDoc} */
// Accumulates collection statistics from one lexicon entry:
// total token count, total pointer (posting) count, and the number of distinct terms.
public void count(LexiconEntry value) {
	numberOfTokens += value.getFrequency();           // total occurrences of the term in the collection
	numberOfPointers += value.getDocumentFrequency(); // one pointer per document the term occurs in
	numberOfTerms++;                                  // one more distinct term counted
} /**
/** Accumulates whole-collection and per-field statistics from a single lexicon entry. */
public void count(LexiconEntry value) {
	numberOfTokens += value.getFrequency();
	numberOfPointers += value.getDocumentFrequency();
	numberOfTerms++;
	// Per-field token totals; the entry is assumed to be a FieldLexiconEntry.
	final int[] fieldFrequencies = ((FieldLexiconEntry) value).getFieldFrequencies();
	for (int field = 0; field < numFields; field++) {
		tokensF[field] += (long) fieldFrequencies[field];
	}
}
/** * {@inheritDoc} */
// Renders this entry as "(documentFrequency,frequency)" followed by the pointer string.
public String toString() {
	// BUG FIX: the previous code used the char literal '(' on the left of the
	// concatenation; in Java, char + int is integer arithmetic (JLS 5.6.2), so the
	// opening parenthesis was silently added (40 + df) into the document frequency
	// instead of being printed. A String literal forces string concatenation.
	return "(" + getDocumentFrequency() + "," + getFrequency() + ')' + pointerToString();
} /**
/** * {@inheritDoc} */
// Renders this entry as "(documentFrequency,frequency)" followed by the pointer string.
public String toString() {
	// BUG FIX: the previous code used the char literal '(' on the left of the
	// concatenation; in Java, char + int is integer arithmetic (JLS 5.6.2), so the
	// opening parenthesis was silently added (40 + df) into the document frequency
	// instead of being printed. A String literal forces string concatenation.
	return "(" + getDocumentFrequency() + "," + getFrequency() + ')' + pointerToString();
} /**
/**
 * Allocates the temporary per-term posting lists: {@code 4 + fieldCount} lists in total.
 * The first {@code fieldCount + 3} lists are pre-sized by the term's document frequency;
 * the final list is pre-sized by the term's total frequency.
 */
protected TIntArrayList[] createPointerForTerm(LexiconEntry le) {
	final int documentFrequency = le.getDocumentFrequency();
	final TIntArrayList[] lists = new TIntArrayList[4 + fieldCount];
	for (int i = 0; i < fieldCount + 3; i++) {
		lists[i] = new TIntArrayList(documentFrequency);
	}
	lists[fieldCount + 3] = new TIntArrayList(le.getFrequency());
	return lists;
}
/** Add statistics for this posting onto the given LexiconEntry */
public void addToLexiconEntry(LexiconEntry le) {
	// Fold this posting's document frequency and term frequency into the entry.
	le.setStatistics(termDf + le.getDocumentFrequency(), termTF + le.getFrequency());
	// Keep the entry's maximum within-document frequency up to date.
	if (le.getMaxFrequencyInDocuments() < maxtf) {
		le.setMaxFrequencyInDocuments(maxtf);
	}
}
/** * Returns the weight of a term with the given * term identifier, computed by the specified * query expansion model. * @param termId int the term identifier to set the weight for. * @param model QueryExpansionModel the used query expansion model. * @return double the weight of the specified term. */ public double getExpansionWeight(int termId, QueryExpansionModel model){ double score = 0; ExpansionTerm o = terms.get(termId); if (o != null) { double TF = 0; //double Nt = 0; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(termId); TF = lee.getValue().getFrequency(); //Nt = lee.getValue().getDocumentFrequency(); score = model.score(o.getWithinDocumentFrequency(), TF, this.totalDocumentLength, this.numberOfTokens, this.averageDocumentLength ); } return score; }
/** * Returns the weight of a term with the given * term identifier, computed by the specified * query expansion model. * @param termId int the term identifier to set the weight for. * @param model QueryExpansionModel the used query expansion model. * @return double the weight of the specified term. */ public double getExpansionWeight(int termId, QueryExpansionModel model){ double score = 0; ExpansionTerm o = terms.get(termId); if (o != null) { double TF = 0; //double Nt = 0; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(termId); TF = lee.getValue().getFrequency(); //Nt = lee.getValue().getDocumentFrequency(); score = model.score(o.getWithinDocumentFrequency(), TF, this.totalDocumentLength, this.numberOfTokens, this.averageDocumentLength ); } return score; }
TF[i] = le.next().getValue().getFrequency(); i++; // record the next lexicon entry's collection frequency; NOTE(review): fragment of an enclosing loop not visible in this view — confirm iterator bounds match TF.length
/** The iterator must visit entries in term-id order, agreeing with getLexiconEntry(int). */
@Test
public void test_iterator() throws Exception {
	MemoryLexicon lexicon = new MemoryLexicon();
	assertNotNull(lexicon);
	for (int t = 0; t < 10; t++) {
		lexicon.term(terms[t].toString(), entries[t]);
	}
	Iterator<Entry<String, LexiconEntry>> it = lexicon.iterator();
	assertNotNull(it);
	int index = 0;
	while (it.hasNext()) {
		Entry<String, LexiconEntry> fromIterator = it.next();
		Entry<String, LexiconEntry> fromLookup = lexicon.getLexiconEntry(index++);
		assertEquals(fromIterator.getKey(), fromLookup.getKey());
		assertEquals(fromIterator.getValue().getTermId(), fromLookup.getValue().getTermId());
		assertEquals(fromIterator.getValue().getDocumentFrequency(), fromLookup.getValue().getDocumentFrequency());
		assertEquals(fromIterator.getValue().getFrequency(), fromLookup.getValue().getFrequency());
	}
}
/** Lookup by term id must return the matching key and the expected statistics. */
@Test
public void test_getLexiconEntry2() throws Exception {
	MemoryLexicon lexicon = new MemoryLexicon();
	assertNotNull(lexicon);
	for (int t = 0; t < 10; t++) {
		lexicon.term(terms[t].toString(), entries[t]);
	}
	for (int id = 0; id < 10; id++) {
		Entry<String, LexiconEntry> kv = lexicon.getLexiconEntry(id);
		assertEquals("t" + id, kv.getKey());
		LexiconEntry entry = kv.getValue();
		assertEquals(id, entry.getTermId());
		assertEquals(id + 1, entry.getDocumentFrequency());
		assertEquals(id + 1, entry.getFrequency());
	}
}
/** Positional lookup (ith entry) must return the matching key and the expected statistics. */
@Test
public void test_getIthLexiconEntry() throws Exception {
	MemoryLexicon lexicon = new MemoryLexicon();
	assertNotNull(lexicon);
	for (int t = 0; t < 10; t++) {
		lexicon.term(terms[t].toString(), entries[t]);
	}
	for (int position = 0; position < 10; position++) {
		Entry<String, LexiconEntry> kv = lexicon.getIthLexiconEntry(position);
		assertEquals("t" + position, kv.getKey());
		LexiconEntry entry = kv.getValue();
		assertEquals(position, entry.getTermId());
		assertEquals(position + 1, entry.getDocumentFrequency());
		assertEquals(position + 1, entry.getFrequency());
	}
}
/** Adding every term a second time must merge statistics, not create duplicate entries. */
@Test
public void test_incrementTerm2() throws Exception {
	MemoryLexicon lexicon = new MemoryLexicon();
	assertNotNull(lexicon);
	for (int pass = 0; pass < 2; pass++) {
		for (int t = 0; t < 10; t++) {
			lexicon.term(terms[t].toString(), entries[t]);
		}
	}
	assertEquals(10, lexicon.numberOfEntries());
	for (int id = 0; id < 10; id++) {
		Entry<String, LexiconEntry> kv = lexicon.getLexiconEntry(id);
		assertEquals("t" + id, kv.getKey());
		LexiconEntry entry = kv.getValue();
		assertEquals(id, entry.getTermId());
		// Statistics are doubled because each term was counted twice.
		assertEquals((id + 1) * 2, entry.getDocumentFrequency());
		assertEquals((id + 1) * 2, entry.getFrequency());
	}
}
/** Lookup by term string must return an entry with the expected id and statistics. */
@Test
public void test_getLexiconEntry1() throws Exception {
	MemoryLexicon lexicon = new MemoryLexicon();
	assertNotNull(lexicon);
	for (int t = 0; t < 10; t++) {
		lexicon.term(terms[t].toString(), entries[t]);
	}
	for (int id = 0; id < 10; id++) {
		LexiconEntry entry = lexicon.getLexiconEntry(terms[id].toString());
		assertEquals(id, entry.getTermId());
		assertEquals(id + 1, entry.getDocumentFrequency());
		assertEquals(id + 1, entry.getFrequency());
	}
}
/** Indexing ["a","b","a"] must yield two lexicon entries with frequencies 2 and 1. */
@Test
public void testTwoTermThreeOccurrence() throws Exception {
	Index index = createLexiconIndex(new String[] {"a", "b", "a"});
	Lexicon<String> lexicon = index.getLexicon();
	assertEquals(2, lexicon.numberOfEntries());
	LexiconEntry entryA = lexicon.getLexiconEntry("a");
	assertNotNull(entryA);
	assertEquals(2, entryA.getFrequency());
	LexiconEntry entryB = lexicon.getLexiconEntry("b");
	assertNotNull(entryB);
	assertEquals(1, entryB.getFrequency());
}
/** Asserts the expected term/document frequencies for three known terms in the index. */
protected void checkFrequencies(Index index) {
	final Lexicon<String> lexicon = index.getLexicon();
	LexiconEntry entry = lexicon.getLexiconEntry("dramatis");
	assertNotNull(entry);
	assertEquals(6, entry.getFrequency());
	assertEquals(5, entry.getDocumentFrequency());
	entry = lexicon.getLexiconEntry("personae");
	assertNotNull(entry);
	assertEquals(6, entry.getFrequency());
	assertEquals(5, entry.getDocumentFrequency());
	entry = lexicon.getLexiconEntry("isnae");
	assertNotNull(entry);
	assertEquals(1, entry.getFrequency());
	assertEquals(1, entry.getDocumentFrequency());
}
/** A single-term document must produce one entry reachable by name, position, and id. */
@Test
public void testOneTermOneOccurrence() throws Exception {
	Index index = createLexiconIndex(new String[] {"a"});
	Lexicon<String> lexicon = index.getLexicon();
	assertEquals(1, lexicon.numberOfEntries());
	assertNotNull(lexicon.getLexiconEntry("a"));
	assertEquals(1, lexicon.getLexiconEntry("a").getFrequency());
	// Both positional accessors must expose the single term.
	assertEquals("a", lexicon.getIthLexiconEntry(0).getKey());
	assertEquals("a", lexicon.getLexiconEntry(0).getKey());
}
/** Two occurrences of the same term must collapse into one entry with frequency 2. */
@Test
public void testOneTermTwoOccurrence() throws Exception {
	Index index = createLexiconIndex(new String[] {"a", "a"});
	Lexicon<String> lexicon = index.getLexicon();
	assertEquals(1, lexicon.numberOfEntries());
	assertNotNull(lexicon.getLexiconEntry("a"));
	assertEquals(2, lexicon.getLexiconEntry("a").getFrequency());
}
/**
 * Checks a term's lexicon statistics and then walks its inverted-index posting list,
 * asserting each posting's docid, frequency, and document length, ending at EOL.
 */
@SuppressWarnings("unchecked")
public static void checkContents(Index index, String term, int freq, int[] docids, int[] freqs, int[] docLens) throws Exception {
	Lexicon<String> lexicon = index.getLexicon();
	assertNotNull(lexicon);
	LexiconEntry entry = lexicon.getLexiconEntry(term);
	assertNotNull(entry);
	assertEquals(freq, entry.getFrequency());
	assertEquals(docids.length, entry.getDocumentFrequency());
	PostingIndex<Pointer> inverted = (PostingIndex<Pointer>) index.getInvertedIndex();
	assertNotNull(inverted);
	IterablePosting posting = inverted.getPostings(entry);
	assertNotNull(posting);
	for (int i = 0; i < docids.length; i++) {
		assertEquals(docids[i], posting.next());
		assertEquals(docids[i], posting.getId());
		assertEquals(freqs[i], posting.getFrequency());
		assertEquals(docLens[i], posting.getDocumentLength());
	}
	// The posting list must be exhausted after exactly docids.length postings.
	assertEquals(IterablePosting.EOL, posting.next());
}