/**
 * Folds the counts held by another set of collection statistics into this one.
 * <p>
 * The document, pointer and token counts are summed, and so is the token count
 * of each field. The number of unique terms is not summed: the larger of the
 * two values is kept. Finally {@code relcaluateAverageLengths()} is invoked so
 * that derived averages reflect the new totals.
 *
 * @param cs the statistics whose counts are added to this instance
 */
public void addStatistics(CollectionStatistics cs) {
    numberOfDocuments += cs.getNumberOfDocuments();
    numberOfPointers += cs.getNumberOfPointers();
    numberOfTokens += cs.getNumberOfTokens();
    numberOfUniqueTerms = Math.max(numberOfUniqueTerms, cs.getNumberOfUniqueTerms());

    final long[] incomingFieldTokens = cs.getFieldTokens();
    for (int fieldId = 0; fieldId < numberOfFields; fieldId++) {
        fieldTokens[fieldId] += incomingFieldTokens[fieldId];
    }

    relcaluateAverageLengths();
}
/**
 * Merges the given statistics into this instance.
 * <p>
 * Sums the number of documents, pointers and tokens, adds the per-field token
 * counts element by element, keeps the maximum of the two unique-term counts,
 * and then calls {@code relcaluateAverageLengths()}.
 *
 * @param cs the statistics to merge in
 */
public void addStatistics(CollectionStatistics cs) {
    // Plain additive counters.
    numberOfDocuments += cs.getNumberOfDocuments();
    numberOfPointers += cs.getNumberOfPointers();
    numberOfTokens += cs.getNumberOfTokens();

    // Unique terms are not additive; the larger value is retained.
    numberOfUniqueTerms = Math.max(numberOfUniqueTerms, cs.getNumberOfUniqueTerms());

    // Per-field token counts, indexed by field id.
    final long[] addedFieldTokens = cs.getFieldTokens();
    int field = 0;
    while (field < numberOfFields) {
        fieldTokens[field] += addedFieldTokens[field];
        field++;
    }

    relcaluateAverageLengths();
}
@Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new double[fieldCount]; fieldWeights = new double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; try{ for(int fi=0;fi<fieldCount;fi++) { fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w."+ fi, ""+1.0)); final Normalisation nf = normClass.newInstance(); this.fieldNormalisations[fi] = nf; final double param = Double.parseDouble(ApplicationSetup.getProperty("c."+ fi, ""+1.0)); nf.setParameter(param); nf.setNumberOfDocuments(_cs.getNumberOfDocuments()); final long tokensf = _cs.getFieldTokens()[fi]; nf.setNumberOfTokens(tokensf); nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]); p[fi] = 1.0d / ((double)fieldCount * (double)_cs.getNumberOfDocuments()); //System.err.println("p["+fi+"]="+ p[fi]); p[fi] = p[fi] / Double.parseDouble( ApplicationSetup.getProperty("p." + fi, "1.0d")); //System.err.println("p["+fi+"]="+ p[fi]); } } catch (Exception e) { throw new IllegalArgumentException(e); } }
/**
 * Configures the per-field state of this model from the given collection statistics.
 * <p>
 * After delegating to the superclass, one entry per field {@code fi} is prepared:
 * <ul>
 * <li>a new normalisation is instantiated from {@code normClass}, given the parameter
 *     parsed from property {@code c.<fi>} (default 1.0) together with the document
 *     count, the field's token count and the field's average length from {@code _cs};</li>
 * <li>{@code fieldWeights[fi]} is parsed from property {@code p.<fi>} (default 1.0);</li>
 * <li>{@code p[fi]} is {@code 1 / (fieldCount * numberOfDocuments)} divided by
 *     {@code fieldWeights[fi]}.</li>
 * </ul>
 * NOTE(review): the field weight and the divisor of {@code p[fi]} both come from the
 * {@code p.<fi>} property; confirm this is intended rather than a separate weight key.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalArgumentException wrapping any exception raised inside the per-field
 *         setup (property parsing, normalisation instantiation, array access)
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    p = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    this.fieldNormalisations = new Normalisation[fieldCount];
    try {
        // Same for every field, so it is computed once instead of on each iteration.
        final double uniformPrior =
                1.0d / ((double) fieldCount * (double) _cs.getNumberOfDocuments());

        for (int fi = 0; fi < fieldCount; fi++) {
            // Class.newInstance() is deprecated; going through the no-arg constructor
            // reports constructor failures as InvocationTargetException instead of
            // rethrowing them unchecked.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;

            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "1.0"));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);

            // Previously written as an assignment nested inside the division; split out
            // so that the write to fieldWeights is visible.
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("p." + fi, "1.0d"));
            p[fi] = uniformPrior / fieldWeights[fi];
        }
    } catch (Exception e) {
        // Broad catch kept on purpose: callers see IllegalArgumentException for any setup failure.
        throw new IllegalArgumentException(e);
    }
}
/**
 * Sets up the per-field normalisations, weights and priors from the collection statistics.
 * <p>
 * The superclass is updated first. Then, for each field {@code fi}:
 * <ul>
 * <li>a normalisation is created from {@code normClass} and configured with the
 *     parameter from property {@code c.<fi>} (default 1.0), the number of documents,
 *     the field's token count and the field's average length taken from {@code _cs};</li>
 * <li>{@code fieldWeights[fi]} is parsed from property {@code p.<fi>} (default 1.0);</li>
 * <li>{@code p[fi]} becomes {@code 1 / (fieldCount * numberOfDocuments)} divided by
 *     {@code fieldWeights[fi]}.</li>
 * </ul>
 * NOTE(review): {@code fieldWeights[fi]} is populated from the {@code p.<fi>} property,
 * the same value that divides {@code p[fi]}; verify that this is the intended key.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalArgumentException wrapping any exception raised inside the per-field
 *         setup (property parsing, normalisation instantiation, array access)
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    p = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    this.fieldNormalisations = new Normalisation[fieldCount];
    try {
        // Loop-invariant: depends only on the field count and the document count.
        final double basePrior =
                1.0d / ((double) fieldCount * (double) _cs.getNumberOfDocuments());

        for (int fi = 0; fi < fieldCount; fi++) {
            // Replaces the deprecated Class.newInstance(); constructor failures now
            // arrive wrapped in InvocationTargetException and are caught below.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;

            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "1.0"));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);

            // The weight is stored first, then used as the divisor, instead of
            // assigning it in the middle of the division expression.
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("p." + fi, "1.0d"));
            p[fi] = basePrior / fieldWeights[fi];
        }
    } catch (Exception e) {
        // Broad catch kept on purpose: callers see IllegalArgumentException for any setup failure.
        throw new IllegalArgumentException(e);
    }
}
/**
 * Applies the given collection statistics, counting only tokens from the active fields.
 * <p>
 * The token counts of the fields listed in {@code activeFieldIds} are summed. That sum
 * overwrites the superclass token count and is divided by the number of documents to
 * give the superclass average document length. The basic model then receives a new
 * {@code CollectionStatistics} built from the document, unique-term and pointer counts
 * of {@code _cs}, the summed token count, and an empty field-token array.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalStateException if {@code _cs} reports fewer than one field
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);

    final int numFields = _cs.getNumberOfFields();
    if (numFields < 1) {
        throw new IllegalStateException("Fields must be 1 or more");
    }

    // Sum the tokens of the active fields only.
    final long[] perFieldTokens = _cs.getFieldTokens();
    long activeTokens = 0;
    for (int activeId : activeFieldIds) {
        activeTokens += perFieldTokens[activeId];
    }

    super.numberOfTokens = activeTokens;
    super.averageDocumentLength = (double) activeTokens / (double) _cs.getNumberOfDocuments();

    final CollectionStatistics restricted = new CollectionStatistics(
            _cs.getNumberOfDocuments(),
            _cs.getNumberOfUniqueTerms(),
            activeTokens,
            _cs.getNumberOfPointers(),
            new long[0]);
    basicModel.setCollectionStatistics(restricted);
}
@Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new double[fieldCount]; fieldWeights = new double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; try{ for(int fi=0;fi<fieldCount;fi++) { fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w."+ fi, ""+1.0)); final Normalisation nf = normClass.newInstance(); this.fieldNormalisations[fi] = nf; final double param = Double.parseDouble(ApplicationSetup.getProperty("c."+ fi, ""+1.0)); nf.setParameter(param); nf.setNumberOfDocuments(_cs.getNumberOfDocuments()); final long tokensf = _cs.getFieldTokens()[fi]; nf.setNumberOfTokens(tokensf); nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]); p[fi] = 1.0d / ((double)fieldCount * (double)_cs.getNumberOfDocuments()); //System.err.println("p["+fi+"]="+ p[fi]); p[fi] = p[fi] / Double.parseDouble( ApplicationSetup.getProperty("p." + fi, "1.0d")); //System.err.println("p["+fi+"]="+ p[fi]); } } catch (Exception e) { throw new IllegalArgumentException(e); } }
/**
 * Configures the basic model and the per-field normalisations from the collection statistics.
 * <p>
 * After delegating to the superclass, the basic model is given the number of documents
 * and tokens of {@code _cs}. The {@code fieldNormalisations}, {@code fieldGlobalFrequencies}
 * and {@code fieldWeights} arrays are reallocated with one slot per field. For each field
 * {@code fi}:
 * <ul>
 * <li>{@code fieldWeights[fi]} is parsed from property {@code w.<fi>} (default 1.0);</li>
 * <li>a new normalisation is instantiated from {@code normClass}, given the parameter
 *     parsed from property {@code c.<fi>} (default 1.0) together with the document
 *     count, the field's token count and the field's average length from {@code _cs}.</li>
 * </ul>
 * {@code fieldGlobalFrequencies} is only allocated here, not filled.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalStateException if {@code _cs} reports fewer than one field, or wrapping
 *         any exception raised inside the per-field setup
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    if (fieldCount < 1) {
        throw new IllegalStateException("Fields must be 1 or more");
    }

    basicModel.setNumberOfDocuments(_cs.getNumberOfDocuments());
    basicModel.setNumberOfTokens(_cs.getNumberOfTokens());

    fieldNormalisations = new Normalisation[fieldCount];
    fieldGlobalFrequencies = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++) {
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w." + fi, "1.0"));

            // Class.newInstance() is deprecated; going through the no-arg constructor
            // reports constructor failures as InvocationTargetException instead of
            // rethrowing them unchecked.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;

            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "1.0"));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
        }
    } catch (Exception e) {
        // Broad catch kept on purpose: callers see IllegalStateException for any setup failure.
        throw new IllegalStateException(e);
    }
}
/**
 * Initialises the basic model, field weights and field normalisations from the
 * collection statistics.
 * <p>
 * The superclass is updated first, then the basic model receives the document and token
 * counts of {@code _cs}. The arrays {@code fieldNormalisations},
 * {@code fieldGlobalFrequencies} and {@code fieldWeights} are recreated with
 * {@code fieldCount} entries; {@code fieldGlobalFrequencies} is left unfilled here.
 * For each field {@code fi}, the weight is parsed from property {@code w.<fi>}
 * (default 1.0) and a normalisation is created from {@code normClass}, then configured
 * with the parameter from property {@code c.<fi>} (default 1.0), the number of documents,
 * the field's token count and the field's average length.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalStateException if {@code _cs} reports fewer than one field, or wrapping
 *         any exception raised inside the per-field setup
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    if (fieldCount < 1) {
        throw new IllegalStateException("Fields must be 1 or more");
    }

    basicModel.setNumberOfDocuments(_cs.getNumberOfDocuments());
    basicModel.setNumberOfTokens(_cs.getNumberOfTokens());

    fieldNormalisations = new Normalisation[fieldCount];
    fieldGlobalFrequencies = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++) {
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w." + fi, "1.0"));

            // Replaces the deprecated Class.newInstance(); constructor failures now
            // arrive wrapped in InvocationTargetException and are caught below.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;

            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "1.0"));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
        }
    } catch (Exception e) {
        // Broad catch kept on purpose: callers see IllegalStateException for any setup failure.
        throw new IllegalStateException(e);
    }
}
/**
 * Installs collection statistics in which only the active fields contribute tokens.
 * <p>
 * Steps, in order: delegate to the superclass; reject statistics with fewer than one
 * field; add up the token counts of every field id in {@code activeFieldIds}; store that
 * total as the superclass token count and use it, divided by the number of documents,
 * as the superclass average document length; finally hand the basic model a new
 * {@code CollectionStatistics} carrying the original document, unique-term and pointer
 * counts, the active-field token total, and a zero-length field-token array.
 *
 * @param _cs the collection statistics to configure from
 * @throws IllegalStateException if {@code _cs} reports fewer than one field
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs) {
    super.setCollectionStatistics(_cs);

    final int declaredFields = _cs.getNumberOfFields();
    if (declaredFields < 1) {
        throw new IllegalStateException("Fields must be 1 or more");
    }

    final long[] tokensByField = _cs.getFieldTokens();
    long tokenTotal = 0;
    for (int id : activeFieldIds) {
        tokenTotal += tokensByField[id];
    }

    super.numberOfTokens = tokenTotal;
    super.averageDocumentLength = (double) tokenTotal / (double) _cs.getNumberOfDocuments();

    // The basic model sees the active-field totals and no per-field breakdown.
    final CollectionStatistics activeOnly = new CollectionStatistics(
            _cs.getNumberOfDocuments(),
            _cs.getNumberOfUniqueTerms(),
            tokenTotal,
            _cs.getNumberOfPointers(),
            new long[0]);
    basicModel.setCollectionStatistics(activeOnly);
}
// Fragment of a test body; the enclosing method is not visible here.
// 1. Checks the statistics of `index`: 1 document, 2 fields, 2 tokens in field 0,
//    7 tokens in field 1, and a length of 9 for document 0.
// 2. Builds a Matching for `index` via makeMatching(index); the result is not used
//    within this fragment.
// 3. Checks that `fatIndex` reports the same number of documents, the same number of
//    fields, and the same token counts for fields 0 and 1 as `index`.
assertEquals(1, index.getCollectionStatistics().getNumberOfDocuments()); assertEquals(2, index.getCollectionStatistics().getNumberOfFields()); assertEquals(2, index.getCollectionStatistics().getFieldTokens()[0]); assertEquals(7, index.getCollectionStatistics().getFieldTokens()[1]); assertEquals(9, index.getDocumentIndex().getDocumentLength(0)); Matching matching = makeMatching(index); assertEquals( index.getCollectionStatistics().getNumberOfDocuments(), fatIndex.getCollectionStatistics().getNumberOfDocuments()); assertEquals(index.getCollectionStatistics().getNumberOfFields(), fatIndex.getCollectionStatistics().getNumberOfFields()); assertEquals(index.getCollectionStatistics().getFieldTokens()[0], fatIndex.getCollectionStatistics().getFieldTokens()[0]); assertEquals(index.getCollectionStatistics().getFieldTokens()[1], fatIndex.getCollectionStatistics().getFieldTokens()[1]);
// Fragment of a test body; the enclosing method is not visible here.
// Checks that the index property "num.field.1.Tokens" reads as 611 (-1 is the second
// argument to getIntIndexProperty, presumably the fallback when the property is
// missing; confirm against that method), that the collection has 2 fields, and that
// fields 0 and 1 hold 123 and 611 tokens respectively.
assertEquals(611, index.getIntIndexProperty("num.field.1.Tokens", -1)); assertEquals(2, index.getCollectionStatistics().getNumberOfFields()); assertEquals(123, index.getCollectionStatistics().getFieldTokens()[0]); assertEquals(611, index.getCollectionStatistics().getFieldTokens()[1]);