private static void processDoc(IntArrayList doc, IntArrayList foreignDoc, int N, PrintWriter[] outs, BetaSimFunction beta, int W) throws IOException { final NGramCarousel carousel = new NGramCarousel(N);
/**
 * Scores a document by how often its n-grams are present as keys of {@code ngrams},
 * normalised by the number of tokens in the document.
 *
 * @param document the token ids of the document, in order
 * @param W not used by this implementation
 * @return the number of n-gram hits divided by {@code document.size()},
 *         or {@code 0.0} for an empty document
 */
@Override
public double scoreNGrams(IntList document, int W) {
    // An empty document has no n-grams; without this guard the final
    // division would be 0.0 / 0 and the method would return NaN.
    if (document.isEmpty()) {
        return 0.0;
    }
    double salience = 0.0;
    final NGramCarousel carousel = new NGramCarousel(N);
    for (int w : document) {
        carousel.offer(w);
        // NOTE(review): the bound is exclusive, so order carousel.maxNGram()
        // itself is never looked up here - confirm that this is intended.
        for (int i = 1; i < carousel.maxNGram(); i++) {
            if (ngrams.containsKey(carousel.ngram(i))) {
                salience += 1.0;
            }
        }
    }
    return salience / document.size();
}
final NGramCarousel carousel = new NGramCarousel(N); while (true) { try {
/**
 * Builds a histogram of the order-{@code N} n-grams found in a stream of
 * 4-byte integers.
 * <p>
 * Each non-zero integer is offered to an {@link NGramCarousel}; a zero resets
 * the carousel. After every offer for which {@code carousel.maxNGram() >= N},
 * the count of {@code carousel.ngram(N)} is incremented.
 *
 * @param stream the stream to read integers from; it is read until end of
 *               stream and is not closed by this method
 * @param W not used by this implementation
 * @param N the n-gram order to count
 * @return a map from n-gram to the number of times it was seen
 * @throws IOException if reading fails for a reason other than end of stream
 */
public static Object2IntMap<NGram> ngramHistogram(DataInputStream stream, int W, int N) throws IOException {
    final Object2IntMap<NGram> arr = new Object2IntRBTreeMap<NGram>();
    final NGramCarousel carousel = new NGramCarousel(N);
    // End of input is detected solely through EOFException. available() is
    // only an estimate of the bytes readable without blocking and may be 0
    // before the stream is exhausted, so it must not gate the loop.
    while (true) {
        try {
            final int w = stream.readInt();
            if (w != 0) {
                carousel.offer(w);
                if (carousel.maxNGram() >= N) {
                    final NGram ngram = carousel.ngram(N);
                    if (!arr.containsKey(ngram)) {
                        arr.put(ngram, 1);
                    } else {
                        arr.put(ngram, arr.getInt(ngram) + 1);
                    }
                }
            } else {
                // Zero is the separator token: start a fresh n-gram window.
                carousel.reset();
            }
        } catch (EOFException x) {
            break;
        }
    }
    return arr;
}
public static Object2DoubleMap<NGram> countInReference(final File reference, final int N) throws IOException { final Object2DoubleMap<NGram> counts = new Object2DoubleRBTreeMap<NGram>(); final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(reference)); final NGramCarousel carousel = new NGramCarousel(N); int docNo = 0; while (true) { try { final int w = in.readInt(); if (w != 0) { //if (inDoc(docNo, sourceType)) { carousel.offer(w); for (int n = 1; n <= carousel.maxNGram(); n++) { final NGram ngram = carousel.ngram(n); if (counts.containsKey(ngram)) { counts.put(ngram, counts.getDouble(ngram) + 1.0); } else { counts.put(ngram, 1.0); } } //} } else { carousel.reset(); docNo++; } } catch (EOFException x) { break; } } return counts; }
public static Object2DoubleMap<NGram> countInCorpus(final File reference, final int N, final ObjectSet<NGram> referenceNGrams, SourceType sourceType) throws IOException { final Object2DoubleMap<NGram> counts = new Object2DoubleRBTreeMap<NGram>(); final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(reference)); final NGramCarousel carousel = new NGramCarousel(N); int docNo = 0; while (true) {
/**
 * Tallies the order-{@code N} n-grams of an in-memory token array.
 * <p>
 * Non-zero tokens are fed to an {@link NGramCarousel}; a zero token resets
 * it. Whenever the carousel reports {@code maxNGram() >= N} after an offer,
 * the tally for {@code ngram(N)} goes up by one.
 *
 * @param data the tokens to scan, in order
 * @param W not used by this implementation
 * @param N the n-gram order to tally
 * @return a map from n-gram to the number of times it was seen
 * @throws IOException declared by the signature; nothing in the body throws it
 */
public static Object2IntMap<NGram> ngramHistogram(int[] data, int W, int N) throws IOException {
    final Object2IntMap<NGram> histogram = new Object2IntRBTreeMap<NGram>();
    final NGramCarousel window = new NGramCarousel(N);
    for (int idx = 0; idx < data.length; idx++) {
        final int token = data[idx];
        if (token == 0) {
            // Separator: forget the tokens seen so far.
            window.reset();
            continue;
        }
        window.offer(token);
        if (window.maxNGram() < N) {
            // Not enough tokens yet for a full order-N n-gram.
            continue;
        }
        final NGram key = window.ngram(N);
        final int previous = histogram.containsKey(key) ? histogram.get(key) : 0;
        histogram.put(key, previous + 1);
    }
    return histogram;
}
}
/**
 * Reads the next pair via {@code nextPair()} and counts the n-grams on each
 * of its two sides.
 * <p>
 * For each side, every token is offered to an {@link NGramCarousel} and the
 * count of {@code carousel.ngram(n)} is incremented for every {@code n} from
 * 1 to {@code carousel.maxNGram()}.
 *
 * @param N the size passed to the {@link NGramCarousel}
 * @return a two-element array holding the n-gram counts of side 0 and side 1,
 *         or {@code null} if {@code nextPair()} returned {@code null}
 * @throws IOException if obtaining the next pair fails
 */
public Object2IntMap<NGram>[] nextNGramPair(int N) throws IOException {
    final int[][] pair = nextPair();
    if (pair == null) {
        return null;
    }
    // Generic array creation is not expressible in Java; the raw array only
    // ever holds the two Object2IntRBTreeMap<NGram> instances created here.
    @SuppressWarnings("unchecked")
    final Object2IntRBTreeMap<NGram>[] ngramPair = new Object2IntRBTreeMap[]{
        new Object2IntRBTreeMap<NGram>(),
        new Object2IntRBTreeMap<NGram>()
    };
    for (int l = 0; l < 2; l++) {
        // Use a fresh carousel per side so that the first n-grams of side 1
        // are not built from the trailing tokens of side 0.
        final NGramCarousel carousel = new NGramCarousel(N);
        for (int i = 0; i < pair[l].length; i++) {
            carousel.offer(pair[l][i]);
            for (int n = 1; n <= carousel.maxNGram(); n++) {
                final NGram ng = carousel.ngram(n);
                if (ngramPair[l].containsKey(ng)) {
                    ngramPair[l].put(ng, ngramPair[l].getInt(ng) + 1);
                } else {
                    ngramPair[l].put(ng, 1);
                }
            }
        }
    }
    return ngramPair;
}