/**
 * Skips forward over at most {@code n} elements.
 *
 * Stops early as soon as {@code hasNext} is false, so fewer than {@code n}
 * elements may be skipped. A non-positive {@code n} skips nothing.
 *
 * @param n The maximum number of elements to skip
 * @return The number of elements actually skipped
 */
@Override
public int skip(int n) {
    int skipped = 0;
    while (hasNext && skipped < n) {
        advance();
        skipped++;
    }
    return skipped;
}
/**
 * Load a word map from a file.
 *
 * Convenience form that delegates to the two-argument {@code fromFile}
 * overload, passing {@code false} as its second argument.
 *
 * @param file The file containing the word map
 * @return The word map produced by the two-argument overload
 * @throws IOException If the file cannot be read
 */
public static WordMap fromFile(File file) throws IOException {
    final WordMap loaded = fromFile(file, false);
    return loaded;
}
public LDATrain(File corpus, int K, int J, int W, double alpha, double beta) throws IOException { this.K = K; this.J = J; this.W = W; this.alpha = alpha; this.beta = beta; this.N_kj = new int[K][J]; this.N_lkw = new int[2][K][W]; this.N_lk = new int[2][K]; this.corpus = AssignmentBuffer.interleavedFrom(corpus); this.P = new double[K]; }
/**
 * Command-line entry point.
 *
 * Declares the "history" read-only file argument, the "future" flag and the
 * output target, then runs {@code hist} only if {@code CLIOpts.verify}
 * accepts the arguments.
 *
 * @param args The raw command-line arguments
 * @throws Exception If option handling or {@code hist} fails
 */
public static void main(String[] args) throws Exception {
    final CLIOpts cli = new CLIOpts(args);

    // Options are declared in this fixed order before verification.
    final File historyFile = cli.roFile("history", "The history list");
    final boolean futureFormat = cli.flag("future", "Future history format");
    final PrintStream sink = cli.outFileOrStdout();

    if (cli.verify(Hist.class)) {
        hist(historyFile, futureFormat, sink);
    }
}
/**
 * Command-line entry point.
 *
 * Declares the "file" read-only file argument and the output target, then
 * runs {@code uniq} only if {@code CLIOpts.verify} accepts the arguments.
 *
 * @param args The raw command-line arguments
 * @throws Exception If option handling or {@code uniq} fails
 */
public static void main(String[] args) throws Exception {
    final CLIOpts cli = new CLIOpts(args);

    // Options are declared in this fixed order before verification.
    final File input = cli.roFile("file", "The file to count uniqueness among");
    final PrintStream sink = cli.outFileOrStdout();

    if (cli.verify(Uniq.class)) {
        uniq(input, sink);
    }
}
/**
 * Reads the next pair from {@code nextPair()} and counts the n-grams on
 * each of its two sides.
 *
 * Every token is offered to an {@code NGramCarousel}; after each offer, all
 * n-grams of order 1 up to {@code carousel.maxNGram()} are counted in the
 * map for that side.
 *
 * @param N The order passed to the {@code NGramCarousel} constructor
 *          (presumably the maximum n-gram length; confirm)
 * @return A two-element array of n-gram count maps, one per side of the
 *         pair, or {@code null} when {@code nextPair()} returns {@code null}
 * @throws IOException If the next pair cannot be read
 */
public Object2IntMap<NGram>[] nextNGramPair(int N) throws IOException {
    final int[][] pair = nextPair();
    if (pair == null) {
        return null;
    }

    // Generic arrays cannot be created directly, so the array itself is raw;
    // the suppression is scoped to this one declaration.
    @SuppressWarnings({"unchecked", "rawtypes"})
    final Object2IntRBTreeMap<NGram>[] ngramPair = new Object2IntRBTreeMap[]{
        new Object2IntRBTreeMap<NGram>(),
        new Object2IntRBTreeMap<NGram>()
    };

    for (int l = 0; l < 2; l++) {
        // A fresh carousel per side: reusing one instance would let the
        // trailing tokens of pair[0] act as history for the first n-grams
        // counted for pair[1].
        final NGramCarousel carousel = new NGramCarousel(N);
        final Object2IntRBTreeMap<NGram> counts = ngramPair[l];
        for (int i = 0; i < pair[l].length; i++) {
            carousel.offer(pair[l][i]);
            for (int n = 1; n <= carousel.maxNGram(); n++) {
                final NGram ng = carousel.ngram(n);
                if (counts.containsKey(ng)) {
                    counts.put(ng, counts.getInt(ng) + 1);
                } else {
                    counts.put(ng, 1);
                }
            }
        }
    }
    return ngramPair;
}
/**
 * Opens {@code file} through {@code CLIOpts.openInputAsMaybeZipped} and
 * returns an iterator over the integers read from it.
 *
 * @return A new {@code DataInputStreamAsIntIterator} over the opened stream
 * @throws RuntimeException Wrapping the {@code IOException} if the file
 *         cannot be opened
 */
@Override
public IntIterator iterator() {
    try {
        final DataInputStreamAsIntIterator ints =
                new DataInputStreamAsIntIterator(CLIOpts.openInputAsMaybeZipped(file));
        return ints;
    } catch (IOException ioFailure) {
        // The overridden signature has no checked exceptions, so rethrow unchecked.
        throw new RuntimeException(ioFailure);
    }
}
/**
 * Builds the n-gram made of every element except the last one.
 *
 * @return A new {@code NGram} over a copy of the leading
 *         {@code ngram.length - 1} elements
 */
public NGram history() {
    final int end = ngram.length - 1;
    final int[] leading = Arrays.copyOfRange(ngram, 0, end);
    return new NGram(leading);
}
/**
 * Drains the source by calling {@code nextFreqPair(W)} until it returns
 * {@code null}, collecting every pair it produced.
 *
 * @param W Passed unchanged to each {@code nextFreqPair} call
 * @return All pairs read, in reading order; empty if there were none
 * @throws IOException If reading a pair fails
 */
public SparseIntArray[][] readAll(int W) throws IOException {
    final ArrayList<SparseIntArray[]> collected = new ArrayList<SparseIntArray[]>();
    for (SparseIntArray[] pair = nextFreqPair(W); pair != null; pair = nextFreqPair(W)) {
        collected.add(pair);
    }
    final SparseIntArray[][] result = new SparseIntArray[collected.size()][];
    return collected.toArray(result);
}
/**
 * Boxed form of {@code nextInt()}.
 *
 * @return The value returned by {@code nextInt()}, boxed as an {@code Integer}
 */
@Override
public Integer next() {
    final int value = nextInt();
    return Integer.valueOf(value);
}
/**
 * Wraps the given array as an n-gram.
 *
 * The array is stored by reference and is not copied. When assertions are
 * enabled, the array is checked with {@code ngramNonnegative}.
 *
 * @param ngram The elements of the n-gram
 */
public NGram(int[] ngram) {
    // Only evaluated when the JVM runs with assertions enabled.
    assert ngramNonnegative(ngram);
    this.ngram = ngram;
}
/**
 * Orders n-grams by length first, with longer n-grams sorting before
 * shorter ones, and then element by element using {@code compare}.
 *
 * @param o The n-gram to compare against
 * @return -1 if this n-gram is longer than {@code o}, +1 if it is shorter;
 *         for equal lengths, the first non-zero element-wise result of
 *         {@code compare}, or 0 when all elements compare as equal
 */
@Override
public int compareTo(NGram o) {
    final int ownLength = this.ngram.length;
    final int otherLength = o.ngram.length;

    if (otherLength < ownLength) {
        return -1;
    }
    if (otherLength > ownLength) {
        return +1;
    }

    // Same length: the first differing position decides.
    for (int idx = 0; idx < ownLength; idx++) {
        final int result = compare(this.ngram[idx], o.ngram[idx]);
        if (result != 0) {
            return result;
        }
    }
    return 0;
}
/**
 * Drains the source by calling {@code nextNGramPair(N)} until it returns
 * {@code null}, collecting every pair of n-gram count maps it produced.
 *
 * @param N Passed unchanged to each {@code nextNGramPair} call
 * @return All pairs read, in reading order; empty if there were none
 * @throws IOException If reading a pair fails
 */
public Object2IntMap<NGram>[][] readAllNGrams(int N) throws IOException {
    final ArrayList<Object2IntMap<NGram>[]> collected = new ArrayList<Object2IntMap<NGram>[]>();
    for (Object2IntMap<NGram>[] pair = nextNGramPair(N); pair != null; pair = nextNGramPair(N)) {
        collected.add(pair);
    }
    return collected.toArray(new Object2IntMap[collected.size()][]);
}
/**
 * Reads the next int from the buffer.
 *
 * {@code loadBuf()} is called first when no buffer exists yet, or when
 * {@code pos} is non-zero and an exact multiple of {@code bufSize}.
 * {@code pos} is then moved forward by four.
 *
 * @return The int obtained from {@code buf.getInt()}
 * @throws IOException If loading the buffer fails
 */
public int getNext() throws IOException {
    boolean mustLoad = (buf == null);
    if (!mustLoad) {
        // Only consulted when a buffer already exists, as in the short-circuit form.
        mustLoad = (pos != 0) && (pos % bufSize == 0);
    }
    if (mustLoad) {
        loadBuf();
    }

    final int value = buf.getInt();
    pos += 4;
    return value;
}
/**
 * Reads the next pair from {@code nextPair()} and converts each side with
 * {@code SparseIntArray.histogram}.
 *
 * @param W Passed unchanged to {@code SparseIntArray.histogram}
 * @return A two-element array holding the histogram of each side, or
 *         {@code null} when {@code nextPair()} returns {@code null}
 * @throws IOException If the next pair cannot be read
 */
public SparseIntArray[] nextFreqPair(int W) throws IOException {
    final int[][] pair = nextPair();
    if (pair == null) {
        return null;
    }
    final SparseIntArray first = SparseIntArray.histogram(pair[0], W);
    final SparseIntArray second = SparseIntArray.histogram(pair[1], W);
    return new SparseIntArray[]{ first, second };
}
/**
 * Orders n-grams by descending salience, as looked up in {@code salience}.
 *
 * The n-gram with the larger salience sorts first. When neither strict
 * comparison holds (equal values, or a NaN on either side), the result
 * falls back to {@code o1.compareTo(o2)}.
 *
 * @param o1 The first n-gram
 * @param o2 The second n-gram
 * @return +1 if {@code o1} has the smaller salience, -1 if it has the
 *         larger salience, otherwise {@code o1.compareTo(o2)}
 */
// NOTE: the trailing "});" closes the enclosing anonymous class and the
// call it is passed to; both begin before this method.
@Override public int compare(NGram o1, NGram o2) { final double salience1 = salience.getDouble(o1); final double salience2 = salience.getDouble(o2); if(salience1 < salience2) { return +1; } else if(salience1 > salience2) { return -1; } else { return o1.compareTo(o2); } } });
/**
 * Builds the n-gram made of every element except the first one.
 *
 * @return A new {@code NGram} over a copy of the elements from index 1 to
 *         the end
 */
public NGram future() {
    final int[] trailing = Arrays.copyOfRange(ngram, 1, ngram.length);
    return new NGram(trailing);
}
public DataInputStreamAsIntIterator(InputStream is) { this.data = new DataInputStream(is); advance(); }
/**
 * Creates a trainer over the given corpus file.
 *
 * Stores the sizes and hyper-parameters, opens the corpus through
 * {@code AssignmentBuffer.interleavedFrom} and allocates the zero-initialised
 * tables, whose leading dimension is the constant {@code L} wherever it
 * appears: {@code N_jl} is J x L, {@code phi_lwk} and {@code N_lwk} are
 * L x W x K, {@code theta_lkj} and {@code N_lkj} are L x K x J, and
 * {@code N_lk} is L x K.
 *
 * @param corpus The corpus file handed to {@code AssignmentBuffer.interleavedFrom}
 * @param J Table dimension (presumably the number of documents; confirm)
 * @param W Table dimension (presumably the vocabulary size; confirm)
 * @param K Table dimension (presumably the number of topics; confirm)
 * @param alpha Stored as-is in {@code this.alpha}
 * @param beta Stored as-is in {@code this.beta}
 * @throws IOException If the corpus buffer cannot be created
 */
public CPLSATrain(File corpus, int J, int W, int K, double alpha, double beta) throws IOException {
    // Plain scalar settings; none of these can fail.
    this.J = J;
    this.W = W;
    this.K = K;
    this.alpha = alpha;
    this.beta = beta;

    // The corpus is opened before any table is allocated.
    this.buf = AssignmentBuffer.interleavedFrom(corpus);

    // Tables, in the same order as before.
    this.N_jl = new int[J][L];
    this.phi_lwk = new double[L][W][K];
    this.theta_lkj = new double[L][K][J];
    this.N_lkj = new int[L][K][J];
    this.N_lwk = new int[L][W][K];
    this.N_lk = new int[L][K];
}
/**
 * Returns the current value and moves on by calling {@code advance()}.
 *
 * @return The value that was held in {@code next} before advancing
 * @throws NoSuchElementException If {@code hasNext} is false
 */
@Override
public int nextInt() {
    if (hasNext) {
        // Capture the value before advance() runs.
        final int current = next;
        advance();
        return current;
    }
    throw new NoSuchElementException();
}