/** Copies this chunk's vocabulary words and their embedding vectors into the
 *  shared {@code _words} / {@code _syn0} arrays, indexed by frame-global row. */
@Override public void map(Chunk[] cs) {
  // Frame-global row of this chunk's first word; destination arrays are frame-wide.
  int wordIdx = (int) cs[0].start();
  int vecIdx = _wordVecSize * wordIdx;
  for (int row = 0; row < cs[0]._len; row++) {
    // A fresh BufferedString per row: the returned instance is retained in _words.
    _words[wordIdx++] = cs[0].atStr(new BufferedString(), row);
    // Columns 1.._wordVecSize hold the vector components for this word.
    for (int col = 1; col < cs.length; col++)
      _syn0[vecIdx++] = (float) cs[col].atd(row);
  }
  _job.update(1);
}
/**
 * Advances to the next sentence in the chunk and returns its length (number of
 * in-vocabulary, sub-sampling-surviving words), or -1 when the chunk is exhausted.
 * A sentence ends at an NA row (delimiter) or at MAX_SENTENCE_LEN words.
 * Side effects: fills {@code _sent} with vocab indices terminated by -1,
 * and advances {@code _pos} past the sentence delimiter.
 */
private int nextLength() {
  if (_len >= 0) return _len; // current sentence not yet consumed — return cached length
  if (_pos >= _chk._len) return -1; // end of chunk
  _len = 0;
  BufferedString tmp = new BufferedString(); // reused buffer; atStr refills it each row
  for (; _pos < _chk._len && ! _chk.isNA(_pos) && _len < MAX_SENTENCE_LEN; _pos++) {
    BufferedString str = _chk.atStr(tmp, _pos);
    if (! _vocab.containsKey(str)) continue; // not in the vocab, skip
    if (_sentSampleRate > 0) { // sub-sampling while creating a sentence
      long count = _wordCounts.get(str)._val;
      // word2vec frequency-based down-sampling: keep probability shrinks for frequent words
      float ran = (float) ((Math.sqrt(count / (_sentSampleRate * _vocabWordCount)) + 1) * (_sentSampleRate * _vocabWordCount) / count);
      if (ran * 65536 < cheapRandInt(0xFFFF)) continue; // randomly drop this occurrence
    }
    _sent[_len++] = _vocab.get(tmp); // tmp and str are the same buffer here
  }
  _sent[_len] = -1; // sentinel terminator
  // Skip the NA delimiter. NOTE(review): when the loop stopped because _len hit
  // MAX_SENTENCE_LEN, this also skips one real word — presumably intentional
  // (matches original word2vec behavior) — confirm.
  _pos++;
  return _len;
}
/** Tallies per-word occurrence counts for this chunk into {@code _counts};
 *  NA rows are ignored. */
@Override public void map(Chunk cs) {
  _counts = new IcedHashMap<>();
  for (int row = 0; row < cs._len; row++) {
    if (cs.isNA(row)) continue;
    // A new BufferedString per row: the instance may be retained as a map key.
    BufferedString word = cs.atStr(new BufferedString(), row);
    IcedLong seen = _counts.get(word);
    if (seen == null) {
      _counts.put(word, new IcedLong(1));
    } else {
      seen._val++;
    }
  }
}
/** Expands each word in the single input column into its embedding vector
 *  across the output columns; NA rows and out-of-vocabulary words become NAs. */
@Override public void map(Chunk[] cs, NewChunk[] ncs) {
  assert cs.length == 1;
  Chunk words = cs[0];
  BufferedString buf = new BufferedString(); // reused; atStr refills it per row
  for (int row = 0; row < words._len; row++) {
    // NA row -> no lookup; otherwise transform may still return null (unknown word).
    float[] vec = words.isNA(row) ? null : _model.transform(words.atStr(buf, row));
    if (vec == null) {
      for (NewChunk nc : ncs) nc.addNA();
    } else {
      for (int col = 0; col < ncs.length; col++) ncs[col].addNum(vec[col]);
    }
  }
}
}
break chunkLoop; // we just closed a sequence that was left open in one of the previous chunks } else { BufferedString word = chk.atStr(tmp, i); float[] vs = _model.transform(word); if (vs != null) {
// Builds a 4-column frame from the string column of tmpFrm:
// col0 = the original string, col1 = its parsed int value,
// col2 = chunk-local row index, col3 = UUID packing (row index, value).
@Override public void map(Chunk[] cs, NewChunk[] ncs) {
  for (int i = 0; i < cs[0]._len; i++) {
    BufferedString bs = cs[0].atStr(new BufferedString(), i);
    // NA rows yield bs == null and are encoded as value 0.
    int val = bs == null ? 0 : Integer.parseInt(bs.toString());
    ncs[0].addStr(bs);
    ncs[1].addNum(val);
    ncs[2].addNum(i); // NOTE(review): i is chunk-local, not frame-global — fine for a single-chunk test frame, confirm
    ncs[3].addUUID(i, val);
  }
} }.doAll(new byte[]{Vec.T_STR, Vec.T_NUM, Vec.T_NUM, Vec.T_UUID}, tmpFrm).outputFrame();
// Verifies the frame built by the producer task: col1 must equal the int parsed
// from col0, and col3's UUID halves must round-trip the (row index, value) pair.
@Override public void map(Chunk[] cs) {
  for (int i = 0; i < cs[0]._len; i++) {
    BufferedString bs = cs[0].atStr(new BufferedString(), i);
    int expectedVal = bs == null ? 0 : Integer.parseInt(bs.toString()); // null = NA row, encoded as 0
    int expectedIndex = (int) cs[2].atd(i);
    Assert.assertEquals((double) expectedVal, cs[1].atd(i), 0.00001);
    // presumably at16l/at16h return the low/high halves written by addUUID(i, val) — verify
    Assert.assertEquals(expectedIndex, (int) cs[3].at16l(i));
    Assert.assertEquals(expectedVal, (int) cs[3].at16h(i));
  }
} });
/** Reads this chunk's words and their vector components into the frame-wide
 *  {@code _words} and {@code _syn0} arrays. */
@Override public void map(Chunk[] cs) {
  int w = (int) cs[0].start();        // frame-global index of first word in chunk
  int off = _wordVecSize * w;         // flat offset into _syn0 for that word
  int rows = cs[0]._len;
  for (int r = 0; r < rows; r++) {
    // atStr's returned object is stored, so each row needs its own buffer.
    _words[w++] = cs[0].atStr(new BufferedString(), r);
    for (int c = 1; c < cs.length; c++) {
      _syn0[off++] = (float) cs[c].atd(r);
    }
  }
  _job.update(1);
}
/**
 * Advances to the next sentence in the chunk and returns its length (number of
 * in-vocabulary, sub-sampling-surviving words), or -1 when the chunk is exhausted.
 * A sentence ends at an NA row (delimiter) or at MAX_SENTENCE_LEN words.
 * Side effects: fills {@code _sent} with vocab indices terminated by -1,
 * and advances {@code _pos} past the sentence delimiter.
 */
private int nextLength() {
  if (_len >= 0) return _len; // current sentence not yet consumed — return cached length
  if (_pos >= _chk._len) return -1; // end of chunk
  _len = 0;
  BufferedString tmp = new BufferedString(); // reused buffer; atStr refills it each row
  for (; _pos < _chk._len && ! _chk.isNA(_pos) && _len < MAX_SENTENCE_LEN; _pos++) {
    BufferedString str = _chk.atStr(tmp, _pos);
    if (! _vocab.containsKey(str)) continue; // not in the vocab, skip
    if (_sentSampleRate > 0) { // sub-sampling while creating a sentence
      long count = _wordCounts.get(str)._val;
      // word2vec frequency-based down-sampling: keep probability shrinks for frequent words
      float ran = (float) ((Math.sqrt(count / (_sentSampleRate * _vocabWordCount)) + 1) * (_sentSampleRate * _vocabWordCount) / count);
      if (ran * 65536 < cheapRandInt(0xFFFF)) continue; // randomly drop this occurrence
    }
    _sent[_len++] = _vocab.get(tmp); // tmp and str are the same buffer here
  }
  _sent[_len] = -1; // sentinel terminator
  // Skip the NA delimiter. NOTE(review): when the loop stopped because _len hit
  // MAX_SENTENCE_LEN, this also skips one real word — presumably intentional
  // (matches original word2vec behavior) — confirm.
  _pos++;
  return _len;
}
/** Counts how many times each non-NA string occurs in this chunk,
 *  accumulating into a fresh {@code _counts} map. */
@Override public void map(Chunk cs) {
  _counts = new IcedHashMap<>();
  int rows = cs._len;
  for (int r = 0; r < rows; r++) {
    if (cs.isNA(r)) continue;
    // fresh BufferedString per row — the object may end up as a map key
    BufferedString key = cs.atStr(new BufferedString(), r);
    IcedLong tally = _counts.get(key);
    if (tally != null) {
      tally._val++;
    } else {
      _counts.put(key, new IcedLong(1));
    }
  }
}
/** Maps each word in the single input column to its embedding vector, one output
 *  column per component; NA rows and unknown words produce all-NA output rows. */
@Override public void map(Chunk[] cs, NewChunk[] ncs) {
  assert cs.length == 1;
  Chunk input = cs[0];
  BufferedString scratch = new BufferedString(); // reused across rows
  for (int r = 0; r < input._len; r++) {
    float[] embedding = null;
    if (!input.isNA(r)) {
      // transform returns null for words outside the model's vocabulary
      embedding = _model.transform(input.atStr(scratch, r));
    }
    if (embedding == null) {
      for (NewChunk nc : ncs) nc.addNA();
    } else {
      for (int c = 0; c < ncs.length; c++) ncs[c].addNum(embedding[c]);
    }
  }
}
}
break chunkLoop; // we just closed a sequence that was left open in one of the previous chunks } else { BufferedString word = chk.atStr(tmp, i); float[] vs = _model.transform(word); if (vs != null) {