public static TrieParser textToNumberTrieParser() { if (textToNumberParser == null) { TrieParser p = new TrieParser(8,false); //supports streaming, so false p.setUTF8Value("%i%.", 1); //p.setUTF8Value("%i%.%/%.", 1); //the above pattern can parse 234 and 234.34 and 23/45 and 23.4/56.7 //it also supports the normal capture methods for ints and decimals with no change //TrieParserReader.capturedLongField(parserReader, 0) textToNumberParser = p; } return textToNumberParser; }
private static TrieParser stringBeginParser() { //TODO: double check that slash and " if appearing inside UTF8 encoding do not get picked up as an escape.. TrieParser trie = new TrieParser(256,1,false,true); trie.setValue(JSONConstants.string221, STRING_PART | 0x2200); trie.setValue(JSONConstants.string222, STRING_END | 0x2200); return trie; }
/**
 * Renders the router map as text for debugging.
 *
 * @return the result of {@code routerMap.toString()}
 */
public String debugRouterMap() {
    final String rendered = routerMap.toString();
    return rendered;
}
/**
 * Handles the node at {@code pos}: when it is an alt-branch node the branch is
 * followed through {@code altBranch}, otherwise the position is recorded with
 * {@code pushAlt}.
 *
 * @param pos    index into {@code data} of the node to inspect
 * @param offset passed through unchanged to {@code altBranch} / {@code pushAlt}
 */
void recurseAltBranch(int pos, int offset) {
    if (data[pos] != TrieParser.TYPE_ALT_BRANCH) {
        pushAlt(pos, offset);
        return;
    }

    // The two slots after the type marker hold the jump as a high part and a low part.
    final int highIdx = pos + 1;
    final int lowIdx = highIdx + 1;
    assert(data[highIdx]>=0): "bad value "+data[highIdx];
    assert(data[lowIdx]>=0): "bad value "+data[lowIdx];

    // high part shifted up by 15 bits, combined with the low 15 bits
    final int jump = (((int)data[highIdx])<<15) | (0x7FFF&data[lowIdx]);

    // altBranch receives the index of the high slot and the value that follows the low slot
    altBranch(highIdx, offset, jump, data[lowIdx + 1]);
}
assert(isValidSize(value)); assert(sourceLength<=source.length); assert((sourceMask&sourcePos)<=source.length); longestKnown = Math.max(longestKnown, computeMax(source, sourcePos, sourceLength, sourceMask)); shortestKnown = Math.min(shortestKnown, sourceLength); writeEnd(writeRuns(insertAltBranch(0, pos-1, source, sourcePos1, sourceLength1, sourceMask), source, sourcePos1, sourceLength1, sourceMask), value); int jumpMask = computeJumpMask((short) v, data[pos1++]); pos = 0==jumpMask? 1+pos1 : 1+(jumpMask&((((int)data[pos1++])<<15) | (0x7FFF&data[pos1])))+pos1; altBranch(pos, sourcePos, (((int)data[pos++])<<15) | (0x7FFF&data[pos++]), data[pos]); int[] choices = selectDataSpecificOrderedChoices(source, sourcePos, sourceMask); int selectedStackPos = choseOptimalPathFromStack(choices); extractions[activeExtractionCount++] = second; maxExtractedFields = Math.max(maxExtractedFields, activeExtractionCount); if (isNumber(second)) { writeEnd(writeRuns(appendAltBranch(pos-1, source, sourcePos, insertLengthNumericCapture, sourceMask), source, sourcePos, insertLengthNumericCapture, sourceMask), value); return; writeEnd(writeRuns(appendAltBranch(pos-1, source, sourcePos, insertLengthBytesCapture, sourceMask), source, sourcePos, insertLengthBytesCapture, sourceMask), value); insertAtBranchValueAlt(pos, source, sourceLength, sourceMask, value, length, runPos, run, r+afterWhileRun, sourcePos-2); //TODO: this count can be off by buried extractions.
@Ignore public void testNow() { TrieParser headerMap = new TrieParser(1024,true); String newHeaderMap = null; headerMap.setUTF8Value("%b: %b\n", UNKNOWN_HEADER_ID); //TODO: bug in trie if we attemp to set this first... newHeaderMap = headerMap.toString(); assertTrue("After adding value we expect the map to have changed but was \n"+newHeaderMap,!lastHeaderMap.equals(newHeaderMap)); lastHeaderMap = newHeaderMap; headerMap.setUTF8Value("%b: %b\r\n", UNKNOWN_HEADER_ID); newHeaderMap = headerMap.toString(); assertTrue("After adding value we expect the map to have changed but was \n"+newHeaderMap,!lastHeaderMap.equals(newHeaderMap)); lastHeaderMap = newHeaderMap; headerMap.setUTF8Value(shr[x].readingTemplate(), "\n",shr[x].ordinal()); headerMap.setUTF8Value(shr[x].readingTemplate(), "\r\n",shr[x].ordinal()); newHeaderMap = headerMap.toString(); assertTrue("After adding value we expect the map to have changed but was \n"+newHeaderMap,!lastHeaderMap.equals(newHeaderMap)); lastHeaderMap = newHeaderMap; headerMap.setUTF8Value("\r\n", END_OF_HEADER_ID); headerMap.setUTF8Value("\n", END_OF_HEADER_ID); //Detecting this first but not right!! we did not close the revision??
/**
 * Registers a two-part template under the given id.
 * Both parts are handed, together with the id, to the underlying parser's
 * {@code setUTF8Value}.
 *
 * @param id            long id stored for the template
 * @param templatePart1 first CharSequence part of the template
 * @param templatePart2 second CharSequence part of the template
 * @return this GreenTokenMap, so calls can be chained
 */
public GreenTokenMap addTemplate(long id,
                                 CharSequence templatePart1,
                                 CharSequence templatePart2) {
    tp.setUTF8Value(templatePart1, templatePart2, id);
    return this;
}
/**
 * Creates a token map backed by a new TrieParser.
 *
 * @param ignoreCase forwarded as the last TrieParser constructor argument
 */
public GreenTokenMap(boolean ignoreCase) {
    tp = new TrieParser(
            128,
            4,
            false,      // skipDeepChecks
            true,       // supportsExtraction
            ignoreCase);
}
private void visitNumeric(TrieParserVisitor pv, int i, byte[] buffer, int bufferPosition) { assert(TYPE_VALUE_NUMERIC == data[i]); i++;//skip over the ID; buffer[bufferPosition++] = TYPE_VALUE_NUMERIC; buffer[bufferPosition++] = (byte)data[i++]; //type visitPatterns(pv,i,buffer,bufferPosition); }
private static void visitor_initForQuery(TrieParserReader reader, TrieParser trie, byte[] source, int sourcePos, long unfoundResult) { reader.capturedPos = 0; reader.capturedBlobArray = source; //working vars reader.pos = 0; reader.runLength = 0; reader.localSourcePos = sourcePos; reader.result = unfoundResult; reader.normalExit = true; reader.altStackPos = 0; if (null==reader.capturedValues || (reader.capturedValues.length>>2)<trie.maxExtractedFields()) { reader.capturedValues = new int[4*(1+trie.maxExtractedFields())*4]; } assert(trie.getLimit()>0) : "SequentialTrieParser must be setup up with data before use."; reader.type = trie.data[reader.pos++]; }
/**
 * Reports the size of this map.
 *
 * @return the value of {@code tp.getLimit()}
 */
public int getSize() {
    final int limit = tp.getLimit();
    return limit;
}
/**
 * Logs a warning showing the data found in the reader when a different
 * token was expected.
 *
 * @param label description of what the parser was looking for
 * @param cc    connection whose id is included in the warning
 */
private void reportCorruptStream(String label, ClientConnection cc) {
    final StringBuilder found = new StringBuilder();

    // Dump at most twice the longest known revision, bounded by what the reader holds.
    TrieParserReader.debugAsUTF8(
            trieReader,
            found,
            Math.min(trieReader.sourceLen, revisionMap.longestKnown()*2),
            false);

    logger.warn("{} looking for {} but found:\n{}\n\n", cc.id, label, found);
}
/**
 * Stores {@code value} in {@code trie} under the bytes of a captured field.
 * Each captured field occupies four consecutive ints in
 * {@code reader.capturedValues}: type, position, length and mask.
 *
 * @param reader reader holding the captured field and the blob it points into
 * @param idx    index of the captured field
 * @param trie   trie that receives the new entry
 * @param value  value stored for the captured bytes
 */
public static void capturedFieldSetValue(TrieParserReader reader, int idx, TrieParser trie, long value) {
    final int base = idx * 4;
    assert(base < reader.capturedValues.length) : "Either the idx argument is too large or TrieParseReader was not constructed to hold this many fields";

    final int[] slots = reader.capturedValues;

    final int type = slots[base];
    assert(type==0);

    final int blobPos = slots[base + 1];
    final int blobLen = slots[base + 2];
    final int blobMask = slots[base + 3];

    trie.setValue(reader.capturedBlobArray, blobPos, blobLen, blobMask, value);
}
/**
 * Writes the router map to a newly created temporary {@code .dot} file for
 * debugging. This is best effort: an I/O failure is reported on
 * {@code System.err} instead of being silently discarded, and no exception
 * is propagated for it.
 *
 * @param name prefix for the temporary file name
 */
public void debugRouterMap(String name) {
    File dotFile = null;
    try {
        dotFile = File.createTempFile(name, ".dot");
        routerMap.toDOTFile(dotFile);
    } catch (IOException e) {
        // Debug helper: keep running, but make the failure visible.
        final String target = (dotFile == null)
                ? "(temp file could not be created)"
                : dotFile.toString();
        System.err.println("Unable to write router map DOT file " + target + ": " + e);
    }
}
if (startingLength1 >= (revisionMap.shortestKnown()+1)) { TrieParserReader.loadPositionMemo(trieReader, positionMemoData, memoIdx); if (-1==revisionId && (trieReader.sourceLen < (revisionMap.longestKnown()+1))) { foundWork = 0;//we must exit to give the other stages a chance to fix this issue maxIter = 0;
/**
 * Registers a three-part template under the given id.
 * All three parts are handed, together with the id, to the underlying
 * parser's {@code setUTF8Value}.
 *
 * @param id            long id stored for the template
 * @param templatePart1 first CharSequence part of the template
 * @param templatePart2 second CharSequence part of the template
 * @param templatePart3 third CharSequence part of the template
 * @return this GreenTokenMap, so calls can be chained
 */
public GreenTokenMap addTemplate(long id,
                                 CharSequence templatePart1,
                                 CharSequence templatePart2,
                                 CharSequence templatePart3) {
    tp.setUTF8Value(templatePart1, templatePart2, templatePart3, id);
    return this;
}
public JSONFieldSchema(int nullPosition) { this.mappings = new JSONFieldMapping[0]; this.parser = new TrieParser(256,2,false,true); JSONStreamParser.populateWithJSONTokens(parser); }
private void visitBytes(TrieParserVisitor pv, int i, byte[] buffer, int bufferPosition) { assert(TYPE_VALUE_BYTES == data[i]); i++;//skip over the ID; buffer[bufferPosition++] = TYPE_VALUE_BYTES; buffer[bufferPosition++] = (byte)data[i++]; //stopper visitPatterns(pv,i,buffer,bufferPosition); }
private static void visitorInitForQuery(TrieParserReader reader, TrieParser trie, byte[] source, int sourcePos, long unfoundResult, long noMatchResult) { reader.capturedPos = 0; reader.capturedBlobArray = source; //working vars reader.pos = 0; reader.runLength = 0; reader.localSourcePos = sourcePos; reader.result = unfoundResult; reader.unfoundConstant = unfoundResult; reader.noMatchConstant = noMatchResult; reader.normalExit = true; reader.altStackPos = 0; if (trie.maxExtractedFields()>0) { if (null==reader.capturedValues || (reader.capturedValues.length>>2)<trie.maxExtractedFields()) { reader.capturedValues = new int[4*(1+trie.maxExtractedFields())*4]; } } assert(trie.getLimit()>0) : "SequentialTrieParser must be setup up with data before use."; reader.type = trie.data[reader.pos++]; }
/**
 * Queries {@code trie} with the given source bytes. A trie whose limit is
 * not greater than zero short-circuits to {@code unfoundResult}; otherwise
 * the work is delegated to {@code query2}.
 *
 * @param reader        reader used to walk the trie
 * @param trie          trie to search
 * @param source        buffer holding the bytes to match
 * @param sourcePos     starting position within {@code source}
 * @param sourceLength  number of bytes available to match
 * @param sourceMask    mask applied to positions in {@code source}
 * @param unfoundResult value returned when the trie holds no data; also passed to {@code query2}
 * @param noMatchResult passed to {@code query2}
 * @return the result of {@code query2}, or {@code unfoundResult} when the trie holds no data
 */
public static long query(TrieParserReader reader, TrieParser trie,
                         byte[] source, int sourcePos, long sourceLength, int sourceMask,
                         final long unfoundResult, final long noMatchResult) {
    if (TrieParser.getLimit(trie) > 0) {
        return query2(reader, trie, source, sourcePos, sourceLength, sourceMask,
                      unfoundResult, noMatchResult);
    }
    return unfoundResult;
}