private boolean shouldRender(String name, Object value) { if (debugRendering) return true; if (name.startsWith(VESPA_HIDDEN_FIELD_PREFIX)) return false; if (value instanceof CharSequence && ((CharSequence) value).length() == 0) return false; // StringFieldValue cannot hold a null, so checking length directly is OK: if (value instanceof StringFieldValue && ((StringFieldValue) value).getString().isEmpty()) return false; if (value instanceof NanNumber) return false; return true; }
/**
 * Writes the content of a string field value to the given XML stream.
 *
 * Content for which {@code containsNonPrintableCharactersString} returns true is
 * converted to UTF-8 bytes and written base64 encoded, with the attribute
 * {@code binaryencoding="base64"} added to the stream; any other content is
 * written as is.
 *
 * @param s   the string field value to print
 * @param xml the XML stream to write to
 */
public static void printStringXml(StringFieldValue s, XmlStream xml) {
    final String text = s.getString();
    if (!containsNonPrintableCharactersString(text)) {
        xml.addContent(text);
        return;
    }
    // Convert to bytes before touching the stream, so a failing conversion
    // leaves the stream without the encoding attribute.
    final byte[] utf8 = Utf8.toBytes(text);
    xml.addAttribute("binaryencoding", "base64");
    xml.addContent(new Base64(0).encodeToString(utf8));
}
/**
 * Verifies that this update's value and nested update are compatible with the given field type.
 *
 * <ul>
 *   <li>Array types: the value must be an {@code IntegerFieldValue}, and the nested
 *       update is checked against the array's nested type.</li>
 *   <li>Weighted set types: the value must be assignable to a field value of the set's
 *       nested type, and the nested update is checked against {@code DataType.INT}.</li>
 *   <li>Structured types: the value must be a {@code StringFieldValue} naming an existing
 *       field, and the nested update is checked against that field's data type.</li>
 * </ul>
 *
 * @param fieldType the data type of the field being updated
 * @throws IllegalArgumentException      if the value has the wrong type or names an unknown field
 * @throws UnsupportedOperationException if the field type is none of the above
 */
@Override
protected void checkCompatibility(DataType fieldType) {
    if (fieldType instanceof ArrayDataType) {
        if (!(value instanceof IntegerFieldValue)) {
            throw new IllegalArgumentException("Expected integer, got " + value.getClass().getName() + ".");
        }
        ArrayDataType arrayType = (ArrayDataType) fieldType;
        update.checkCompatibility(arrayType.getNestedType());
        return;
    }

    if (fieldType instanceof WeightedSetDataType) {
        WeightedSetDataType setType = (WeightedSetDataType) fieldType;
        // Assigning to a fresh value of the nested type is the compatibility check for the value.
        setType.getNestedType().createFieldValue().assign(value);
        update.checkCompatibility(DataType.INT);
        return;
    }

    if (fieldType instanceof StructuredDataType) {
        if (!(value instanceof StringFieldValue)) {
            throw new IllegalArgumentException("Expected string, got " + value.getClass().getName() + ".");
        }
        String fieldName = ((StringFieldValue) value).getString();
        Field field = ((StructuredDataType) fieldType).getField(fieldName);
        if (field == null) {
            throw new IllegalArgumentException("Field '" + value + "' not found.");
        }
        update.checkCompatibility(field.getDataType());
        return;
    }

    throw new UnsupportedOperationException("Field type " + fieldType.getName() + " not supported.");
}
/** * Annotates the given string with the appropriate linguistics annotations. * * @param text the text to annotate * @return whether or not anything was annotated */ public boolean annotate(StringFieldValue text) { if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) return true; // Already annotated with LINGUISTICS. Tokenizer tokenizer = factory.getTokenizer(); String input = (text.getString().length() <= config.getMaxTokenizeLength()) ? text.getString() : text.getString().substring(0, config.getMaxTokenizeLength()); Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(), config.getRemoveAccents()); TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences()); SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS); for (Token token : tokens) { addAnnotationSpan(text.getString(), tree.spanList(), tokenizer, token, config.getStemMode(), termOccurrences); } if (tree.numAnnotations() == 0) return false; text.setSpanTree(tree); return true; }
generator.writeRawValue(((JsonProducer) field).toJson()); } else if (field instanceof StringFieldValue) { generator.writeString(((StringFieldValue)field).getString()); } else if (field instanceof TensorFieldValue) { renderTensor(((TensorFieldValue)field).getTensor());
byte[] stringBytes = createUTF8CharArray(value.getString()); try { bytePositions = calculateBytePositions(value.getString()); } catch (RuntimeException e) { throw new SerializationException("Exception thrown while serializing span tree '" + tree.getName() + "'; string='" + value.getString() + "'", e);
byte[] stringBytes = createUTF8CharArray(value.getString()); try { bytePositions = calculateBytePositions(value.getString()); } catch (RuntimeException e) { throw new SerializationException("Exception thrown while serializing span tree '" + tree.getName() + "'; string='" + value.getString() + "'", e);
/**
 * Replaces the current value with a clone carrying a LINGUISTICS span tree that annotates
 * the whole string as a single alphabetic token.
 *
 * The TERM annotation holds the lower-cased string when lower-casing changes it, and no
 * value otherwise. Empty input is left untouched.
 *
 * @param ctx the execution context holding the string value to process
 */
@Override
protected void doExecute(ExecutionContext ctx) {
    StringFieldValue source = (StringFieldValue) ctx.getValue();
    if (source.getString().isEmpty()) {
        return;
    }

    StringFieldValue result = source.clone();
    ctx.setValue(result);

    String original = result.getString();
    String lowered = toLowerCase(original);

    // The term value is only set when lower-casing changed the string.
    StringFieldValue termValue = null;
    if (!lowered.equals(original)) {
        termValue = new StringFieldValue(lowered);
    }

    SpanList spans = new SpanList();
    SpanTree linguistics = new SpanTree(SpanTrees.LINGUISTICS, spans);
    SpanNode wholeString = new Span(0, original.length());
    linguistics.annotate(wholeString, new Annotation(AnnotationTypes.TERM, termValue));
    linguistics.annotate(wholeString,
                         new Annotation(AnnotationTypes.TOKEN_TYPE,
                                        new IntegerFieldValue(TokenType.ALPHABETIC.getValue())));
    spans.add(wholeString);
    result.setSpanTree(linguistics);
}
StringFieldValue treeName = new StringFieldValue(); treeName.deserialize(this); tree.setName(treeName.getString());
StringFieldValue treeName = new StringFieldValue(); treeName.deserialize(this); tree.setName(treeName.getString());
@Override protected void doExecute(ExecutionContext ctx) { StringFieldValue input = (StringFieldValue)ctx.getValue(); SpanList spanList = input.setSpanTree(new SpanTree(SpanTrees.LINGUISTICS)).spanList(); int lastPosition = 0; for (Iterator<GramSplitter.Gram> it = linguistics.getGramSplitter().split(input.getString(), gramSize); it.hasNext();) { GramSplitter.Gram gram = it.next(); // if there is a gap before this gram, then annotate the gram as punctuation // (technically it may be of various types, but it does not matter - we just // need to annotate it somehow (as a non-term) to make sure it is added to the summary) if (lastPosition < gram.getStart()) { typedSpan(lastPosition, gram.getStart() - lastPosition, TokenType.PUNCTUATION, spanList); } // annotate gram as a word term String gramString = gram.extractFrom(input.getString()); typedSpan(gram.getStart(), gram.getLength(), TokenType.ALPHABETIC, spanList). annotate(LinguisticsAnnotator.lowerCaseTermAnnotation(gramString, gramString)); lastPosition = gram.getStart() + gram.getLength(); } // handle punctuation at the end if (lastPosition < input.toString().length()) { typedSpan(lastPosition, input.toString().length() - lastPosition, TokenType.PUNCTUATION, spanList); } }
StringFieldValue treeName = new StringFieldValue(); treeName.deserialize(this); tree.setName(treeName.getString()); value.setSpanTree(tree); readSpanTree(tree, false);
StringFieldValue treeName = new StringFieldValue(); treeName.deserialize(this); tree.setName(treeName.getString()); value.setSpanTree(tree); readSpanTree(tree, false);