/** Creates the term item for a parsed word; this implementation produces a plain {@link WordItem}. */
@Override WordItem newTermItem(String word) { return new WordItem(word); }
/** Discloses this item's state: the superclass properties plus this subclass' marker word. */
@Override public void disclose(Discloser discloser) { super.disclose(discloser); discloser.addProperty("markerWord", markerWord); } }
/**
 * Returns whether this equals another object: true iff the superclass state matches,
 * the other object is a MarkerWordItem, and both marker-word flags are equal.
 */
public boolean equals(Object o) {
    if (!super.equals(o)) return false;
    return (o instanceof MarkerWordItem) && markerWord.equals(((MarkerWordItem) o).markerWord);
}
/**
 * Adds one weighted word item per token to the given composite.
 * Each created word searches the given field, is marked as already stemmed,
 * and has accent normalization disabled.
 *
 * @param parent    the composite receiving the word items
 * @param fieldName the index each created word item searches
 * @param tokens    token string to weight mapping
 * @return the parent composite, for call chaining
 */
private CompositeItem populate(CompositeItem parent, String fieldName, Map<String, Integer> tokens) {
    tokens.forEach((token, weight) -> {
        WordItem word = new WordItem(token, fieldName);
        word.setWeight(weight);
        word.setStemmed(true);       // tokens arrive pre-stemmed
        word.setNormalizable(false); // skip accent normalization downstream
        parent.addItem(word);
    });
    return parent;
}
@Override protected void encodeThis(ByteBuffer buffer) { super.encodeThis(buffer); // takes care of index bytes putString(getEncodedWord(), buffer); }
/**
 * Builds the replacement word for a block that stemmed to exactly one segment.
 * The replacement is marked as stemmed and inherits the source item's attributes.
 */
private WordItem singleStemSegment(Item blockAsItem, String stem, String indexName, Substring substring) {
    WordItem stemmedWord = new WordItem(stem, indexName, true, substring);
    stemmedWord.setStemmed(true);
    copyAttributes(blockAsItem, stemmedWord);
    return stemmedWord;
}
// NOTE(review): this fragment is truncated — the method body is not closed here, and
// 'image', 'offset' and 'length' are assigned without visible declarations (presumably
// locals declared in the missing remainder — TODO confirm against the full source).
// Visible behavior: reads the word item's flags (origin, position data, stemming, casing,
// accent normalization, segmenting rule, from-query, prefix) and, when the raw word differs
// from the indexed string, serializes origin information into the annotation buffer.
private static String wordAnnotations(WordItem item) { Substring origin = item.getOrigin(); boolean usePositionData = item.usePositionData(); boolean stemmed = item.isStemmed(); boolean lowercased = item.isLowercased(); boolean accentDrop = item.isNormalizable(); SegmentingRule andSegmenting = item.getSegmentingRule(); boolean isFromQuery = item.isFromQuery(); StringBuilder annotation = new StringBuilder(); boolean prefix = item instanceof PrefixItem; image = item.getRawWord(); offset = 0; length = image.length(); if (!image.substring(offset, offset + length).equals(item.getIndexedString())) { VespaSerializer.serializeOrigin(annotation, image, offset, length);
// NOTE(review): this line concatenates tails of several different methods — there are
// consecutive unconditional 'return' statements and an unclosed 'for' loop, so it cannot
// compile as written. Left byte-identical; reconcile against the original source file.
// Visible fragments: (1) building a special-token word with word-matching disabled,
// (2) returning a word for a normalized token, (3) returning a word for a single segment,
// (4) adding each segment of a multi-segment token to a composite with its segment index.
WordItem w = new WordItem(token.toString(), true, token.substring); w.setWords(false); w.setFromSpecialToken(true); return w; return new WordItem(normalizedToken, true, token.substring); return new WordItem(segments.get(0), "", true, token.substring); int n = 0; for (String segment : segments) { WordItem w = new WordItem(segment, "", true, token.substring); w.setFromSegmented(true); w.setSegmentIndex(n++); w.setStemmed(false); composite.addItem(w);
/**
 * Discloses this item's state: the index name, plus one weighted word child
 * per entry in the set.
 */
@Override public void disclose(Discloser discloser) {
    super.disclose(discloser);
    discloser.addProperty("index", indexName);
    set.forEach((key, weight) -> {
        WordItem child = new WordItem(key.toString(), indexName);
        child.setWeight(weight);
        discloser.addChild(child);
    });
}
/** * Splits the given item into n-grams and adds them as a CompositeItem containing WordItems searching the * index of the input term. If the result is a single gram, that single WordItem is returned rather than the AndItem * * @param term the term to split, must be an item which implement the IndexedItem and BlockItem "mixins" * @param text the text of the item, just stringValue() if the item is a TermItem * @param gramSize the gram size to split to * @param query the query in which this rewriting is done * @return the root of the query subtree produced by this, containing the split items */ protected Item splitToGrams(Item term, String text, int gramSize, Query query) { String index = ((HasIndexItem)term).getIndexName(); CompositeItem gramsItem = createGramRoot(query); gramsItem.setIndexName(index); Substring origin = ((BlockItem)term).getOrigin(); for (Iterator<GramSplitter.Gram> i = getGramSplitter().split(text,gramSize); i.hasNext(); ) { GramSplitter.Gram gram = i.next(); WordItem gramWord = new WordItem(gram.extractFrom(text), index, false, origin); gramWord.setWeight(term.getWeight()); gramWord.setProtected(true); gramsItem.addItem(gramWord); } return gramsItem.getItemCount()==1 ? gramsItem.getItem(0) : gramsItem; // return the AndItem, or just the single gram if not multiple }
/**
 * Returns whether the given word should be lowercased: never for already-lowercased
 * words, otherwise when the index at {@code commonPath.indexName} is a lowercase
 * index or an attribute.
 */
@Override public boolean shouldLowercase(String commonPath, WordItem word, IndexFacts.Session indexFacts) {
    if (word.isLowercased()) return false;
    Index index = indexFacts.getIndex(commonPath + "." + word.getIndexName());
    return index.isLowercase() || index.isAttribute();
} }
/**
 * Returns the word string to use when encoding this item.
 * By default this is simply {@link #getIndexedString()}; subclasses may override.
 */
protected String getEncodedWord() { return getIndexedString(); }
/**
 * Serializes the given word item as 'indexName contains annotatedTerm' into the
 * destination buffer. Always returns false (children, if any, are not traversed).
 */
@Override boolean serialize(StringBuilder destination, Item item) {
    WordItem word = (WordItem) item;
    String annotations = getAllAnnotations(word).toString();
    destination.append(normalizeIndexName(word.getIndexName()));
    destination.append(" contains ");
    VespaSerializer.annotatedTerm(destination, word, annotations);
    return false;
}
/** Returns whether the item is a specific word item */ private boolean isTheWord(String word, Item item) { if (!(item instanceof WordItem)) { return false; } return word.equalsIgnoreCase(((WordItem) item).getRawWord()); // TODO: Why not search for getWord w.o lowercasing? }
/**
 * ANDs the recall filter expression (from the 'recall' query property) with the user's
 * query tree. The filter is parsed with a placeholder word ("foo") which is then replaced
 * by the original query root. Returns an error result if the recall expression cannot be
 * parsed, has a non-composite root, contains a rank item, or lacks the placeholder.
 *
 * Fix: the "Expected CompositeItem root node" message previously reported
 * root.getClass() (always QueryTree) instead of the actual root item's class.
 */
@Override
public Result search(Query query, Execution execution) {
    String recall = query.properties().getString(recallName);
    if (recall == null) return execution.search(query); // no recall filter: pass through

    AnyParser parser = new AnyParser(ParserEnvironment.fromExecutionContext(execution.context()));
    QueryTree root = parser.parse(Parsable.fromQueryModel(query.getModel()).setQuery("foo").setFilter(recall));
    String err;
    if (root.getRoot() instanceof NullItem) {
        err = "Failed to parse recall parameter.";
    } else if (!(root.getRoot() instanceof CompositeItem)) {
        err = "Expected CompositeItem root node, got " + root.getRoot().getClass().getSimpleName() + ".";
    } else if (hasRankItem(root.getRoot())) {
        // NOTE(review): the root is replaced even though an error is returned below —
        // preserved as-is, but confirm this side effect is intentional.
        query.getModel().getQueryTree().setRoot(root.getRoot());
        err = "Recall contains at least one rank item.";
    } else {
        WordItem placeholder = findOrigWordItem(root.getRoot(), "foo");
        if (placeholder == null) {
            err = "Could not find placeholder workQuery root.";
        } else {
            updateFilterTerms(root);
            // Splice the original query tree in where the placeholder word was
            CompositeItem parent = placeholder.getParent();
            parent.setItem(parent.getItemIndex(placeholder), query.getModel().getQueryTree().getRoot());
            query.getModel().getQueryTree().setRoot(root.getRoot());
            query.trace("ANDed recall tree with root workQuery node.", true, 3);
            return execution.search(query);
        }
    }
    return new Result(query, ErrorMessage.createInvalidQueryParameter(err));
}
/**
 * Produces the literal word of this production into the evaluation: either replacing
 * the first non-referenced match, or adding it as a new weighted term.
 */
public void produce(RuleEvaluation e, int offset) {
    WordItem produced = new WordItem(literal, getLabel());
    if (replacing) {
        Match matched = e.getNonreferencedMatch(0);
        insertMatch(e, matched, produced, offset);
        return;
    }
    produced.setWeight(getWeight());
    if (e.getTraceLevel() >= 6)
        e.trace(6, "Adding '" + produced + "'");
    e.addItem(produced, getTermType());
}
/**
 * Returns whether the given word should be lowercased: never for already-lowercased
 * words, otherwise when its index is a lowercase index or an attribute.
 */
@Override public boolean shouldLowercase(WordItem word, IndexFacts.Session indexFacts) {
    if (word.isLowercased()) return false;
    Index index = indexFacts.getIndex(word.getIndexName());
    if (index.isLowercase()) return true;
    return index.isAttribute();
}
/**
 * Validates one item inside a nonEmpty() expression, throwing IllegalArgumentException
 * on null items, empty word strings, or empty composites. Always returns true so the
 * traversal continues into children.
 */
@Override public boolean visit(Item item) {
    if (item instanceof NullItem)
        throw new IllegalArgumentException("Got NullItem inside nonEmpty().");
    if (item instanceof WordItem && ((WordItem) item).getIndexedString().isEmpty())
        throw new IllegalArgumentException("Searching for empty string inside nonEmpty()");
    if (item instanceof CompositeItem && ((CompositeItem) item).getItemCount() == 0)
        throw new IllegalArgumentException("Empty composite operator (" + item.getName() + ") inside nonEmpty()");
    return true;
}
/**
 * Creates a phrase containing the same words and pertinent state as the given
 * AndSegmentItem. The index name is taken from the segment's first word.
 */
public PhraseSegmentItem(AndSegmentItem segAnd) {
    super(segAnd.getRawWord(), segAnd.stringValue(), segAnd.isFromQuery(), segAnd.isStemmed(), segAnd.getOrigin());
    if (segAnd.getItemCount() == 0) return; // nothing to copy
    setIndexName(((WordItem) segAnd.getItem(0)).getIndexName());
    for (Iterator<Item> i = segAnd.getItemIterator(); i.hasNext(); ) {
        addWordItem((WordItem) i.next());
    }
}