/** * Returns the item to insert from these referenced matches, or null if none * * @param label the label of the matches */ public Item toItem(String label) { if (matches.size()==0) return null; if (matches.size()==1) return matches.get(0).toItem(label); PhraseItem phrase=new PhraseItem(); // TODO: Somehow allow AND items instead here phrase.setIndexName(label); for (Iterator<Match> i=matches.iterator(); i.hasNext(); ) { phrase.addItem(i.next().toItem(label)); } return phrase; }
@Override public Item setItem(int index, Item item) { if (item instanceof WordItem || item instanceof PhraseSegmentItem) { return setIndexedItem(index, (IndexedItem) item); } else if (item instanceof IntItem) { return setIndexedItem(index, convertIntToWord(item)); } else if (item instanceof PhraseItem) { PhraseItem phrase = (PhraseItem) item; Iterator<Item> i = phrase.getItemIterator(); // we assume we don't try to add empty phrases IndexedItem firstItem = (IndexedItem) i.next(); Item toReturn = setIndexedItem(index++, firstItem); while (i.hasNext()) { addIndexedItem(index++, (IndexedItem) i.next()); } return toReturn; } else { throw new IllegalArgumentException("Can not add " + item + " to a phrase"); } }
/**
 * A special purpose check: only when the parent is a phrase and the item is an
 * indexed item are their index names compared; in every other case the check passes.
 *
 * @param item the item to check
 * @param parent the parent the item is checked against
 * @return false only if the parent is a phrase, the item is indexed and their index names differ
 */
private boolean equalIndexNameIfParentIsPhrase(Item item, CompositeItem parent) {
    boolean comparable = parent instanceof PhraseItem && item instanceof IndexedItem;
    if (!comparable) {
        return true;
    }
    PhraseItem phrase = (PhraseItem) parent;
    IndexedItem indexed = (IndexedItem) item;
    return phrase.getIndexName().equals(indexed.getIndexName());
}
/**
 * Replaces a phrase holding exactly one item by that item.
 *
 * @param root the phrase to collapse
 * @return the single contained item, or the phrase itself if it does not hold exactly one item
 */
private Item collapsePhrase(PhraseItem root) {
    return root.getItemCount() == 1 ? root.getItem(0) : root;
}
private static Item collapsePhrase(PhraseItem phrase) { if (phrase.getItemCount() == 1 && phrase.getItem(0) instanceof WordItem) { // TODO: Other stuff which needs propagation? WordItem word = (WordItem) phrase.getItem(0); word.setWeight(phrase.getWeight()); return word; } else { return phrase; } }
/**
 * Inserts an item at the given index.
 * <p>
 * Words and phrase segments are inserted directly, ints are converted to words first.
 * For a phrase, each of its sub-items is inserted in order starting at {@code index}.
 *
 * @param index the position at which the item (or the first sub-item of a phrase) is inserted
 * @param item the item to insert
 * @throws IllegalArgumentException if the item is of a type which cannot be part of a phrase
 */
@Override
public void addItem(int index, Item item) {
    if (item instanceof WordItem || item instanceof PhraseSegmentItem) {
        addIndexedItem(index, (IndexedItem) item);
    } else if (item instanceof IntItem) {
        addIndexedItem(index, convertIntToWord(item));
    } else if (item instanceof PhraseItem) {
        PhraseItem phrase = (PhraseItem) item;
        for (Iterator<Item> i = phrase.getItemIterator(); i.hasNext();) {
            // Cast to IndexedItem rather than WordItem: a phrase may legally contain
            // phrase segments (see the first branch), and casting those to WordItem
            // would fail with a ClassCastException.
            addIndexedItem(index++, (IndexedItem) i.next());
        }
    } else {
        throw new IllegalArgumentException("Can not add " + item + " to a phrase");
    }
}
/**
 * Produces a phrase made from the terms of this production, using this production's
 * label as index name.
 * <p>
 * When replacing, the phrase is inserted in place of the first nonreferenced match of
 * the evaluation. Otherwise the phrase gets this production's weight and is added to
 * the evaluation with this production's term type.
 *
 * @param e the rule evaluation to produce into
 * @param offset passed on to {@code insertMatch} when replacing
 */
public void produce(RuleEvaluation e, int offset) {
    PhraseItem producedPhrase = new PhraseItem();
    producedPhrase.setIndexName(getLabel());
    for (String term : terms) {
        producedPhrase.addItem(new WordItem(term));
    }

    if (!replacing) {
        producedPhrase.setWeight(getWeight());
        // Only build the trace message when it will actually be used
        if (e.getTraceLevel() >= 6) {
            e.trace(6, "Adding '" + producedPhrase + "'");
        }
        e.addItem(producedPhrase, getTermType());
        return;
    }

    Match matched = e.getNonreferencedMatch(0);
    insertMatch(e, matched, producedPhrase, offset);
}
/**
 * Rejects phrases in which the same term is repeated too many times in a row.
 * <p>
 * Phrases with at most MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE items are never
 * inspected. In longer phrases, consecutive term items are compared by their indexed
 * string; any item which is not a term item breaks the current run.
 *
 * @param phrase the phrase to check
 * @throws IllegalArgumentException if a term equals its predecessor
 *         MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE times in a row; the rejection
 *         counter is incremented before throwing
 */
private void repeatedConsecutiveTermsInPhraseCheck(PhraseItem phrase) {
    if (phrase.getItemCount() <= MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) {
        return;
    }

    String previous = null;
    int repeats = 0;
    for (int index = 0; index < phrase.getItemCount(); index++) {
        Item item = phrase.getItem(index);
        if (!(item instanceof TermItem)) {
            // A non-term item interrupts any run of equal terms
            previous = null;
            repeats = 0;
            continue;
        }

        String current = ((TermItem) item).getIndexedString();
        if (previous != null) {
            if (!previous.equals(current)) {
                repeats = 0;
            } else if (++repeats >= MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE) {
                repeatedConsecutiveTermsInPhraseRejections.add();
                throw new IllegalArgumentException("More than " + MAX_REPEATED_CONSECUTIVE_TERMS_IN_PHRASE +
                                                   " ocurrences of term '" + current +
                                                   "' in a row detected in phrase : " + phrase.toString());
            }
        }
        previous = current;
    }
}

private static final class Count {
destination.append(normalizeIndexName(phrase.getIndexName())) .append(" contains "); for (int i = 0; i < phrase.getItemCount(); ++i) { if (i > 0) { destination.append(", "); Item current = phrase.getItem(i); if (current instanceof WordItem) { WordSerializer.serializeWordWithoutIndex(destination, current);
phrase.addItem(word); } else if (firstWord != null) { phrase = new PhraseItem(); if (quoted || submodes.site || submodes.url) { phrase.setExplicit(true); phrase.addItem(MarkerWordItem.createStartOfHost()); phrase.addItem(firstWord); phrase.addItem(word); } else if (word instanceof PhraseItem) { phrase = (PhraseItem) word; phrase.addItem(MarkerWordItem.createEndOfHost()); return firstWord; } else { phrase = new PhraseItem(); if (addStartOfHostMarker) { phrase.addItem(MarkerWordItem.createStartOfHost()); phrase.addItem(firstWord); if (!starAfterFirst) { phrase.addItem(MarkerWordItem.createEndOfHost()); phrase.setExplicit(true); return phrase;
firstWord = newWord; } else if (phrase == null) { // Second pass phrase = new PhraseItem(); phrase.addItem(firstWord); phrase.addItem(newWord); } else { // Following passes phrase.addItem(newWord);
public int encode(ByteBuffer buffer) { encodeThis(buffer); int itemCount = 1; for (Iterator<Item> i = getItemIterator(); i.hasNext();) { Item subitem = i.next(); if (subitem instanceof PhraseSegmentItem) { PhraseSegmentItem seg = (PhraseSegmentItem) subitem; // "What encode does, minus what encodeThis does" itemCount += seg.encodeContent(buffer); } else { itemCount += subitem.encode(buffer); } } return itemCount; }
/**
 * Appends the index string followed by the double-quoted, space-separated
 * subitems of this item to the given buffer.
 * <p>
 * Word items contribute their word, phrase segments their contents string and
 * any other item its {@code toString()} form.
 *
 * @param buffer the buffer to append to
 */
protected void appendBodyString(StringBuilder buffer) {
    appendIndexString(buffer);
    buffer.append("\"");

    boolean first = true;
    Iterator<Item> children = getItemIterator();
    while (children.hasNext()) {
        // Separator goes between items only: never before the first or after the last
        if (!first) {
            buffer.append(" ");
        }
        first = false;

        Item child = children.next();
        if (child instanceof WordItem) {
            buffer.append(((WordItem) child).getWord());
        } else if (child instanceof PhraseSegmentItem) {
            ((PhraseSegmentItem) child).appendContentsString(buffer);
        } else {
            buffer.append(child.toString());
        }
    }

    buffer.append("\"");
}
/**
 * Normalizes the terms of a phrase, provided the index of the phrase has
 * normalizing turned on.
 * <p>
 * Term items directly in the phrase are normalized through the phrase's own iterator.
 * Any other content is treated as a phrase segment, whose terms are normalized
 * through the segment's iterator.
 *
 * @param language the language passed on to {@code normalizeWord}
 * @param indexFacts the index information used to look up the index of the phrase
 * @param phrase the phrase to normalize
 * @return the given phrase
 */
private Item normalizePhrase(Language language, IndexFacts.Session indexFacts, PhraseItem phrase) {
    boolean normalizing = indexFacts.getIndex(phrase.getIndexName()).getNormalize();
    if (!normalizing) {
        return phrase;
    }

    ListIterator<Item> contents = phrase.getItemIterator();
    while (contents.hasNext()) {
        IndexedItem content = (IndexedItem) contents.next();
        if (content instanceof TermItem) {
            normalizeWord(language, indexFacts, (TermItem) content, contents);
            continue;
        }

        ListIterator<Item> segmentTerms = ((PhraseSegmentItem) content).getItemIterator();
        while (segmentTerms.hasNext()) {
            normalizeWord(language, indexFacts, (TermItem) segmentTerms.next(), segmentTerms);
        }
    }
    return phrase;
}
private void replaceItemByGrams(Item item, Item grams, int indexInParent) { if (!(grams instanceof CompositeItem) || !(item.getParent() instanceof PhraseItem)) { // usually, simply replace item.getParent().setItem(indexInParent, grams); } else { // but if the parent is a phrase, we cannot add the AND to it, so add each gram to the phrase PhraseItem phraseParent = (PhraseItem)item.getParent(); phraseParent.removeItem(indexInParent); int addedTerms = 0; for (Iterator<Item> i = ((CompositeItem)grams).getItemIterator(); i.hasNext(); ) { phraseParent.addItem(indexInParent+(addedTerms++),i.next()); } } }
/** * Convert segment items into their mutable counterpart, do not update query tree. * Non-segment items are returned directly. * * @return a mutable CompositeItem instance */ private CompositeItem convertSegmentItem(CompositeItem item) { if (!(item instanceof SegmentItem)) { return item; } CompositeItem converted = null; if (item instanceof AndSegmentItem) { converted = new AndItem(); } else if (item instanceof PhraseSegmentItem) { PhraseItem p = new PhraseItem(); PhraseSegmentItem old = (PhraseSegmentItem) item; p.setIndexName(old.getIndexName()); converted = p; } else { // TODO: Do something else than nothing for unknowns? return item; } for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { converted.addItem(i.next()); } return converted; }
/**
 * Returns the subitem at the given position, cast to a word item.
 *
 * @param index the (0-based) index of the item to return
 * @return the subitem at that index as a WordItem
 * @throws IndexOutOfBoundsException if there is no subitem at index
 * @throws ClassCastException if the subitem at index is not a WordItem
 */
public WordItem getWordItem(int index) {
    Item subitem = getItem(index);
    return (WordItem) subitem;
}
/**
 * Creates a phrase containing the given words, in the given order.
 *
 * @param words the words to wrap in word items and add to this phrase
 */
public PhraseItem(String[] words) {
    for (String word : words) {
        addIndexedItem(new WordItem(word));
    }
}
PhraseItem phraseDeletionCandidate = (PhraseItem) deleteCandidate; PhraseItem phraseToCheck = (PhraseItem) check; if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) { composite.removeItem(i); break;
/**
 * Adds a subitem to the end of this phrase.
 * <p>
 * Words, phrase segments and word alternatives are added directly, ints are
 * converted to words first, and for a phrase each of its subitems is added in order.
 * Per the original contract an added word gets the index name of this phrase;
 * that is presumably done by {@code addIndexedItem}, which is not visible here.
 *
 * @param item the item to add
 * @throws IllegalArgumentException if the given item is of a type which cannot be
 *         part of a phrase
 */
public void addItem(Item item) {
    if (item instanceof WordItem || item instanceof PhraseSegmentItem || item instanceof WordAlternativesItem) {
        addIndexedItem((IndexedItem) item);
        return;
    }
    if (item instanceof IntItem) {
        addIndexedItem(convertIntToWord(item));
        return;
    }
    if (!(item instanceof PhraseItem)) {
        throw new IllegalArgumentException("Can not add " + item + " to a phrase");
    }

    Iterator<Item> subItems = ((PhraseItem) item).getItemIterator();
    while (subItems.hasNext()) {
        addIndexedItem((IndexedItem) subItems.next());
    }
}