public CustomFieldQuery(Query query, IndexReader reader, FastVectorHighlighter highlighter) throws IOException { this(query, reader, highlighter.isPhraseHighlight(), highlighter.isFieldMatch()); }
@Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } });
@Override protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, String[] preTags, String[] postTags, Encoder encoder ){ WeightedFragInfo weightedFragInfo = FragmentBuilderHelper.fixWeightedFragInfo(fieldType, values, fragInfo); return super.makeFragment(buffer, index, values, weightedFragInfo, preTags, postTags, encoder); } }
boolean forceSource = context.highlight().forceSource(field); if (field.fieldOptions().numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset()); if (field.fieldOptions().scoreOrdered()) { if (!forceSource && fieldType.stored()) { fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner); } else { fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue); entry = new FieldHighlightEntry(); if (field.fieldOptions().requireFieldMatch()) { entry.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); } else { entry.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch()); cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter(); cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit()); fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
/** * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter} */ public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) { assert fragInfo != null : "FragInfo must not be null"; assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name(); if (!fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer())) { /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time * which can potentially mess up the offsets. To prevent a SAIIOBException we need to resort * the fragments based on their offsets rather than using solely the positions as it is done in * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather * than in this hack... aka. "we are are working on in!" */ final List<SubInfo> subInfos = fragInfo.getSubInfos(); CollectionUtil.introSort(subInfos, new Comparator<SubInfo>() { @Override public int compare(SubInfo o1, SubInfo o2) { int startOffset = o1.getTermsOffsets().get(0).getStartOffset(); int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset(); return FragmentBuilderHelper.compare(startOffset, startOffset2); } }); return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(), fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost()); } else { return fragInfo; } }
for (Term[] currentPosTerm : terms) { for (Term term : currentPosTerm) { super.flatten(new TermQuery(term), reader, flatQueries, 1F); this.flatten(query, reader, flatQueries, 1F); } else { Term[] t = terms[currentPos]; for (int i = 0; i < t.length; i++) { termsIdx[currentPos] = i; convertMultiPhraseQuery(currentPos+1, termsIdx, orig, terms, pos, reader, flatQueries);
/** * a constructor. Using {@link SimpleFragListBuilder} and {@link ScoreOrderFragmentsBuilder}. * * @param phraseHighlight true or false for phrase highlighting * @param fieldMatch true of false for field matching */ public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch ){ this( phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder() ); }
@Override public void add( int startOffset, int endOffset, List<WeightedPhraseInfo> phraseInfoList ) { float totalBoost = 0; List<SubInfo> subInfos = new ArrayList<>(); for( WeightedPhraseInfo phraseInfo : phraseInfoList ){ subInfos.add( new SubInfo( phraseInfo.getText(), phraseInfo.getTermsOffsets(), phraseInfo.getSeqnum(), phraseInfo.getBoost() ) ); totalBoost += phraseInfo.getBoost(); } getFragInfos().add( new WeightedFragInfo( startOffset, endOffset, subInfos, totalBoost ) ); }
private FieldFragList getFieldFragList( final FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize ) throws IOException { FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery ); FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery ); return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize ); }
private static BoundaryScanner getBoundaryScanner(Field field) { final FieldOptions fieldOptions = field.fieldOptions(); final Locale boundaryScannerLocale = fieldOptions.boundaryScannerLocale() != null ? fieldOptions.boundaryScannerLocale() : Locale.ROOT; final HighlightBuilder.BoundaryScannerType type = fieldOptions.boundaryScannerType() != null ? fieldOptions.boundaryScannerType() : HighlightBuilder.BoundaryScannerType.CHARS; switch(type) { case SENTENCE: if (boundaryScannerLocale != null) { return new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(boundaryScannerLocale)); } return DEFAULT_SENTENCE_BOUNDARY_SCANNER; case WORD: if (boundaryScannerLocale != null) { return new BreakIteratorBoundaryScanner(BreakIterator.getWordInstance(boundaryScannerLocale)); } return DEFAULT_WORD_BOUNDARY_SCANNER; case CHARS: if (fieldOptions.boundaryMaxScan() != SimpleBoundaryScanner.DEFAULT_MAX_SCAN || fieldOptions.boundaryChars() != SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS) { return new SimpleBoundaryScanner(fieldOptions.boundaryMaxScan(), fieldOptions.boundaryChars()); } return DEFAULT_SIMPLE_BOUNDARY_SCANNER; default: throw new IllegalArgumentException("Invalid boundary scanner type: " + type.toString()); } }
@Override protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, String[] preTags, String[] postTags, Encoder encoder ){ WeightedFragInfo weightedFragInfo = FragmentBuilderHelper.fixWeightedFragInfo(fieldType, values, fragInfo); return super.makeFragment(buffer, index, values, weightedFragInfo, preTags, postTags, encoder); } }
/** * * @param fieldName * @param phraseCandidate * @return QueryPhraseMap */ public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){ QueryPhraseMap root = getRootMap( fieldName ); if( root == null ) return null; return root.searchPhrase( phraseCandidate ); }
@Override public FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, int fragCharSize ){ return createFieldFragList( fieldPhraseList, new SimpleFieldFragList( fragCharSize ), fragCharSize ); }
private void markTerminal( int slop, float boost ){ this.terminal = true; this.slop = slop; this.boost = boost; this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); }
public String createFragment( IndexReader reader, int docId, String fieldName, FieldFragList fieldFragList ) throws IOException { String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1 ); if( fragments == null || fragments.length == 0 ) return null; return fragments[0]; }
@Override public FieldFragList createFieldFragList( FieldPhraseList fieldPhraseList, int fragCharSize ){ return createFieldFragList( fieldPhraseList, new WeightedFieldFragList( fragCharSize ), fragCharSize ); }
sourceQuery = bq.getQuery(); boost *= bq.getBoost(); flatten(sourceQuery, reader, flatQueries, boost); } else if (sourceQuery instanceof SpanTermQuery) { super.flatten(new TermQuery(((SpanTermQuery) sourceQuery).getTerm()), reader, flatQueries, boost); } else if (sourceQuery instanceof ConstantScoreQuery) { flatten(((ConstantScoreQuery) sourceQuery).getQuery(), reader, flatQueries, boost); } else if (sourceQuery instanceof FunctionScoreQuery) { flatten(((FunctionScoreQuery) sourceQuery).getSubQuery(), reader, flatQueries, boost); } else if (sourceQuery instanceof MultiPhrasePrefixQuery) { flatten(sourceQuery.rewrite(reader), reader, flatQueries, boost); } else if (sourceQuery instanceof MultiPhraseQuery) { MultiPhraseQuery q = ((MultiPhraseQuery) sourceQuery); convertMultiPhraseQuery(0, new int[q.getTermArrays().length], q, q.getTermArrays(), q.getPositions(), reader, flatQueries); } else if (sourceQuery instanceof BlendedTermQuery) { final BlendedTermQuery blendedTermQuery = (BlendedTermQuery) sourceQuery; flatten(blendedTermQuery.rewrite(reader), reader, flatQueries, boost); } else if (sourceQuery instanceof BoostingQuery) { BoostingQuery boostingQuery = (BoostingQuery) sourceQuery; flatten(boostingQuery.getMatch(), reader, flatQueries, boost); flatten(boostingQuery.getContext(), reader, flatQueries, boostingQuery.getBoost()); } else if (sourceQuery instanceof SynonymQuery) { flatten(new TermQuery(term), reader, flatQueries, boost); flatten(childQuery, reader, flatQueries, boost);
/** * a constructor. Using SimpleFragListBuilder and ScoreOrderFragmentsBuilder. * * @param phraseHighlight true or false for phrase highlighting * @param fieldMatch true of false for field matching */ public FastVectorHighlighter( boolean phraseHighlight, boolean fieldMatch ){ this( phraseHighlight, fieldMatch, new SimpleFragListBuilder(), new ScoreOrderFragmentsBuilder() ); }
/** * Build a FieldFragList for one field. */ private FieldFragList getFieldFragList( FragListBuilder fragListBuilder, final FieldQuery fieldQuery, IndexReader reader, int docId, String matchedField, int fragCharSize ) throws IOException { FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, matchedField, fieldQuery ); FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit ); return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize ); }
/** * * @return QueryPhraseMap */ public QueryPhraseMap searchPhrase( String fieldName, final List<TermInfo> phraseCandidate ){ QueryPhraseMap root = getRootMap( fieldName ); if( root == null ) return null; return root.searchPhrase( phraseCandidate ); }