/**
 * Works around analysis chains that produce inconsistent positions and offsets, which can lead to a
 * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped type of the highlighted field; its index analyzer is checked for broken analysis
 * @param values    field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo  the fragment to check, must not be {@code null}
 * @return {@code fragInfo} unchanged when it has no sub-infos or the analyzer is not reported as broken;
 *         otherwise a new {@link WeightedFragInfo} whose sub-infos are ordered by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(fieldType.indexAnalyzer())) {
        return fragInfo;
    }
    /*
     * Special case: broken analysis (WDF, for example) was used to build the term vectors at index time,
     * which can mess up the offsets. To avoid the out-of-bounds exception the sub-infos are re-sorted by
     * offset instead of relying solely on positions the way the FastVectorHighlighter does. This is really
     * a Lucene problem and should be fixed there rather than through this workaround.
     */
    final List<SubInfo> sortedSubInfos = fragInfo.getSubInfos();
    final Comparator<SubInfo> byFirstStartOffset = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo left, SubInfo right) {
            return FragmentBuilderHelper.compare(
                    left.getTermsOffsets().get(0).getStartOffset(),
                    right.getTermsOffsets().get(0).getStartOffset());
        }
    };
    // Sorts the list handed out by getSubInfos() directly.
    CollectionUtil.introSort(sortedSubInfos, byFirstStartOffset);
    // The fragment must start no later than the first term offset of its (re-read) first sub-info.
    final int firstTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int fixedStart = Math.min(firstTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(fixedStart, fragInfo.getEndOffset(), sortedSubInfos, fragInfo.getTotalBoost());
}
/**
 * Renders {@code text} followed by the string forms of all term offsets wrapped in parentheses.
 *
 * @return {@code text + "(" + <each Toffs.toString()> + ")"}
 */
@Override
public String toString(){
  StringBuilder out = new StringBuilder();
  out.append( text );
  out.append( '(' );
  for( Toffs offsets : termsOffsets ){
    out.append( offsets.toString() );
  }
  out.append( ')' );
  return out.toString();
}
}
if (fragInfo.getStartOffset() >= fieldEnd || firstToffs.getStartOffset() >= fieldEnd) { continue; while (toffsIterator.hasNext()) { Toffs toffs = toffsIterator.next(); if (toffs.getStartOffset() >= fieldEnd) { boolean startsAfterField = toffs.getStartOffset() >= fieldStart; boolean endsBeforeField = toffs.getEndOffset() < fieldEnd; if (startsAfterField && endsBeforeField) { toffsList.add(new Toffs(toffs.getStartOffset(), fieldEnd - 1)); } else if (endsBeforeField) { toffsList.add(new Toffs(fieldStart, toffs.getEndOffset())); toffsIterator.remove(); } else { toffsList.add(new Toffs(fieldStart, fieldEnd - 1));
/**
 * Repairs fragments produced by broken analysis chains whose positions and offsets disagree, a situation
 * that can cause a {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param mapper   mapper of the highlighted field; the index analyzer of its field type is inspected
 * @param values   field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo the fragment to check, must not be {@code null}
 * @return {@code fragInfo} itself if it has no sub-infos or the analyzer is not reported as broken;
 *         otherwise a new {@link WeightedFragInfo} with its sub-infos ordered by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().names().indexName().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();
    final boolean hasSubInfos = !fragInfo.getSubInfos().isEmpty();
    if (!hasSubInfos || !containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        return fragInfo;
    }
    /*
     * Broken analysis such as WDF was used for term-vector creation at index time and may have messed up
     * the offsets. The sub-infos are therefore re-sorted on their offsets, rather than solely on positions
     * as the FastVectorHighlighter does, to prevent the out-of-bounds exception. The real fix belongs in
     * Lucene; this is only a workaround.
     */
    final List<SubInfo> resorted = fragInfo.getSubInfos();
    final Comparator<SubInfo> offsetOrder = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo a, SubInfo b) {
            final int startA = a.getTermsOffsets().get(0).getStartOffset();
            final int startB = b.getTermsOffsets().get(0).getStartOffset();
            return FragmentBuilderHelper.compare(startA, startB);
        }
    };
    // The list returned by getSubInfos() is sorted directly.
    CollectionUtil.introSort(resorted, offsetOrder);
    // Pull the fragment start back if the first sub-info (re-read after sorting) begins earlier.
    final int earliestTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    return new WeightedFragInfo(
            Math.min(earliestTermStart, fragInfo.getStartOffset()),
            fragInfo.getEndOffset(),
            resorted,
            fragInfo.getTotalBoost());
}
/**
 * Compensates for broken analysis chains that leave positions and offsets out of step, which may end in a
 * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped type of the highlighted field; its index analyzer is checked for broken analysis
 * @param values    field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo  the fragment to check, must not be {@code null}
 * @return the given {@code fragInfo} when nothing needs fixing (no sub-infos, or analyzer not reported as
 *         broken); otherwise a new {@link WeightedFragInfo} carrying the offset-sorted sub-infos
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();
    if (fragInfo.getSubInfos().isEmpty()) {
        return fragInfo;
    }
    if (!containsBrokenAnalysis(fieldType.indexAnalyzer())) {
        return fragInfo;
    }
    /*
     * Index-time term vectors were built with broken analysis (e.g. WDF), so offsets may be messed up.
     * Re-sort the sub-infos by offset, not solely by position as the FastVectorHighlighter does, to avoid
     * the out-of-bounds exception. This ought to be fixed in Lucene itself rather than worked around here.
     */
    final List<SubInfo> ordered = fragInfo.getSubInfos();
    // The list obtained from getSubInfos() is the one being sorted.
    CollectionUtil.introSort(ordered, new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo first, SubInfo second) {
            return FragmentBuilderHelper.compare(
                    first.getTermsOffsets().get(0).getStartOffset(),
                    second.getTermsOffsets().get(0).getStartOffset());
        }
    });
    final int leadingTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    // Use whichever is smaller: the first term's start offset or the fragment's own start offset.
    final int newStart = Math.min(leadingTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(newStart, fragInfo.getEndOffset(), ordered, fragInfo.getTotalBoost());
}
/**
 * Guards against broken analysis chains with inconsistent positions and offsets, which can provoke a
 * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param fieldType mapped type of the highlighted field; its index analyzer is checked for broken analysis
 * @param values    field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo  the fragment to check, must not be {@code null}
 * @return {@code fragInfo} as-is when it has no sub-infos or the analyzer is not reported as broken;
 *         otherwise a new {@link WeightedFragInfo} built from the sub-infos sorted by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(MappedFieldType fieldType, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert fieldType.name().equals(values[0].name()) : "Expected MappedFieldType for field " + values[0].name();
    final boolean needsFix = !fragInfo.getSubInfos().isEmpty() && containsBrokenAnalysis(fieldType.indexAnalyzer());
    if (!needsFix) {
        return fragInfo;
    }
    /*
     * With broken analysis like WDF at index time the term-vector offsets can be messed up. Sorting the
     * sub-infos by offset (the FastVectorHighlighter relies solely on positions) prevents the
     * out-of-bounds exception. This is a Lucene problem and should eventually be fixed there.
     */
    final Comparator<SubInfo> startOffsetOrder = new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo lhs, SubInfo rhs) {
            final int lhsStart = lhs.getTermsOffsets().get(0).getStartOffset();
            final int rhsStart = rhs.getTermsOffsets().get(0).getStartOffset();
            return FragmentBuilderHelper.compare(lhsStart, rhsStart);
        }
    };
    final List<SubInfo> infos = fragInfo.getSubInfos();
    // Sorts the list returned by getSubInfos() itself.
    CollectionUtil.introSort(infos, startOffsetOrder);
    final int headStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    return new WeightedFragInfo(
            Math.min(headStart, fragInfo.getStartOffset()),
            fragInfo.getEndOffset(),
            infos,
            fragInfo.getTotalBoost());
}
/**
 * Fixes fragments coming from broken analysis chains where positions and offsets are messed up, which can
 * lead to a {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}.
 *
 * @param mapper   mapper of the highlighted field; the index analyzer of its field type is inspected
 * @param values   field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo the fragment to check, must not be {@code null}
 * @return {@code fragInfo} unchanged if it has no sub-infos or the analyzer is not reported as broken;
 *         otherwise a new {@link WeightedFragInfo} whose sub-infos are sorted by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        return fragInfo;
    }
    /*
     * Broken analysis (WDF and the like) used for index-time term vectors can mess up the offsets. The
     * sub-infos are re-sorted by offset instead of solely by position, as the FastVectorHighlighter would
     * do, so that no out-of-bounds exception occurs. Properly this should be fixed in Lucene.
     */
    final List<SubInfo> subInfosByOffset = fragInfo.getSubInfos();
    // Sorting is applied to the very list getSubInfos() returned.
    CollectionUtil.introSort(subInfosByOffset, new Comparator<SubInfo>() {
        @Override
        public int compare(SubInfo x, SubInfo y) {
            final int xStart = x.getTermsOffsets().get(0).getStartOffset();
            final int yStart = y.getTermsOffsets().get(0).getStartOffset();
            return FragmentBuilderHelper.compare(xStart, yStart);
        }
    });
    // Re-read the sub-infos and take the first term offset of the first entry as candidate start.
    final int candidateStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int start = Math.min(candidateStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(start, fragInfo.getEndOffset(), subInfosByOffset, fragInfo.getTotalBoost());
}
/**
 * Creates the phrase info for a sequence of matched terms.
 * <p>
 * The texts of all terms are concatenated into {@code text}. A term whose position is exactly one greater
 * than the previous term's is merged into the most recent {@link Toffs} by extending its end offset; any
 * other term gets a new {@link Toffs} of its own.
 *
 * @param terms  the matched terms, in order; must contain at least one element
 * @param boost  boost stored on this phrase
 * @param number value stored as the phrase's sequence number
 */
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int number ){
  this.boost = boost;
  this.seqnum = number;
  termsOffsets = new ArrayList<Toffs>( terms.size() );
  TermInfo ti = terms.get( 0 );
  termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) );
  if( terms.size() == 1 ){
    // Single-term phrase: the text is the term text, nothing to merge.
    text = ti.getText();
    return;
  }
  StringBuilder sb = new StringBuilder();
  sb.append( ti.getText() );
  int pos = ti.getPosition();
  // Iterate the remaining terms instead of calling terms.get( i ): indexed access on a LinkedList is a
  // linear scan, which made the former index loop quadratic in the number of terms.
  for( TermInfo next : terms.subList( 1, terms.size() ) ){
    sb.append( next.getText() );
    if( next.getPosition() - pos == 1 ){
      // Adjacent position: stretch the last offset range to cover this term as well.
      Toffs to = termsOffsets.get( termsOffsets.size() - 1 );
      to.setEndOffset( next.getEndOffset() );
    }
    else{
      termsOffsets.add( new Toffs( next.getStartOffset(), next.getEndOffset() ) );
    }
    pos = next.getPosition();
  }
  text = sb.toString();
}
/**
 * Fixes fragments produced by broken analysis chains whose positions and offsets are messed up, which can
 * lead to a {@code StringIndexOutOfBoundsException} in the {@code FastVectorHighlighter}.
 *
 * @param mapper   mapper of the highlighted field; the index analyzer of its field type is inspected
 * @param values   field values; only the name of the first entry is read, and only by an assertion
 * @param fragInfo the fragment to check, must not be {@code null}
 * @return {@code fragInfo} unchanged if it has no sub-infos or the analyzer is not reported as broken;
 *         otherwise a new {@code WeightedFragInfo} whose sub-infos are sorted by start offset
 */
public static WeightedFragInfo fixWeightedFragInfo(FieldMapper mapper, Field[] values, WeightedFragInfo fragInfo) {
    assert fragInfo != null : "FragInfo must not be null";
    assert mapper.fieldType().name().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();
    if (fragInfo.getSubInfos().isEmpty() || !containsBrokenAnalysis(mapper.fieldType().indexAnalyzer())) {
        return fragInfo;
    }
    /*
     * Special case: broken analysis such as WDF was used for term-vector creation at index time and may
     * have messed up the offsets. To prevent the out-of-bounds exception the sub-infos are re-sorted by
     * offset rather than solely by position as the FastVectorHighlighter does. This is really a Lucene
     * problem and should be fixed there instead of through this workaround.
     */
    final List<SubInfo> sorted = fragInfo.getSubInfos();
    // Sorts the list returned by getSubInfos() directly, keyed on each entry's first term start offset.
    CollectionUtil.introSort(sorted, (left, right) -> FragmentBuilderHelper.compare(
            left.getTermsOffsets().get(0).getStartOffset(),
            right.getTermsOffsets().get(0).getStartOffset()));
    final int firstTermStart = fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset();
    final int fixedStart = Math.min(firstTermStart, fragInfo.getStartOffset());
    return new WeightedFragInfo(fixedStart, fragInfo.getEndOffset(), sorted, fragInfo.getTotalBoost());
}
public WeightedPhraseInfo( LinkedList<TermInfo> terms, float boost, int seqnum ){ this.boost = boost; this.seqnum = seqnum; // We keep TermInfos for further operations termsInfos = new ArrayList<>( terms ); termsOffsets = new ArrayList<>( terms.size() ); TermInfo ti = terms.get( 0 ); termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); if( terms.size() == 1 ){ return; } int pos = ti.getPosition(); for( int i = 1; i < terms.size(); i++ ){ ti = terms.get( i ); if( ti.getPosition() - pos == 1 ){ Toffs to = termsOffsets.get( termsOffsets.size() - 1 ); to.setEndOffset( ti.getEndOffset() ); } else{ termsOffsets.add( new Toffs( ti.getStartOffset(), ti.getEndOffset() ) ); } pos = ti.getPosition(); } }
/**
 * Two instances are equal when they are of exactly the same class and have the same start and end offsets.
 *
 * @param obj the object to compare with, may be {@code null}
 * @return {@code true} if {@code obj} is this instance or an equal one, {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj == null || getClass() != obj.getClass()) {
    return false;
  }
  Toffs that = (Toffs) obj;
  return getStartOffset() == that.getStartOffset()
      && getEndOffset() == that.getEndOffset();
}
@Override
/**
 * Builds the text of one fragment, wrapping every term-offset range of every sub-info in the pre/post tag
 * selected by the sub-info's sequence number and passing all text pieces through the encoder.
 *
 * @param buffer   passed through to {@code getFragmentSourceMSO}
 * @param index    passed through to {@code getFragmentSourceMSO}
 * @param values   passed through to {@code getFragmentSourceMSO}
 * @param fragInfo fragment whose start/end offsets and sub-infos drive the output
 * @param preTags  candidates handed to {@code getPreTag}
 * @param postTags candidates handed to {@code getPostTag}
 * @param encoder  encodes every text piece that is not a tag
 * @return the assembled fragment
 */
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
    String[] preTags, String[] postTags, Encoder encoder ){
  final int fragStart = fragInfo.getStartOffset();
  // One-element array handed to getFragmentSourceMSO; element 0 is read back afterwards as the offset the
  // returned source text starts at (presumably adjusted by that call - verify against its implementation).
  final int[] modifiedStartOffset = { fragStart };
  final String src = getFragmentSourceMSO( buffer, index, values, fragStart, fragInfo.getEndOffset(), modifiedStartOffset );
  final int base = modifiedStartOffset[0];

  final StringBuilder fragment = new StringBuilder();
  int cursor = 0; // index into src up to which text has already been emitted
  for( SubInfo subInfo : fragInfo.getSubInfos() ){
    for( Toffs to : subInfo.getTermsOffsets() ){
      final int matchStart = to.getStartOffset() - base;
      // text between the previous match and this one
      fragment.append( encoder.encodeText( src.substring( cursor, matchStart ) ) );
      fragment.append( getPreTag( preTags, subInfo.getSeqnum() ) );
      final int matchEnd = to.getEndOffset() - base;
      // the matched text itself
      fragment.append( encoder.encodeText( src.substring( matchStart, matchEnd ) ) );
      fragment.append( getPostTag( postTags, subInfo.getSeqnum() ) );
      cursor = matchEnd;
    }
  }
  // whatever follows the last match
  fragment.append( encoder.encodeText( src.substring( cursor ) ) );
  return fragment.toString();
}
@Override
public int compare(SubInfo o1, SubInfo o2) {
    // Order sub-infos by the start offset of their first term-offset entry.
    return FragmentBuilderHelper.compare(
            o1.getTermsOffsets().get(0).getStartOffset(),
            o2.getTermsOffsets().get(0).getStartOffset());
}
});
@Override
public int compare(SubInfo o1, SubInfo o2) {
    // The sort key is the start offset of each sub-info's first term-offset entry.
    final int leftStart = o1.getTermsOffsets().get(0).getStartOffset();
    final int rightStart = o2.getTermsOffsets().get(0).getStartOffset();
    return FragmentBuilderHelper.compare(leftStart, rightStart);
}
});
/**
 * Hash code derived from the start and end offsets, combined with the multiplier 31 starting from 1.
 *
 * @return {@code 31 * (31 * 1 + startOffset) + endOffset}, with the usual int wrap-around
 */
@Override
public int hashCode() {
  final int multiplier = 31;
  int hash = multiplier + getStartOffset();
  hash = multiplier * hash + getEndOffset();
  return hash;
}
@Override
@Override
public int compare(SubInfo o1, SubInfo o2) {
    // Compare on the start offset of the first term-offset entry of each sub-info.
    return FragmentBuilderHelper.compare(
            o1.getTermsOffsets().get(0).getStartOffset(),
            o2.getTermsOffsets().get(0).getStartOffset());
}
});
/**
 * Returns {@code text} followed by the string forms of all term offsets enclosed in parentheses.
 *
 * @return {@code text + "(" + <each Toffs.toString()> + ")"}
 */
@Override
public String toString(){
  StringBuilder rendered = new StringBuilder();
  rendered.append( text );
  rendered.append( '(' );
  for( Toffs range : termsOffsets ){
    rendered.append( range.toString() );
  }
  rendered.append( ')' );
  return rendered.toString();
}
}
/**
 * Orders offsets by start offset, breaking ties by end offset.
 *
 * @param other the offsets to compare against
 * @return a negative value, zero, or a positive value if this instance sorts before, equal to, or after
 *         {@code other}
 */
@Override
public int compareTo( Toffs other ) {
  // Integer.compare rather than subtraction: the difference of two ints can overflow and flip the sign.
  int byStart = Integer.compare( getStartOffset(), other.getStartOffset() );
  if ( byStart != 0 ) {
    return byStart;
  }
  return Integer.compare( getEndOffset(), other.getEndOffset() );
}
@Override
@Override
public int compare(SubInfo o1, SubInfo o2) {
    // Sub-infos are ranked by where their first term-offset entry starts.
    final int firstStart = o1.getTermsOffsets().get(0).getStartOffset();
    final int secondStart = o2.getTermsOffsets().get(0).getStartOffset();
    return FragmentBuilderHelper.compare(firstStart, secondStart);
}
});
@Override
public int compare(SubInfo o1, SubInfo o2) {
    // Delegate to the helper with the start offsets of the first term-offset entries.
    return FragmentBuilderHelper.compare(
            o1.getTermsOffsets().get(0).getStartOffset(),
            o2.getTermsOffsets().get(0).getStartOffset());
}
});