try (TokenStream stream = tokenStream = field.tokenStream(docState.analyzer, tokenStream)) {
@Override
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
  return getRealValue().tokenStream(analyzer, reuse);
}
/**
 * This method performs the analysis of the seed document and extracts the boosts if present.
 * This is done only once for the seed document.
 *
 * @param inputDocument the unseen seed document
 * @param fieldName2tokensArray a map that associates a field name with the list of token arrays for all its values
 * @param fieldName2boost a map that associates each field name with its boost
 * @throws IOException if there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray,
                                 Map<String, Float> fieldName2boost) throws IOException {
  for (int i = 0; i < textFieldNames.length; i++) {
    String fieldName = textFieldNames[i];
    float boost = 1;
    List<String[]> tokenizedValues = new LinkedList<>();
    if (fieldName.contains("^")) {
      String[] field2boost = fieldName.split("\\^");
      fieldName = field2boost[0];
      boost = Float.parseFloat(field2boost[1]);
    }
    IndexableField[] fieldValues = inputDocument.getFields(fieldName);
    for (IndexableField fieldValue : fieldValues) {
      TokenStream fieldTokens = fieldValue.tokenStream(field2analyzer.get(fieldName), null);
      String[] fieldTokensArray = getTokenArray(fieldTokens);
      tokenizedValues.add(fieldTokensArray);
    }
    fieldName2tokensArray.put(fieldName, tokenizedValues);
    fieldName2boost.put(fieldName, boost);
    textFieldNames[i] = fieldName;
  }
}
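The getTokenArray helper referenced above is not part of this snippet; a minimal sketch of what such a helper typically looks like, assuming it simply drains the freshly created TokenStream into a String[] via CharTermAttribute (name and visibility are illustrative):

// Hypothetical helper: consumes a TokenStream returned by fieldValue.tokenStream(...) and
// collects the analyzed terms. Assumes the stream has not yet been consumed or closed.
private String[] getTokenArray(TokenStream tokenizedText) throws IOException {
  List<String> tokens = new LinkedList<>();
  CharTermAttribute charTermAttribute = tokenizedText.addAttribute(CharTermAttribute.class);
  tokenizedText.reset();
  while (tokenizedText.incrementToken()) {
    tokens.add(charTermAttribute.toString());
  }
  tokenizedText.end();
  tokenizedText.close();
  return tokens.toArray(new String[0]);
}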
public static List<String> getTermsFromField(Analyzer analyzer, IndexableField field) throws IOException {
  TokenStream ts = null;
  try {
    ArrayList<String> terms = new ArrayList<String>();
    ts = field.tokenStream(analyzer, ts);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      terms.add(termAtt.toString());
    }
    ts.end();
    return terms;
  } finally {
    if (ts != null) {
      IOUtils.closeWhileHandlingException(ts);
    }
  }
}
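A possible call site for getTermsFromField, shown only as a sketch; the analyzer, field name, and text are illustrative:

Analyzer analyzer = new StandardAnalyzer();
IndexableField field = new TextField("body", "The Quick Brown Fox", Field.Store.NO);
// Terms come back analyzed (e.g. lowercased); the exact list depends on the analyzer's
// tokenizer, filters, and stop word configuration.
List<String> terms = getTermsFromField(analyzer, field);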
private int getFieldLengthFromAnalysisChain(int docNum, String indexedField, IndexReader ir) {
  TokenStream ts = null;
  try {
    Set<String> fields = new HashSet<String>();
    fields.add(indexedField);
    Document d = ir.document(docNum, fields);
    IndexableField field = d.getField(indexedField);
    if (field == null) {
      return -1;
    }
    ts = field.tokenStream(analyzer, ts);
    ts.reset();
    int length = 0;
    while (ts.incrementToken()) {
      length++;
    }
    ts.end();
    return length;
  } catch (Exception ex) {
    throw new RuntimeException("caught exception in function " + description() + " : doc=" + docNum, ex);
  } finally {
    if (ts != null) {
      IOUtils.closeWhileHandlingException(ts);
    }
  }
}
/**
 * Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
 * @param field the field to add
 * @param analyzer the analyzer to use for term analysis
 * @param boost a field boost
 * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
 *                                  structures are not supported by MemoryIndex
 */
public void addField(IndexableField field, Analyzer analyzer, float boost) {
  if (field.fieldType().docValuesType() != DocValuesType.NONE)
    throw new IllegalArgumentException("MemoryIndex does not support DocValues fields");
  if (analyzer == null) {
    addField(field.name(), field.tokenStream(null, null), boost);
  } else {
    addField(field.name(), field.tokenStream(analyzer, null), boost,
        analyzer.getPositionIncrementGap(field.name()), analyzer.getOffsetGap(field.name()));
  }
}
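A minimal usage sketch for this addField variant, assuming a plain text field and a TermQuery; the field name, text, and query are illustrative:

MemoryIndex index = new MemoryIndex();
Analyzer analyzer = new StandardAnalyzer();
index.addField(new TextField("content", "readings about salmon", Field.Store.NO), analyzer, 1.0f);
// A positive score means the single in-memory document matches the query.
float score = index.search(new TermQuery(new Term("content", "salmon")));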
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
  for (IndexableField field : d.getFields()) {
    if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
      continue;
    }
    try {
      // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
      // like the indexer does
      try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
        if (tokenStream != null) {
          memoryIndex.addField(field.name(), tokenStream, field.boost());
        }
      }
    } catch (IOException e) {
      throw new ElasticsearchException("Failed to create token stream", e);
    }
  }
  return memoryIndex;
}
@Override
public int doLogic() throws Exception {
  List<IndexableField> fields = doc.getFields();
  Analyzer analyzer = getRunData().getAnalyzer();
  int tokenCount = 0;
  for (final IndexableField field : fields) {
    if (field.fieldType().indexOptions() == IndexOptions.NONE || field.fieldType().tokenized() == false) {
      continue;
    }
    final TokenStream stream = field.tokenStream(analyzer, null);
    // reset the TokenStream to the first token
    stream.reset();
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    while (stream.incrementToken()) {
      termAtt.getBytesRef();
      tokenCount++;
    }
    stream.end();
    stream.close();
  }
  totalTokenCount += tokenCount;
  return tokenCount;
}
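The same counting loop can also be driven directly from an Analyzer when no IndexableField is at hand; a sketch under that assumption (the field name and text are illustrative, and an Analyzer is assumed to be in scope):

int tokenCount = 0;
try (TokenStream stream = analyzer.tokenStream("body", "some sample text")) {
  TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    termAtt.getBytesRef();
    tokenCount++;
  }
  stream.end();
}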
if (analyzer != null) {
  offsetGap = analyzer.getOffsetGap(field.name());
  tokenStream = field.tokenStream(analyzer, null);
  positionIncrementGap = analyzer.getPositionIncrementGap(field.name());
} else {
  offsetGap = 1;
  tokenStream = field.tokenStream(null, null);
  positionIncrementGap = 0;
}
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
  MemoryIndex memoryIndex = cache.get();
  for (IndexableField field : parsedDocument.rootDoc().getFields()) {
    if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
      continue;
    }
    try {
      Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
      // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
      // like the indexer does
      try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
        if (tokenStream != null) {
          memoryIndex.addField(field.name(), tokenStream, field.boost());
        }
      }
    } catch (Exception e) {
      throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
    }
  }
  context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}