Refine search
/**
 * Receives a stored string value and appends it to {@code doc} as a stored field.
 *
 * <p>The field type starts as a copy of {@code TextField.TYPE_STORED} and is then
 * adjusted so that its term-vector, norms and index-options settings match what
 * {@code fieldInfo} reports.
 *
 * @param fieldInfo metadata of the field being visited; supplies the field name and settings
 * @param value     raw bytes of the stored value, decoded as UTF-8
 * @throws IOException declared by the overridden method
 */
@Override
public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
    // Decode up front so the field construction below reads as a single step.
    final String decoded = new String(value, StandardCharsets.UTF_8);

    final FieldType mirroredType = new FieldType(TextField.TYPE_STORED);
    mirroredType.setStoreTermVectors(fieldInfo.hasVectors());
    mirroredType.setOmitNorms(fieldInfo.omitsNorms());
    mirroredType.setIndexOptions(fieldInfo.getIndexOptions());

    final StoredField restored = new StoredField(fieldInfo.name, decoded, mirroredType);
    doc.add(restored);
}
/**
 * Creates a frozen {@link FieldType} whose dimensions are set to {@code numDims}
 * values of {@code Double.BYTES} bytes each.
 *
 * @param numDims number of dimensions to configure
 * @return the configured, frozen field type
 */
private static FieldType getType(int numDims) {
    final FieldType dimensionalType = new FieldType();
    dimensionalType.setDimensions(numDims, Double.BYTES);
    // Frozen before being handed out, so later setter calls on it will fail.
    dimensionalType.freeze();
    return dimensionalType;
}
public static Document Document(String inLine, int lineNumber) { Document doc = new Document(); doc.add(new StoredField("line_number", ""+lineNumber)); doc.add(new StoredField("modified", DateTools.timeToString(System.currentTimeMillis(), DateTools.Resolution.MINUTE))); //create new FieldType to store term positions (TextField is not sufficiently configurable) FieldType ft = new FieldType(); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); ft.setTokenized(true); ft.setStoreTermVectors(true); ft.setStoreTermVectorPositions(true); Field contentsField = new Field("contents", inLine, ft); doc.add(contentsField); return doc; } }
// Excerpt: opens an IndexWriter and adds one document holding a "uri" field and a text field.
// NOTE(review): abridged snippet — the declarations of analyzer, directory, triple, uri, doc,
// searchField and i, and the loop around the per-document code, are not shown here.
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
indexWriterConfig.setRAMBufferSizeMB(1024.0);
indexWriterConfig.setOpenMode(OpenMode.CREATE);
IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
// Copies of the stock stored String/Text field types with term vectors explicitly turned off.
FieldType stringType = new FieldType(StringField.TYPE_STORED);
stringType.setStoreTermVectors(false);
FieldType textType = new FieldType(TextField.TYPE_STORED);
textType.setStoreTermVectors(false);
// The literal's lexical form is the text that gets indexed under searchField.
text = triple.getObject().getLiteralLexicalForm();
doc = new Document();
doc.add(new Field("uri", uri, stringType));
doc.add(new Field(searchField, text, textType));
writer.addDocument(doc);
// Commit whenever the pre-increment value of i is a multiple of 10000 (this includes i == 0).
if(i++ % 10000 == 0){
    writer.commit();
    // NOTE(review): the closing brace of this if is missing from the excerpt; close() presumably
    // belongs after the elided loop rather than inside this block — confirm against the full source.
    writer.close();
// Excerpt: configures a stored, tokenized text FieldType and indexes status objects with it.
// NOTE(review): abridged snippet — several braces are never closed and the code that
// creates doc, dir and assigns status is not shown.
String indexPath = cmdline.getOptionValue(INDEX_OPTION);
final FieldType textOptions = new FieldType();
textOptions.setIndexed(true);
textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
textOptions.setStored(true);
textOptions.setTokenized(true);
// Term vectors are only stored when the corresponding command-line option is present.
if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
    textOptions.setStoreTermVectors(true);
    // NOTE(review): the closing brace of the if above is elided in this excerpt.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    // NOTE(review): status is declared without an initializer; its assignment is elided.
    Status status;
    doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));
    // NOTE(review): the FRIENDS_COUNT field is filled from getFollowersCount() — looks like a
    // possible mix-up between friends and followers; confirm against the Status API.
    doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES));
    writer.addDocument(doc);
    // Progress log whenever cnt is a multiple of 100000.
    if (cnt % 100000 == 0) {
        LOG.info(cnt + " statuses indexed");
        // NOTE(review): forceMerge(1) and the "Done!" log presumably run once after the
        // elided loop, not inside this if — confirm against the full source.
        writer.forceMerge(1);
        LOG.info("Done!");
// Excerpt: indexes texts pulled from docIterator, committing every commitInterval documents.
// NOTE(review): abridged snippet — the try and both if blocks are never closed here, and
// the loop over docIterator is not shown.
Analyzer analyzer = new SimpleAnalyzer(true);
try {
    IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
    config.setOpenMode(OpenMode.CREATE);
    // Non-stored text type restricted to docs + term frequencies, then frozen.
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexed(true);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    fieldType.freeze();
    writer = new IndexWriter(FSDirectory.open(indexPath), config);
    String text;
    int count = 0;
    text = docIterator.next();
    // Empty texts are skipped.
    if (text.length() > 0) {
        writer.addDocument(toLuceneDocument(analyzer, text, fieldType));
        ++count;
        // Once commitInterval documents have been added: commit, request a GC, reset the counter.
        if (count >= commitInterval) {
            writer.commit();
            System.gc();
            count = 0;
// Excerpt: writes documents with a single reusable "body" field into a freshly created index.
// NOTE(review): abridged snippet — the loop that the break belongs to, the declarations of
// surfaceForm, count and success, and the opening try of the trailing finally are not shown.
IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
iwc.setRAMBufferSizeMB(ramBufferSizeMB);
IndexWriter writer = new IndexWriter(dir, iwc);
// Non-stored text type: docs + freqs only, norms omitted, frozen.
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
ft.setOmitNorms(true);
ft.freeze();
// One Document/Field pair is created here; the field's value is replaced via setStringValue below.
Document doc = new Document();
Field field = new Field("body", "", ft);
doc.add(field);
// NOTE(review): the condition and loop surrounding this break are elided.
break;
field.setStringValue(surfaceForm.utf8ToString());
writer.addDocument(doc);
count++;
// NOTE(review): rollback() immediately followed by success = true looks like two separate
// code paths spliced together by the excerpt — confirm against the full source.
writer.rollback();
success = true;
} finally {
String currentURLStr,String contentType,long lastModified) { try { Document document = new Document(); currentURL = currentURLStr; uid = currentURLStr; FieldType fieldType = new FieldType(); fieldType.setStored(false); fieldType.setIndexed(true); fieldType.setStoreTermVectors(false); document.add(new Field("uid",currentURLStr, fieldType)); document.add(new StringField("url", currentURLStr, Field.Store.YES)); document.add(new StringField("contentType", contentType, Field.Store.YES)); document.add(new LongField("lastModified", if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(document); } else { writer.updateDocument(new Term("uid", uid), document);
/**
 * Adds one document to the index through {@code writer}.
 *
 * <p>The document has two fields: the id, stored under {@code DEFAULT_ID_FIELD}, and
 * the JSON payload under {@code DEFAULT_SIREN_FIELD}, which is indexed and tokenized
 * but neither stored nor given norms or term vectors.
 *
 * @param id   identifier of the document
 * @param json JSON content to index
 * @throws IOException if the writer fails to add the document
 */
public void addDocument(final String id, final String json) throws IOException {
    // Type for the JSON payload: searchable only, nothing kept for retrieval.
    final FieldType jsonType = new FieldType();
    jsonType.setIndexed(true);
    jsonType.setTokenized(true);
    jsonType.setOmitNorms(true);
    jsonType.setStored(false);
    jsonType.setStoreTermVectors(false);

    final Field idField = new StringField(DEFAULT_ID_FIELD, id, Store.YES);
    final Field jsonField = new Field(DEFAULT_SIREN_FIELD, json, jsonType);

    final Document luceneDoc = new Document();
    luceneDoc.add(idField);
    luceneDoc.add(jsonField);
    writer.addDocument(luceneDoc);
}
// Builds a Lucene index of (uri, label) documents using a StandardAnalyzer.
// NOTE(review): abridged snippet — the method's closing brace, its return statement, and the
// loop that supplies entity, label and luceneDocuments are not shown.
public Index buildIndex() throws Exception{
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
    System.out.println( "Creating index ..." );
    // Copies of the stock stored String/Text field types with term vectors explicitly turned off.
    FieldType stringType = new FieldType(StringField.TYPE_STORED);
    stringType.setStoreTermVectors(false);
    FieldType textType = new FieldType(TextField.TYPE_STORED);
    textType.setStoreTermVectors(false);
    // Fully qualified name is used for Lucene's Document class.
    org.apache.lucene.document.Document luceneDocument = new org.apache.lucene.document.Document();
    luceneDocument.add(new Field("uri", entity.toStringID(), stringType));
    luceneDocument.add(new Field(searchField, label, textType));
    // Documents are collected first and handed to the writer in one addDocuments call.
    luceneDocuments.add(luceneDocument);
    writer.addDocuments(luceneDocuments);
    writer.close();
// Excerpt: builds a ("uri", "label") document and writes it, then commits and closes the writer.
// NOTE(review): abridged snippet — writer, resource, label and documents are defined elsewhere,
// and any loop around the per-document code is not shown.
// Copies of the stock stored String/Text field types with term vectors explicitly turned off.
FieldType stringType = new FieldType(StringField.TYPE_STORED);
stringType.setStoreTermVectors(false);
FieldType textType = new FieldType(TextField.TYPE_STORED);
textType.setStoreTermVectors(false);
Document luceneDocument = new Document();
luceneDocument.add(new Field("uri", resource.getURI(), stringType));
luceneDocument.add(new Field("label", label, textType));
writer.addDocument(luceneDocument);
// NOTE(review): both addDocument (single) and addDocuments (batch) appear here; whether
// luceneDocument is also contained in documents cannot be told from this excerpt.
writer.addDocuments(documents);
writer.commit();
writer.close();
// Excerpt: opens three writers (test, cross-validation, training), each with its own
// IndexWriterConfig built from the same analyzer, and prepares a stored text field type.
// NOTE(review): abridged snippet — the if block below is never closed here.
IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));
FieldType ft = new FieldType(TextField.TYPE_STORED);
// When termVectors is set, term vectors are stored together with offsets and positions.
if (termVectors) {
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
private static void addGram(String text, Document doc, int ng1, int ng2) { int len = text.length(); for (int ng = ng1; ng <= ng2; ng++) { String key = "gram" + ng; String end = null; for (int i = 0; i < len - ng + 1; i++) { String gram = text.substring(i, i + ng); FieldType ft = new FieldType(StringField.TYPE_NOT_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); Field ngramField = new Field(key, gram, ft); // spellchecker does not use positional queries, but we want freqs // for scoring these multivalued n-gram fields. doc.add(ngramField); if (i == 0) { // only one term possible in the startXXField, TF/pos and norms aren't needed. Field startField = new StringField("start" + ng, gram, Field.Store.NO); doc.add(startField); } end = gram; } if (end != null) { // may not be present if len==ng1 // only one term possible in the endXXField, TF/pos and norms aren't needed. Field endField = new StringField("end" + ng, end, Field.Store.NO); doc.add(endField); } } }
/**
 * Initializes this component: opens the index writer if none exists yet, clears the
 * current-document state, and prepares the frozen field type.
 *
 * <p>The field type is indexed (docs and frequencies), stored, not tokenized, and
 * omits norms.
 *
 * @param context the UIMA context, forwarded to the superclass
 * @throws ResourceInitializationException if opening the index writer fails with an I/O error
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    // The config is created with a null analyzer.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_44, null);
    if (indexWriter == null) {
        try {
            indexWriter = new IndexWriter(FSDirectory.open(luceneDir), writerConfig);
        } catch (IOException openFailure) {
            throw new ResourceInitializationException(openFailure);
        }
    }

    // No document is in progress yet.
    currentDocumentId = null;
    currentDocument = null;

    fieldType = new FieldType();
    fieldType.setIndexed(true);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    fieldType.setStored(true);
    fieldType.setOmitNorms(true);
    fieldType.setTokenized(false);
    fieldType.freeze();
}
// Builds a Lucene document from a JSON object: doc-values fields for "id", "price" and "year",
// a non-stored text field for "contents", and a payload-carrying "tags_payload" field.
// NOTE(review): abridged snippet — the definition of tagsString, the return statement and
// the method's closing brace are not shown.
static Document buildDoc(JSONObject json) throws Exception{
    Document doc = new Document();
    // "id" is read with getLong, the other values with the opt* accessors.
    doc.add(new NumericDocValuesField("id", json.getLong("id")));
    doc.add(new DoubleDocValuesField("price", json.optDouble("price")));
    doc.add(new TextField("contents", json.optString("contents"), Store.NO));
    doc.add(new NumericDocValuesField("year", json.optInt("year")));
    // Field type for the tags: tokenized, norms kept, indexed with positions and offsets,
    // term vectors stored together with offsets, payloads and positions.
    FieldType ft = new FieldType();
    ft.setOmitNorms(false);
    ft.setTokenized(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setStoreTermVectorPositions(true);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // The field value is supplied as a PayloadTokenizer over tagsString rather than as a plain string.
    Field tagPayload = new Field("tags_payload", new PayloadTokenizer(tagsString), ft);
    doc.add(tagPayload);
/**
 * Rebuilds a Lucene {@link FieldType} from individual flags.
 *
 * @param stored                   whether the field value is stored
 * @param indexed                  whether the field is indexed; when {@code false} the index
 *                                 options are set to {@code IndexOptions.NONE}
 * @param analyzed                 whether the field value is tokenized
 * @param termVector               term-vector setting; anything other than {@code NO} enables
 *                                 term vectors, and the {@code WITH_*} values additionally
 *                                 enable offsets and/or positions
 * @param omitNorms                whether norms are omitted
 * @param omitTermFreqAndPositions for an indexed field, selects {@code DOCS} (when {@code true})
 *                                 or {@code DOCS_AND_FREQS_AND_POSITIONS} (when {@code false})
 * @return the configured (not frozen) field type
 */
private FieldType identifyFieldType(boolean stored, boolean indexed, boolean analyzed, SerializableTermVector termVector, boolean omitNorms, boolean omitTermFreqAndPositions) {
	final FieldType type = new FieldType();
	type.setStored( stored );
	type.setTokenized( analyzed );
	type.setStoreTermVectors( termVector != SerializableTermVector.NO );
	type.setStoreTermVectorOffsets( termVector == SerializableTermVector.WITH_OFFSETS || termVector == SerializableTermVector.WITH_POSITIONS_OFFSETS );
	type.setStoreTermVectorPositions( termVector == SerializableTermVector.WITH_POSITIONS || termVector == SerializableTermVector.WITH_POSITIONS_OFFSETS );
	type.setOmitNorms( omitNorms );

	// The indexed flag used to be ignored, which made every field indexed regardless of
	// what the caller asked for. A non-indexed field must carry IndexOptions.NONE.
	// NOTE(review): term-vector flags are left as requested; callers are presumed not to
	// combine indexed == false with term vectors — confirm.
	final IndexOptions indexOptions;
	if ( !indexed ) {
		indexOptions = IndexOptions.NONE;
	}
	else if ( omitTermFreqAndPositions ) {
		indexOptions = IndexOptions.DOCS;
	}
	else {
		indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
	}
	type.setIndexOptions( indexOptions );
	return type;
}
private static Document createLuceneDocumentFromObject(BroadcastDocumentObject objectDoc) throws IOException { Document indexDoc = new Document(); FieldType ftSyn = new FieldType(); ftSyn.setStored(false); ftSyn.setIndexed(true); ftSyn.setOmitNorms(true); ftSyn.freeze(); indexDoc.add(new StringField(DictionarySearcher.F_CATID, objectDoc.getCategory(), Field.Store.YES)); for (String value : objectDoc.getValueSet()) { // F_RAW field is necessary for broadcasting with new validation modes. indexDoc.add(new Field(DictionarySearcher.F_RAW, value, DictionaryUtils.FIELD_TYPE_RAW_VALUE)); indexDoc.add(new StringField(DictionarySearcher.F_SYNTERM, DictionarySearcher.getJointTokens(value), Field.Store.NO)); } return indexDoc; } }
/**
 * Builds a Lucene {@link FieldType} from the given field properties.
 *
 * <p>Index options, tokenization, storage, term-vector settings and norms are copied
 * from {@code properties}. The numeric type is then set through
 * {@code Fields.setNumericType}; if that results in a numeric type, the numeric
 * precision step is copied from {@code properties} as well.
 *
 * @param properties source of the field settings
 * @param validator  type passed to {@code Fields.setNumericType} to determine the numeric type
 * @return the configured (not frozen) field type
 */
public static FieldType fieldType(Properties properties, AbstractType validator) {
    FieldType fieldType = new FieldType();
    // Index options are set once; a second, identical call later in the method was redundant.
    fieldType.setIndexOptions(properties.getIndexOptions());
    fieldType.setTokenized(properties.isTokenized());
    fieldType.setStored(properties.isStored());
    fieldType.setStoreTermVectors(properties.isStoreTermVectors());
    fieldType.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets());
    fieldType.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads());
    fieldType.setStoreTermVectorPositions(properties.isStoreTermVectorPositions());
    fieldType.setOmitNorms(properties.isOmitNorms());

    Fields.setNumericType(validator, fieldType);
    // The precision step is only applied when a numeric type was actually set.
    if (fieldType.numericType() != null) {
        fieldType.setNumericPrecisionStep(properties.getNumericPrecisionStep());
    }
    return fieldType;
}
/**
 * Builds the Lucene document for a spatial shape belonging to a record.
 *
 * <p>The document contains the record identity under {@code RID} (stored), every
 * indexable field the spatial {@code strategy} creates for the shape, and a stored
 * field named after the strategy's field holding {@code ctx.toString(shape)}.
 *
 * @param oIdentifiable the record whose identity is written to the {@code RID} field
 * @param shape         the shape to index
 * @return the populated document
 */
protected Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {
    // An unused local FieldType was removed; it was configured but never attached to a field.
    Document doc = new Document();
    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }
    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
    return doc;
}
/**
 * Builds the Lucene document for a spatial shape belonging to a record.
 *
 * <p>The document contains the record identity under {@code RID} (stored, created with
 * {@code Field.Index.NOT_ANALYZED_NO_NORMS}), every indexable field the spatial
 * {@code strategy} creates for the shape, and a stored field named after the
 * strategy's field holding {@code ctx.toString(shape)}.
 *
 * @param oIdentifiable the record whose identity is written to the {@code RID} field
 * @param shape         the shape to index
 * @return the populated document
 */
private Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {
    // An unused local FieldType was removed; it was configured but never attached to a field.
    Document doc = new Document();
    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }
    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
    return doc;
}
}