private void addPatterns(String id, Map<Integer, Set<E>> p, boolean commit) {
  try {
    setIndexWriter();
    Document doc = new Document();
    doc.add(new StringField("sentid", id, Field.Store.YES));
    doc.add(new Field("patterns", getBytes(p), LuceneFieldType.NOT_INDEXED));
    indexWriter.addDocument(doc);
    if (commit) {
      indexWriter.commit();
      //closeIndexWriter();
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
@Override
protected void add(List<CoreLabel> tokens, String sentid, boolean addProcessedText) {
  try {
    setIndexWriter();
    Document doc = new Document();
    for (CoreLabel l : tokens) {
      for (Map.Entry<String, String> en : transformCoreLabeltoString.apply(l).entrySet()) {
        doc.add(new StringField(en.getKey(), en.getValue(), Field.Store.YES)); //, ANALYZED));
      }
      if (addProcessedText) {
        String ptxt = l.get(PatternsAnnotations.ProcessedTextAnnotation.class);
        if (!stopWords.contains(ptxt.toLowerCase()))
          doc.add(new StringField(Token.getKeyForClass(PatternsAnnotations.ProcessedTextAnnotation.class),
              ptxt, Field.Store.YES)); //, ANALYZED));
      }
    }
    doc.add(new StringField("sentid", sentid, Field.Store.YES));
    if (tokens != null && saveTokens)
      doc.add(new Field("tokens", getProtoBufAnnotation(tokens), LuceneFieldType.NOT_INDEXED));
    indexWriter.addDocument(doc);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
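Both snippets above key their documents on a "sentid" StringField. StringField values are indexed as a single, untokenized term, so a stored sentence can be fetched back with an exact TermQuery. A minimal sketch under that assumption; the "indexDir" path and the findSentence helper are illustrative, not part of the snippets:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import java.io.IOException;
import java.nio.file.Paths;

// Exact lookup by sentence id. The TermQuery must match the stored
// "sentid" value byte-for-byte, since StringField is never tokenized.
static String findSentence(String sentid) throws IOException {
  try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("indexDir")))) {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("sentid", sentid)), 1);
    return hits.scoreDocs.length > 0
        ? searcher.doc(hits.scoreDocs[0].doc).get("sentid")
        : null;
  }
}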
Document doc = new Document();
doc.add(new Field(LUCENE_FIELD_QUERY, query, Field.Store.YES, Field.Index.NOT_ANALYZED));
long time = System.currentTimeMillis();
doc.add(new Field(LUCENE_FIELD_MODIFIED,
    DateTools.timeToString(time, DateTools.Resolution.SECOND),
    Field.Store.YES, Field.Index.NOT_ANALYZED));
columnName = LUCENE_FIELD_COLUMN_PREFIX + columnName;
doc.add(new Field(columnName, data, Field.Store.NO, Field.Index.ANALYZED));
buff.appendExceptFirst(" ");
doc.add(new Field(LUCENE_FIELD_DATA, buff.toString(), storeText, Field.Index.ANALYZED));
try {
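DateTools encodes timestamps as fixed-width strings that sort lexicographically, which is why the modified-time field above is indexed NOT_ANALYZED rather than tokenized. A standalone round-trip sketch, independent of the fragment above:

import org.apache.lucene.document.DateTools;
import java.text.ParseException;

// Second resolution yields a "yyyyMMddHHmmss" string and truncates millis;
// stringToTime reverses the encoding.
static void roundTrip() throws ParseException {
  long now = System.currentTimeMillis();
  String encoded = DateTools.timeToString(now, DateTools.Resolution.SECOND);
  long decoded = DateTools.stringToTime(encoded); // throws ParseException on malformed input
  assert decoded / 1000L == now / 1000L;
}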
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut)
    throws IOException, InterruptedException {
  FileAnalyzer fa;
  StreamSource bzSrc = wrap(src);
  String path = doc.get("path");
  if (path != null && (path.endsWith(".bz2") || path.endsWith(".BZ2") || path.endsWith(".bz"))) {
    String newname = path.substring(0, path.lastIndexOf('.'));
    //System.err.println("BZIPPED OF = " + newname);
    try (InputStream in = bzSrc.getStream()) {
      fa = AnalyzerGuru.getAnalyzer(in, newname);
    }
    if (!(fa instanceof BZip2Analyzer)) {
      if (fa.getGenre() == Genre.PLAIN || fa.getGenre() == Genre.XREFABLE) {
        this.g = Genre.XREFABLE;
      } else {
        this.g = Genre.DATA;
      }
      fa.analyze(doc, bzSrc, xrefOut);
      if (doc.get("t") != null) {
        doc.removeField("t");
        if (g == Genre.XREFABLE) {
          doc.add(new Field("t", g.typeName(), AnalyzerGuru.string_ft_stored_nanalyzed_norms));
        }
      }
    }
  }
}
private Document makeDocument(TxnHeader header, EntryTypes type, AtomicInteger count,
    AtomicLong from, AtomicLong to) {
  count.incrementAndGet();
  if (header.getTime() < from.get()) {
    from.set(header.getTime());
  }
  if (header.getTime() > to.get()) {
    to.set(header.getTime());
  }
  NumericField dateField = new NumericField(FieldNames.DATE, Field.Store.YES, true);
  dateField.setLongValue(header.getTime());
  Document document = new Document();
  document.add(new Field(FieldNames.TYPE, Integer.toString(type.getId()),
      Field.Store.YES, Field.Index.NOT_ANALYZED));
  document.add(dateField);
  return document;
}
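The NumericField above uses the Lucene 3.x trie encoding, which makes numeric range queries over the date cheap. A hedged sketch using the matching NumericRangeQuery API from that era; the entriesBetween helper and searcher setup are assumptions, only FieldNames.DATE comes from the snippet:

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.TopDocs;
import java.io.IOException;

// Inclusive range over the trie-encoded FieldNames.DATE values.
static TopDocs entriesBetween(IndexSearcher searcher, long fromMillis, long toMillis)
    throws IOException {
  NumericRangeQuery<Long> dateRange =
      NumericRangeQuery.newLongRange(FieldNames.DATE, fromMillis, toMillis, true, true);
  return searcher.search(dateRange, 100);
}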
    DateTools.Resolution.MILLISECOND);
path = Util.fixPathIfWindows(path);
doc.add(new Field(QueryBuilder.U, Util.path2uid(path, date), string_ft_stored_nanalyzed_norms));
doc.add(new Field(QueryBuilder.FULLPATH, file.getAbsolutePath(), string_ft_nstored_nanalyzed_norms));
doc.add(new SortedDocValuesField(QueryBuilder.FULLPATH, new BytesRef(file.getAbsolutePath())));
doc.add(new Field(QueryBuilder.DATE, date, string_ft_stored_nanalyzed_norms));
doc.add(new SortedDocValuesField(QueryBuilder.DATE, new BytesRef(date)));
Genre g = fa.getGenre();
if (g == Genre.PLAIN || g == Genre.XREFABLE || g == Genre.HTML) {
  doc.add(new Field(QueryBuilder.T, g.typeName(), string_ft_stored_nanalyzed_norms));
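The SortedDocValuesField copies above exist so results can be sorted on FULLPATH and DATE without un-inverting the index; the plain Field of the same name only supports matching and retrieval. A hedged sketch of a date-sorted search; the newestFirst helper is an assumption, only QueryBuilder.DATE comes from the snippet:

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import java.io.IOException;

// SortField.Type.STRING reads the SortedDocValuesField added above,
// comparing the per-document BytesRef values; "true" reverses the order.
static TopDocs newestFirst(IndexSearcher searcher, Query query) throws IOException {
  Sort byDate = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true));
  return searcher.search(query, 50, byDate);
}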
/**
 * Index super column name.
 *
 * @param superColumnName
 *            the super column name
 * @param currentDoc
 *            the current doc
 */
private void addSuperColumnNameToDocument(String superColumnName, Document currentDoc) {
  Field luceneField = new Field(SUPERCOLUMN_INDEX, superColumnName, Store.YES, Field.Index.NO);
  currentDoc.add(luceneField);
}
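Because the field is Store.YES but Index.NO, the super column name travels with the document and can be read back from a hit, but it cannot be queried. A minimal retrieval sketch; the readSuperColumnName helper is an assumption:

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;

// Stored-only fields (Index.NO) are readable from a retrieved document,
// but no query can match on SUPERCOLUMN_INDEX itself.
static String readSuperColumnName(IndexSearcher searcher, int luceneDocId) throws IOException {
  Document hit = searcher.doc(luceneDocId);
  return hit.get(SUPERCOLUMN_INDEX);
}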
private static org.apache.lucene.document.Document asLuceneDocument(Document itd) {
  org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
  for (String field : itd.getFieldNames()) {
    doc.add(new Field(field, itd.getField(field), Field.Store.NO, Field.Index.ANALYZED));
  }
  return doc;
}
/**
 * Index parent key.
 *
 * @param parentId
 *            the parent id
 * @param currentDoc
 *            the current doc
 * @param clazz
 *            the clazz
 */
protected void addParentKeyToDocument(String parentId, Document currentDoc, Class<?> clazz) {
  // if (parentId != null)
  if (clazz != null && parentId != null) {
    Field luceneField = new Field(IndexingConstants.PARENT_ID_FIELD, parentId,
        Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
    currentDoc.add(luceneField);
    Field fieldClass = new Field(IndexingConstants.PARENT_ID_CLASS,
        clazz.getCanonicalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
    currentDoc.add(fieldClass);
  }
}
/** {@inheritDoc} */
public synchronized void add(final String docId, final Document itdoc) {
  if (null == docId) {
    logger.error("No documentId specified. Ignoring addition.");
    return;
  }
  org.apache.lucene.document.Document doc = asLuceneDocument(itdoc);
  org.apache.lucene.document.Field docidPayloadField = new org.apache.lucene.document.Field(
      LsiIndex.PAYLOAD_TERM_FIELD, docId, Field.Store.NO, Field.Index.ANALYZED);
  doc.add(docidPayloadField);
  doc.add(new Field("documentId", docId, Field.Store.NO, Field.Index.NOT_ANALYZED));
  try {
    if (logger.isDebugEnabled()) {
      logger.debug("Adding document with docId=" + docId + ". Doc is " + itdoc.getFieldNames());
    }
    writer.updateDocument(docIdTerm(docId), doc);
  } catch (IOException e) {
    logger.error(e);
  }
}
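updateDocument is an atomic delete-then-add keyed by a term, and indexing "documentId" as NOT_ANALYZED keeps the whole id a single exact token, so the key term always matches. A hedged sketch of what the docIdTerm helper likely looks like, plus the matching delete path; the helper name comes from the snippet but its body and the remove method are assumptions:

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import java.io.IOException;

// Exact-match key term; works because "documentId" is indexed NOT_ANALYZED,
// so the full id is stored as one term in the index.
private static Term docIdTerm(String docId) {
  return new Term("documentId", docId);
}

// Removal uses the same key that updateDocument matches on.
public void remove(IndexWriter writer, String docId) throws IOException {
  writer.deleteDocuments(docIdTerm(docId));
}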