/** Append the contents of the specified field to buffer containing text, * normalizing whitespace in the process. * * @param buf * @param f */ protected static void appendFieldText(StringBuilder buf, Field f) { if (f == null) return; if (f.isBinary()) return; if (!f.isStored()) return; if (buf.length() > 0) buf.append(' '); String s = f.stringValue(); s = s.replaceAll("\\s+", " "); // normalize whitespace. buf.append(s); }
/**
 * Writes the stored fields of a document to {@code dos}: first the number
 * of stored fields as an int, then each stored field via
 * {@link #writeField(Field)} in the order the document returns them.
 * Fields that are not stored are skipped.
 *
 * @param document the document whose stored fields are written
 * @throws IOException if writing to the underlying stream fails
 */
public void writeDocument( Document document )
    throws IOException
{
    @SuppressWarnings( "unchecked" )
    List<Field> allFields = document.getFields();

    // First pass: the count has to be written ahead of the field data.
    int storedTotal = 0;
    for ( Field candidate : allFields )
    {
        storedTotal += candidate.isStored() ? 1 : 0;
    }
    dos.writeInt( storedTotal );

    // Second pass: emit only the stored fields.
    for ( Field candidate : allFields )
    {
        if ( !candidate.isStored() )
        {
            continue;
        }
        writeField( candidate );
    }
}
public void writeField( Field field ) throws IOException { int flags = ( field.isIndexed() ? F_INDEXED : 0 ) // + ( field.isTokenized() ? F_TOKENIZED : 0 ) // + ( field.isStored() ? F_STORED : 0 ) // + ( field.isCompressed() ? F_COMPRESSED : 0 ); String name = field.name(); String value = field.stringValue(); dos.write( flags ); dos.writeUTF( name ); writeUTF( value, dos ); }
if (f.isStored() && !f.isBinary()) { String fieldValue = f.stringValue().toLowerCase(); for (String cat: categories.keySet()) {
/**
 * Appends the stored fields of a document to the field streams.
 *
 * <p>The current file pointer of {@code fieldsStream} is first recorded in
 * {@code indexStream}. Then the number of stored fields is written to
 * {@code fieldsStream}, followed, for every stored field, by its field
 * number, a flag byte (bit 0 set when the field is tokenized) and its
 * string value.
 *
 * @param doc the document whose stored fields are written
 * @throws IOException if writing to either stream fails
 */
final void addDocument(Document doc) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    // Count pass: the number of stored fields precedes the field records.
    int numStored = 0;
    for (Enumeration e = doc.fields(); e.hasMoreElements();) {
        Field candidate = (Field) e.nextElement();
        if (candidate.isStored()) {
            numStored++;
        }
    }
    fieldsStream.writeVInt(numStored);

    // Write pass: one record per stored field.
    for (Enumeration e = doc.fields(); e.hasMoreElements();) {
        Field current = (Field) e.nextElement();
        if (!current.isStored()) {
            continue;
        }

        fieldsStream.writeVInt(fieldInfos.fieldNumber(current.name()));

        byte flagBits = (byte) (current.isTokenized() ? 1 : 0);
        fieldsStream.writeByte(flagBits);

        fieldsStream.writeString(current.stringValue());
    }
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Dumps the values stored in the specified field for each non-deleted
 * document in the index, one value per output line.
 *
 * <p>Only stored, non-binary field instances with a non-null string value
 * are printed. The index reader and directory opened here are closed before
 * the method returns, including when an exception is thrown; {@code out} is
 * neither flushed nor closed and remains owned by the caller.
 *
 * @param indexDir the index to read.
 * @param field the name of the field.
 * @param maxDocs currently not consulted by this method; every document in
 *        the index is visited. NOTE(review): presumably intended as an upper
 *        bound on the number of documents dumped -- confirm with callers
 *        before enforcing it.
 * @param out the print writer output will be written to
 * @throws IOException if the index cannot be opened or read
 */
public static void dumpDocumentFields(File indexDir, String field, long maxDocs, PrintWriter out) throws IOException {
    Directory dir = FSDirectory.open(indexDir);
    try {
        IndexReader reader = IndexReader.open(dir, true);
        try {
            int max = reader.maxDoc();
            for (int i = 0; i < max; i++) {
                if (reader.isDeleted(i)) {
                    continue;
                }
                Document d = reader.document(i);
                for (Field f : d.getFields(field)) {
                    if (f.isStored() && !f.isBinary()) {
                        String value = f.stringValue();
                        if (value != null) {
                            out.printf("%s\n", value);
                        }
                    }
                }
            }
        } finally {
            // Release the reader even if reading a document fails.
            reader.close();
        }
    } finally {
        // The directory was opened by this method, so it is closed here too.
        dir.close();
    }
}
} // closes the enclosing class, whose header lies outside this excerpt