/**
 * Indexes the file found at the given path.
 * <p>
 * Wraps {@code fileName} in a {@link File} and delegates to the
 * {@code index(File, int, boolean)} overload, passing {@code 0} as the
 * second argument.
 *
 * @param fileName path of the file to index
 * @param compress forwarded unchanged to the delegated overload
 * @throws IOException if the delegated indexing call throws it
 */
public void index(String fileName, boolean compress) throws IOException {
    final File source = new File(fileName);
    index(source, 0, compress);
}
/**
 * Adds one document to the index.
 * <p>
 * The document carries two stored fields: {@code key} under
 * {@code PAGE_FIELD_NAME} (stored, not analyzed) and the value returned by
 * {@code toByte(list)} under {@code ENTRY_FIELD_NAME}.
 * <p>
 * An {@link IOException} raised while serializing the rows or while adding
 * the document is logged and not rethrown, so the caller is not interrupted
 * by a single failing entry.
 *
 * @param key  value stored in the page field
 * @param list rows handed to {@code toByte} to build the stored payload
 */
public void add(String key, List<String[]> list) {
    Document doc = new Document();
    try {
        doc.add(new Field(PAGE_FIELD_NAME, key, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(ENTRY_FIELD_NAME, toByte(list), Field.Store.YES));
        indexWriter.addDocument(doc);
    } catch (IOException e) {
        // Pass the exception as the throwable argument (not as the message)
        // so its stack trace is logged, and name the key so the failed entry
        // can be identified afterwards.
        logger.error("failed to index entry for key '" + key + "'", e);
    }
}
OneExamplePerSenseIndexer oneExamplePerSenseIndexer = new OneExamplePerSenseIndexer(line.getOptionValue("index")); oneExamplePerSenseIndexer.index(line.getOptionValue("file"), compress); oneExamplePerSenseIndexer.close(); } catch (ParseException e) {
protected byte[] toByte(List<String[]> list) throws IOException { ByteArrayOutputStream byteStream = new ByteArrayOutputStream(1024); DataOutputStream dataStream = new DataOutputStream(byteStream); // number of distinct example dataStream.writeInt(list.size()); String[] t; for (int i = 0; i < list.size(); i++) { t = list.get(i); dataStream.writeUTF(t[PAGE_COLUMN_INDEX]); dataStream.writeDouble(Double.parseDouble(t[FREQ_COLUMN_INDEX])); if (t.length > LS_COLUMN_INDEX) { writeVector(dataStream, t[LS_COLUMN_INDEX]); } else { writeVector(dataStream, ""); } if (t.length > BOW_COLUMN_INDEX) { writeVector(dataStream, t[BOW_COLUMN_INDEX]); } else { writeVector(dataStream, ""); } } return byteStream.toByteArray(); }
// Build the one-example-per-sense index: log the configured index name, create an
// indexer for that name, index the configured file (passing `compress` through),
// then close the indexer.
// NOTE(review): close() is not reached if index(...) throws, unless a handler
// outside this fragment takes care of it — confirm against the enclosing method.
logger.info("indexing (" + extractorParameters.getOneExamplePerSenseIndexName() + ")..."); OneExamplePerSenseIndexer oneExamplePerSenseIndexer = new OneExamplePerSenseIndexer(extractorParameters.getOneExamplePerSenseIndexName()); oneExamplePerSenseIndexer.index(extractorParameters.getOneExamplePerSenseFileName(), compress); oneExamplePerSenseIndexer.close();
/**
 * Indexes the file found at the given path under the supplied key.
 * <p>
 * Wraps {@code fileName} in a {@link File} and delegates to the
 * {@code index(File, int, boolean)} overload.
 *
 * @param fileName path of the file to index
 * @param key      forwarded unchanged to the delegated overload
 * @param compress forwarded unchanged to the delegated overload
 * @throws IOException if the delegated indexing call throws it
 */
protected void index(String fileName, int key, boolean compress) throws IOException {
    final File source = new File(fileName);
    index(source, key, compress);
}