/** * Outputs flattened data without schema definitions. * A single line of data contains one key-value record from a fiji family, formatted as: * [entityid]|[timestamp]|[key]|[value] * * @param input The row data to export. * @param context The context to write export to. * @throws IOException if there's an error. */ @Override public void gather(FijiRowData input, GathererContext context) throws IOException { for (String key : input.getQualifiers(mFamily)) { NavigableMap<Long, Object> values = input.getMostRecentValue(mFamily, key); for (Map.Entry<Long, Object> e : values.entrySet()) { // Write this entry out on a single line. mLine.set(makeLine(input.getEntityId(), e.getKey(), key, e.getValue())); context.write(mLine, NullWritable.get()); } } }
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { if (!input.containsColumn(getInputColumnName().getFamily(), getInputColumnName().getQualifier())) { LOG.debug("No " + getInputColumnName().getName() + " for entity: " + input.getEntityId()); } String string = input.getMostRecentValue(getInputColumnName().getFamily(), getInputColumnName().getQualifier()).toString(); // Run the regex on the input string. Matcher matcher = mPattern.matcher(string); if (matcher.matches()) { if (matcher.groupCount() == 1) { context.put(matcher.group(1)); } } else { LOG.debug(input.getEntityId().toString() + "'s data '" + string + "' does not match " + mPattern.pattern()); } } }
/**
 * Creates an iterator over the qualifiers of a map-type family.
 *
 * <p>
 *   The iterator is backed by a pager obtained from the row for the given family.
 *   The qualifier page size controls how many qualifiers are fetched per RPC:
 *   smaller pages lower the memory footprint at the cost of more RPCs.
 *   The size of a qualifier can be estimated or bounded as the size of a KeyValue
 *   with no value.
 * </p>
 *
 * @param row Fiji row data with paging enabled on the specified map-type family.
 * @param family Map-type family name.
 * @param pageSize Size of the underlying qualifier pages.
 * @throws IOException on I/O error.
 */
public MapFamilyQualifierIterator(FijiRowData row, String family, int pageSize)
    throws IOException {
  mRow = row;
  mFamily = family;
  mPageSize = pageSize;
  mQualifierPager = row.getPager(family);
  // Pre-load the first qualifier (or null when there is none).
  mNext = getNext();
}
/** {@inheritDoc} */ @Override public void gather(FijiRowData input, GathererContext context) throws IOException { if (!input.containsColumn("info", "email")) { // No email data. return; } String email = input.getMostRecentValue("info", "email").toString(); int atSymbol = email.indexOf('@'); if (atSymbol < 0) { // Invalid email. return; } String domain = email.substring(atSymbol + 1); mDomain.set(domain); context.write(mDomain, ONE); }
"FijiRowData must be an instance of HBaseFijiRowData to read TableLayout information."); mEntityId = new EntityIdWritable(rowData.getEntityId()); mWritableData = Maps.newHashMap(); mSchemas = Maps.newHashMap(); for (String qualifier : rowData.getQualifiers(family)) { FijiColumnName column = new FijiColumnName(family, qualifier); if (rowData.getCells(family, qualifier) != null) { NavigableMap<Long, FijiCellWritable> data = convertCellsToWritable(rowData.getCells(family, qualifier)); Schema schema = rowData.getReaderSchema(family, qualifier); mSchemas.put(column, schema); Schema schema = rowData.getReaderSchema(column.getFamily(), column.getQualifier()); mSchemas.put(column, schema); Schema schema = rowData.getReaderSchema(family, familyColumnName.getQualifier()); mSchemas.put(familyColumnName, schema);
for (String key : row.getQualifiers(family.getName())) { FijiCell<Long> counter = row.getMostRecentCell(family.getName(), key); if (null != counter) { printCell(row.getEntityId(), counter, printStream); FijiCell<Long> counter = row.getMostRecentCell(family.getName(), key); if (null != counter) { printCell(row.getEntityId(), counter, printStream); row.getValues(family.getName()); for (String key : keyTimeseriesMap.keySet()) { for (Entry<Long, Object> timestampedCell : keyTimeseriesMap.get(key).entrySet()) { long timestamp = timestampedCell.getKey(); printCell(row.getEntityId(), timestamp, family.getName(), key, timestampedCell.getValue(), printStream); for (String key : entry.getValue()) { NavigableMap<Long, Object> timeseriesMap = row.getValues(family.getName(), key); for (Entry<Long, Object> timestampedCell : timeseriesMap.entrySet()) { long timestamp = timestampedCell.getKey(); printCell( row.getEntityId(), timestamp, family.getName(), key, timestampedCell.getValue(), printStream); if (column.getDesc().getColumnSchema().getType() == SchemaType.COUNTER) { final FijiCell<Long> counter = row.getMostRecentCell(colName.getFamily(), colName.getQualifier()); if (null != counter) {
FijiRestEntityId.create(rowData.getEntityId(), tableLayout)); Map<String, FamilyLayout> familyLayoutMap = tableLayout.getFamilyMap(); FijiCell<Long> counter = rowData.getMostRecentCell(col.getFamily(), col.getQualifier()); if (null != counter) { SchemaOption schemaOption = new SchemaOption(schemaTable. for (String key : rowData.getQualifiers(col.getFamily())) { FijiCell<Long> counter = rowData.getMostRecentCell(col.getFamily(), key); if (null != counter) { SchemaOption schemaOption = new SchemaOption(schemaTable. Map<Long, FijiCell<Object>> rowVals = rowData.getCells(col.getFamily(), col.getQualifier()); for (Entry<Long, FijiCell<Object>> timestampedCell : rowVals.entrySet()) { Map<String, NavigableMap<Long, FijiCell<Object>>> rowVals = rowData.getCells(col .getFamily());
if (pager.hasNext()) { FijiRowData qualifierRowData = pager.next(); NavigableSet<String> qualifiers = qualifierRowData.getQualifiers(family); for (String qualifier : qualifiers) { qualifiersPage.add(new FijiColumnName(family, qualifier)); FijiDataRequest fijiDataRequest = DataRequestOptimizer.expandFamilyWithPagedQualifiers(originalDataRequest, qualifiersPage); FijiRowData qualifierPage = mReader.get(mRowData.getEntityId(), fijiDataRequest); for (FijiColumnName fijiColumnName : qualifiersPage) { final NavigableMap<Long, FijiCell<Object>> pagedData = qualifierPage.getCells(fijiColumnName.getFamily(), fijiColumnName.getQualifier()); final NavigableMap<Long, FijiCellWritable> writableData = convertCellsToWritable(pagedData);
/**
 * {@inheritDoc}
 *
 * <p>The key is the entity ID of the current row.</p>
 */
@Override
public EntityId getCurrentKey() throws IOException {
  final EntityId currentKey = mCurrentRow.getEntityId();
  return currentKey;
}
/**
 * Fetches the next qualifier from the map-type family, or null when exhausted.
 *
 * <p>
 *   This effectively pre-loads the entry the iterator will return next, which is how
 *   the iterator can tell whether it is finished or has more elements.
 * </p>
 *
 * @return the next qualifier from the map-type family, or null.
 */
private String getNext() {
  for (;;) {
    // Serve from the current page while it still has qualifiers.
    if ((mPage != null) && mPage.hasNext()) {
      return mPage.next();
    }
    // Current page is missing or drained: stop if the pager has nothing left.
    if (!mQualifierPager.hasNext()) {
      return null;
    }
    // Pull the next page of qualifiers; it may be empty, hence the loop.
    final FijiRowData nextRowPage = mQualifierPager.next(this.mPageSize);
    mPage = nextRowPage.getQualifiers(mFamily).iterator();
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Iterates over the cells of the configured column (or of the whole family when the
 * column is not fully qualified), passes each one through {@code rewriteCell}, and
 * writes back those for which a different cell instance is returned.</p>
 */
@Override
public void produce(final FijiRowData row, final FijiTableContext context)
    throws IOException {
  final Iterable<FijiCell<Object>> cells;
  if (!mColumn.isFullyQualified()) {
    cells = row.asIterable(mColumn.getFamily());
  } else {
    cells = row.asIterable(mColumn.getFamily(), mColumn.getQualifier());
  }

  for (FijiCell<Object> cell : cells) {
    context.incrementCounter(Counters.CELLS_PROCESSED);
    final DecodedCell<Object> before =
        new DecodedCell<Object>(cell.getWriterSchema(), cell.getData());
    final DecodedCell<Object> after = rewriteCell(before);
    // Identity comparison: the same instance coming back means "left untouched".
    if (after == before) {
      continue;
    }
    // NOTE(review): when mColumn is a bare family, mColumn.getQualifier() is presumably
    // null here, so the rewritten cell would not land on the cell's own qualifier.
    // Confirm, and consider using the qualifier carried by the cell instead.
    context.put(
        row.getEntityId(),
        mColumn.getFamily(),
        mColumn.getQualifier(),
        cell.getTimestamp(),
        after.getData());
    context.incrementCounter(Counters.CELLS_REWRITTEN);
  }
}
NavigableMap<String, String> tempExtMap = data.getMostRecentValues("extendedInfo"); NavigableMap<String, Long> tempCounterMap = data.getMostRecentValues("counters"); .setJobId(data.getMostRecentValue("info", "jobId").toString()) .setJobName(data.getMostRecentValue("info", "jobName").toString()) .setJobStartTime(data.<Long>getMostRecentValue("info", "startTime")) .setJobEndTime(data.<Long>getMostRecentValue("info", "endTime")) .setJobEndStatus(data.getMostRecentValue("info", "jobEndStatus").toString()) .setJobCounters(data.getMostRecentValue("info", "counters").toString()) .setJobConfiguration(data.getMostRecentValue("info", "configuration").toString()) .setExtendedInfo(tempExtMap) .setCountersFamily(tempCounterMap)
/**
 * {@inheritDoc}
 *
 * <p>Prints the job history entry for the configured job ID, or, when no job ID is set,
 * prints the entry of every job found by scanning the job history table. The Fiji
 * instance, the job history table and the row scanner are all released even when
 * printing fails part-way.</p>
 */
@Override
protected int run(List<String> nonFlagArgs) throws Exception {
  final Fiji fiji = Fiji.Factory.open(mFijiURI, getConf());
  try {
    final JobHistoryFijiTable jobHistoryTable = JobHistoryFijiTable.open(fiji);
    try {
      if (!mJobId.isEmpty()) {
        final JobHistoryEntry data = jobHistoryTable.getJobDetails(mJobId);
        printEntry(data);
      } else {
        final FijiRowScanner jobScanner = jobHistoryTable.getJobScanner();
        try {
          for (FijiRowData data : jobScanner) {
            final String jobid = data.getMostRecentValue("info", "jobId").toString();
            printEntry(jobHistoryTable.getJobDetails(jobid));
            getPrintStream().printf("%n");
          }
        } finally {
          // Close the scanner even if a lookup or print throws mid-scan.
          jobScanner.close();
        }
      }
    } finally {
      jobHistoryTable.close();
    }
  } finally {
    fiji.release();
  }
  return SUCCESS;
}
/** * Build the Writably compatible FijiRowDataPageWritable with the data for the next page of * cells. If we aren't paging through any cells, then this will just return the data cached * from the qualifiers. * * @return a FijiRowDataPageWritable with a page of data substituted. * @throws IOException if there was an error. */ private FijiRowDataPageWritable nextCellPage() throws IOException { Map<FijiColumnName, NavigableMap<Long, FijiCellWritable>> pageData = Maps.newHashMap(); // Add in all of the data from paged qualifiers pageData.putAll(mQualifierPageData); for (Map.Entry<FijiColumnName, FijiPager> entry : mFijiCellPagers.entrySet()) { final FijiColumnName fijiColumnName = entry.getKey(); final FijiPager cellPager = entry.getValue(); try { final FijiRowData pagedFijiRowData = cellPager.next(); final NavigableMap<Long, FijiCell<Object>> pagedData = pagedFijiRowData.getCells(fijiColumnName.getFamily(), fijiColumnName.getQualifier()); final NavigableMap<Long, FijiCellWritable> writableData = convertCellsToWritable(pagedData); pageData.put(fijiColumnName, writableData); } catch (NoSuchElementException nsee) { // If we run out of pages, put in a blank entry pageData.put(fijiColumnName, EMPTY_DATA); } } return new FijiRowDataPageWritable(pageData); }
/**
 * {@inheritDoc}
 *
 * <p>Runs the producer on one input row and counts the row as processed.</p>
 */
@Override
protected void map(FijiRowData input, Context mapContext) throws IOException {
  // Point the producer context at this row before the producer runs.
  mProducerContext.setEntityId(input.getEntityId());

  mProducer.produce(input, mProducerContext);

  mapContext
      .getCounter(JobHistoryCounters.PRODUCER_ROWS_PROCESSED)
      .increment(1L);
}
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { if (!input.containsColumn("info", "email")) { // This user doesn't have an email address. return; } String email = input.getMostRecentValue("info", "email").toString(); int atSymbol = email.indexOf('@'); if (atSymbol < 0) { // Couldn't find the '@' in the email address. Give up. return; } String domain = email.substring(atSymbol + 1); context.put(domain); } }
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { if (!mInputColumn.isFullyQualified()) { // Copy the entire family. for (String qualifier : input.getQualifiers(mInputColumn.getFamily())) { FijiColumnName sourceColumn = new FijiColumnName(mInputColumn.getFamily(), qualifier); produceAllVersions(input, context, sourceColumn); } } else { // Copy just a specific column. produceAllVersions(input, context, mInputColumn); } }
/** {@inheritDoc} */ @Override public void produce(FijiRowData input, ProducerContext context) throws IOException { // Extract the affinities for the current input entity (row). Node affinities = getAffinities(input, context); if (null == affinities) { throw new IOException("getAffinities() returned null for entity " + input.getEntityId()); } if (null == affinities.getEdges()) { affinities.setEdges(new ArrayList<Edge>()); } // Get the related items for this entity's affinities. Node relationships = getRelationships(affinities, context); // Compute the recommendations from the entity's affinity relationships. Node recommendations = recommend(relationships, context); // Output the recommendations. if (null != recommendations) { write(recommendations, context); } }
/** * This method will be called once for each row of the phonebook table. * * @param entityId The entity id for the row. * @param row The data from the row (in this case, it would only * include the address column because that is all we requested * when configuring the input format). * @param hadoopContext The MapReduce job context used to emit output. * @throws IOException If there is an IO error. */ @Override public void map(EntityId entityId, FijiRowData row, Context hadoopContext) throws IOException { // Check that the row has the info:address column. // The column names are specified as constants in the Fields.java class. if (!row.containsColumn(Fields.INFO_FAMILY, Fields.ADDRESS)) { LOG.info("Missing address field in row: " + entityId); hadoopContext.getCounter(Counter.MISSING_ADDRESS).increment(1L); return; } final String victimState = hadoopContext.getConfiguration().get(CONF_STATE, ""); final Address address = row.getMostRecentValue(Fields.INFO_FAMILY, Fields.ADDRESS); if (victimState.equals(address.getState().toString())) { // Delete the entry. mWriter.deleteRow(entityId); } }
} else { mMinQualifier = page.getQualifiers(mFamily.getFamily()).last();