/**
 * Builds a one-line, human-readable summary of a media entry.
 *
 * @param id   identifier placed after the {@code "Id: "} label
 * @param next media entry whose key and media type are appended
 * @return text of the form {@code "Id: <id> Key: <key> media: <mediaType>"}
 */
private static String getDescription(String id, Media next) {
    return "Id: " + id
            + " Key: " + next.getKey()
            + " media: " + next.getMediaType();
}
/**
 * Looks up the BWMeta record stored in the given row.
 *
 * <p>Only columns whose qualifier equals {@code BWMETA_QUALIFIER} are inspected. Each such
 * column value is parsed as a {@code DocumentProtos.MediaContainer}, and the first media entry
 * whose key equals {@code BWMeta2Constants.KEY_BWMETA2_RECORD} is converted via
 * {@code fromMedia}.
 *
 * <p>The qualifier and the row id are converted with an explicit UTF-8 charset so the result
 * does not depend on the JVM's platform default encoding.
 * NOTE(review): assumes row ids and qualifiers are UTF-8 encoded bytes - confirm against the writer.
 *
 * @param row row to read the columns and id from
 * @return the converted record, or {@code null} when no matching column/media entry exists
 * @throws RuntimeException wrapping {@code InvalidProtocolBufferException} when a matching
 *         column value cannot be parsed as a media container
 */
@Override
public BWMetaFile toProtoBuf(Row row) {
    // Encode the qualifier once: it is loop-invariant, and an explicit charset keeps the
    // comparison independent of the platform default encoding.
    final byte[] bwmetaQualifier =
            BWMETA_QUALIFIER.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    for (Column column : row.getColumns()) {
        if (!Arrays.areEqual(bwmetaQualifier, column.getQualifier())) {
            continue;
        }
        try {
            DocumentProtos.MediaContainer mediaContainer =
                    DocumentProtos.MediaContainer.parseFrom(column.getValue());
            for (Media media : mediaContainer.getMediaList()) {
                if (BWMeta2Constants.KEY_BWMETA2_RECORD.equals(media.getKey())) {
                    String rowId = new String(row.getId(), java.nio.charset.StandardCharsets.UTF_8);
                    return fromMedia(rowId, media);
                }
            }
        } catch (InvalidProtocolBufferException e) {
            throw new RuntimeException("Invalid data:", e);
        }
    }
    // No column/media pair carried a BWMeta record.
    return null;
}
/**
 * Applies every configured transformer to each media entry whose key matches the
 * transformer's supported key (compared case-insensitively).
 *
 * <p>An exception thrown by a transformer is logged together with the row id and the
 * transformer's supported key; processing then continues with the next media entry.
 *
 * @param rowId       row identifier, used only in the error log message
 * @param result      parse result handed to each transformer
 * @param media       container (or builder) whose media list is scanned
 * @param newId       identifier handed to each transformer
 * @param transformed initial success flag carried in by the caller
 * @return {@code true} if {@code transformed} was already {@code true} or at least one
 *         transformer call returned {@code true}; otherwise {@code false}
 */
@SuppressWarnings("unchecked")
boolean transformAndReturnSuccess(String rowId, MultiTypeParseResult result,
        DocumentProtos.MediaContainerOrBuilder media, String newId, boolean transformed) {
    for (ProtoMediaMetadataToMetadata transformer : transformers) {
        for (Media candidate : media.getMediaList()) {
            // Skip entries without a key or with a key this transformer does not handle.
            if (!candidate.hasKey()
                    || !transformer.getSupportedKey().equalsIgnoreCase(candidate.getKey())) {
                continue;
            }
            try {
                // The transformer is always invoked for a matching entry, even when the
                // flag is already true.
                if (transformer.transform(candidate, newId, result, media)) {
                    transformed = true;
                }
            } catch (Exception e) {
                LOGGER.error("exception at row id: " + rowId + " transfomer: " + transformer.getSupportedKey(), e);
            }
        }
    }
    return transformed;
}
LOGGER.debug("There were no suitable transformation. Available media types are: "); for (DocumentProtos.Media tmp : mc.getMediaList()) { LOGGER.debug("key: {}, type: {}", tmp.getKey(), tmp.getMediaType());
List<DocumentProtos.Media> meList=mc.getMediaList(); for (DocumentProtos.Media m:meList) { if (m.getKey().equalsIgnoreCase(CrossrefConstants.KEY_CROSSREF_JSON_RECORD) || m.getKey().equalsIgnoreCase(CrossrefConstants.KEY_CROSSREF_UNIXREF_RECORD)){ if (m.getKey().equalsIgnoreCase(CCBW2Constants.KEY_SCHOLAR_RECORD)){ gsMedia=m;
fileSize = media.getSourceFilesize(); } else { logger.warn("Source file size is not set in " + media.getKey() + ", using getSerializedSize() method"); fileSize = media.getSerializedSize(); nlmMediaBuilder.setKey(media.getKey()); nlmMediaBuilder.setSourceFilesize(nlmString.length()); nlmMediaBuilder.setContent(ByteString.copyFromUtf8(nlmString)); nlmMediaBuilder.setProvenance(provenanceBuilder); context.write(new Text(media.getKey()), new BytesWritable(nlmMediaBuilder.build().toByteArray())); } catch (AnalysisException ex) { logger.warn("cannot process PDF " + media.getSourcePath(), ex);