ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl))); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell); kvs.add(kv);
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
public String[] getIndividualAttributes() { String attributes = getAttributes(); if (attributes != null) { return attributes.split(DEFAULT_MULTIPLE_ATTRIBUTES_SEPERATOR); } else { return null; } }
ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl))); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell); kvs.add(kv);
ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl))); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValue(cell); kvs.add(kv);
ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex()
ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, line.getLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() tags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl))); Cell cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); KeyValue kv = KeyValueUtil.ensureKeyValueTypeForMR(cell); kvs.add(kv);
ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); ts = parsed.getTimestamp(ts); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex()) { lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); put.add(kv);
ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) { continue;
ImmutableBytesWritable rowKey = new ImmutableBytesWritable(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); ts = parsed.getTimestamp(ts); cellVisibilityExpr = parsed.getCellVisibility(); ttl = parsed.getCellTTL(); for (int i = 0; i < parsed.getColumnCount(); i++) { if (i == parser.getRowKeyColumnIndex() || i == parser.getTimestampKeyColumnIndex() || i == parser.getAttributesKeyColumnIndex() || i == parser.getCellVisibilityColumnIndex() || i == parser.getCellTTLColumnIndex() || (skipEmptyColumns && parsed.getColumnLength(i) == 0)) { continue;
Cell cell = null; if (hfileOutPath == null) { cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { tags.add(new Tag(TagType.TTL_TAG_TYPE, Bytes.toBytes(ttl))); cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags);
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<Integer>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } else if (hasAttributes() && tabOffsets.size() <= getAttributesKeyColumnIndex()) { throw new BadTsvLineException("No attributes specified"); } else if (hasCellVisibility() && tabOffsets.size() <= getCellVisibilityColumnIndex()) { throw new BadTsvLineException("No cell visibility specified"); } else if (hasCellTTL() && tabOffsets.size() <= getCellTTLColumnIndex()) { throw new BadTsvLineException("No cell TTL specified"); } return new ParsedLine(tabOffsets, lineBytes); }
protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { Cell cell = null; if (hfileOutPath == null) { cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { // We won't be validating the expression here. The Visibility CP will do // the validation put.setCellVisibility(new CellVisibility(cellVisibilityExpr)); } if (ttl > 0) { put.setTTL(ttl); } } else { // Creating the KV which needs to be directly written to HFiles. Using the Facade // KVCreator for creation of kvs. cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); } put.add(cell); } }
protected void populatePut(byte[] lineBytes, ImportTsv.TsvParser.ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { Cell cell = null; if (hfileOutPath == null) { cell = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (cellVisibilityExpr != null) { // We won't be validating the expression here. The Visibility CP will do // the validation put.setCellVisibility(new CellVisibility(cellVisibilityExpr)); } if (ttl > 0) { put.setTTL(ttl); } } else { // Creating the KV which needs to be directly written to HFiles. Using the Facade // KVCreator for creation of kvs. cell = this.kvCreator.create(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i), tags); } put.add(cell); } }
public ParsedLine parse(byte[] lineBytes, int length) throws BadTsvLineException { // Enumerate separator offsets ArrayList<Integer> tabOffsets = new ArrayList<Integer>(maxColumnCount); for (int i = 0; i < length; i++) { if (lineBytes[i] == separatorByte) { tabOffsets.add(i); } } if (tabOffsets.isEmpty()) { throw new BadTsvLineException("No delimiter"); } tabOffsets.add(length); if (tabOffsets.size() > maxColumnCount) { throw new BadTsvLineException("Excessive columns"); } else if (tabOffsets.size() <= getRowKeyColumnIndex()) { throw new BadTsvLineException("No row key"); } else if (hasTimestamp() && tabOffsets.size() <= getTimestampKeyColumnIndex()) { throw new BadTsvLineException("No timestamp"); } return new ParsedLine(tabOffsets, lineBytes); }
@Override protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv; kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (parsed.getIndividualAttributes() != null) { String[] attributes = parsed.getIndividualAttributes(); for (String attr : attributes) { String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR); if (split == null || split.length <= 1) { throw new BadTsvLineException(msg(attributes)); } else { if (split[0].length() <= 0 || split[1].length() <= 0) { throw new BadTsvLineException(msg(attributes)); } put.setAttribute(split[0], Bytes.toBytes(split[1])); } } } put.add(kv); }
@Override protected void populatePut(byte[] lineBytes, ParsedLine parsed, Put put, int i) throws BadTsvLineException, IOException { KeyValue kv; kv = new KeyValue(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength(), parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); if (parsed.getIndividualAttributes() != null) { String[] attributes = parsed.getIndividualAttributes(); for (String attr : attributes) { String[] split = attr.split(ImportTsv.DEFAULT_ATTRIBUTES_SEPERATOR); if (split == null || split.length <= 1) { throw new BadTsvLineException(msg(attributes)); } else { if (split[0].length() <= 0 || split[1].length() <= 0) { throw new BadTsvLineException(msg(attributes)); } put.setAttribute(split[0], Bytes.toBytes(split[1])); } } } put.add(kv); }
@Test public void testTsvParserWithTimestamp() throws BadTsvLineException { TsvParser parser = new TsvParser("HBASE_ROW_KEY,HBASE_TS_KEY,col_a,", "\t"); assertNull(parser.getFamily(0)); assertNull(parser.getQualifier(0)); assertNull(parser.getFamily(1)); assertNull(parser.getQualifier(1)); assertBytesEquals(Bytes.toBytes("col_a"), parser.getFamily(2)); assertBytesEquals(HConstants.EMPTY_BYTE_ARRAY, parser.getQualifier(2)); assertEquals(0, parser.getRowKeyColumnIndex()); assertEquals(1, parser.getTimestampKeyColumnIndex()); byte[] line = Bytes.toBytes("rowkey\t1234\tval_a"); ParsedLine parsed = parser.parse(line, line.length); assertEquals(1234l, parsed.getTimestamp(-1)); checkParsing(parsed, Splitter.on("\t").split(Bytes.toString(line))); }