/**
 * Builds a helper for one string-family primitive category and allocates the
 * writable instance that matches that category.
 *
 * @param type the primitive category; STRING, CHAR and VARCHAR are accepted
 * @throws UDFArgumentException if {@code type} is any other category
 */
public StringHelper(PrimitiveCategory type) throws UDFArgumentException {
  this.type = type;

  // Choose the writable implementation that corresponds to the category.
  switch (type) {
    case VARCHAR:
      returnValue = new HiveVarcharWritable();
      break;
    case CHAR:
      returnValue = new HiveCharWritable();
      break;
    case STRING:
      returnValue = new Text();
      break;
    default:
      throw new UDFArgumentException("Unexpected non-string type " + type);
  }
}
/**
 * Sets this object's value from another writable by delegating to the
 * overload that accepts the result of {@code val.getHiveVarchar()}.
 *
 * @param val       the writable to read the value from
 * @param maxLength the length limit forwarded to the delegated overload
 */
public void set(HiveVarcharWritable val, int maxLength) {
  set(
      val.getHiveVarchar(),
      maxLength);
}
@Override public void init(ByteArrayRef bytes, int start, int length) { if (oi.isEscaped()) { Text textData = data.getTextValue(); // This is doing a lot of copying here, this could be improved by enforcing length // at the same time as escaping rather than as separate steps. LazyUtils.copyAndEscapeStringDataToText(bytes.getData(), start, length, oi.getEscapeChar(),textData); data.set(textData.toString(), maxLength); isNull = false; } else { try { String byteData = null; byteData = Text.decode(bytes.getData(), start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { isNull = true; LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e); } } }
/**
 * Re-sets the current value with {@code maxLength} applied when its character
 * length exceeds that limit; otherwise leaves the value untouched.
 *
 * @param maxLength the character-length limit to compare against
 */
public void enforceMaxLength(int maxLength) {
  if (getCharacterLength() <= maxLength) {
    // Already within the limit: nothing to do.
    return;
  }
  set(value.toString(), maxLength);
}
/**
 * Returns a fresh writable whose value is taken from {@code val} with this
 * object's {@code getMaxLength()} passed as the length limit.
 *
 * @param val the source writable
 * @return a newly allocated {@code HiveVarcharWritable}; {@code val} itself is not returned
 */
private HiveVarcharWritable getWritableWithParams(HiveVarcharWritable val) {
  HiveVarcharWritable result = new HiveVarcharWritable();
  result.set(val, getMaxLength());
  return result;
}
return null; Text t = new Text(); return null; case BOOLEAN: t.set(((BooleanObjectInspector) inputOI).get(input) ? trueBytes : falseBytes); return t; .getStrippedValue()); .toString()); t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); return t; .getPrimitiveWritableObject(input).toString()); return t; case BINARY: if (binaryOI.preferWritable()) { BytesWritable bytes = binaryOI.getPrimitiveWritableObject(input); t.set(bytes.getBytes(), 0, bytes.getLength()); t.set(((HiveDecimalObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); return t; default:
/**
 * Checks that {@code GenericUDFPrintf} accepts a char(5) and a varchar(7) argument
 * after the format string, reports the writable string inspector as its output
 * inspector, and produces the expected formatted text.
 */
@Test
public void testCharVarcharArgs() throws HiveException {
  // Runtime values: "hello" as a char writable, "world" as a varchar writable.
  HiveCharWritable charValue = new HiveCharWritable();
  charValue.set("hello");
  HiveVarcharWritable varcharValue = new HiveVarcharWritable();
  varcharValue.set("world");

  DeferredObject[] args = new DeferredObject[] {
      new DeferredJavaObject(new Text("1st: %s, 2nd: %s")),
      new DeferredJavaObject(charValue),
      new DeferredJavaObject(varcharValue)
  };

  // Inspectors in the same order as the arguments above.
  ObjectInspector[] inputOIs = new ObjectInspector[] {
      PrimitiveObjectInspectorFactory.writableStringObjectInspector,
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getCharTypeInfo(5)),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
          TypeInfoFactory.getVarcharTypeInfo(7))
  };

  GenericUDFPrintf udf = new GenericUDFPrintf();
  PrimitiveObjectInspector outputOI = (PrimitiveObjectInspector) udf.initialize(inputOIs);
  Assert.assertEquals(PrimitiveObjectInspectorFactory.writableStringObjectInspector, outputOI);

  Text formatted = (Text) udf.evaluate(args);
  Assert.assertEquals("1st: hello, 2nd: world", formatted.toString());
}
return new DoubleWritable(0); case BINARY: return new BytesWritable(ArrayUtils.EMPTY_BYTE_ARRAY); case STRING: return new Text(ArrayUtils.EMPTY_BYTE_ARRAY); case VARCHAR: return new HiveVarcharWritable(new HiveVarchar(StringUtils.EMPTY, -1)); case CHAR: return new HiveCharWritable(new HiveChar(StringUtils.EMPTY, -1)); case DECIMAL: return new HiveDecimalWritable(); case INTERVAL_YEAR_MONTH: return new HiveIntervalYearMonthWritable();
/**
 * Wraps raw bytes in the writable that corresponds to the given type.
 *
 * <ul>
 *   <li>string type: a {@code Text} built from the bytes</li>
 *   <li>varchar type: a {@code HiveVarcharWritable} around a {@code HiveVarchar} built
 *       from the bytes' text form, with -1 passed as its length argument</li>
 *   <li>binary type: a {@code BytesWritable} built from the bytes</li>
 * </ul>
 *
 * @param ti    the type to produce a writable for
 * @param value the raw bytes
 * @return the writable, or {@code null} when {@code ti} is none of the types above
 */
private Writable getWritableValue(TypeInfo ti, byte[] value) {
  if (ti.equals(TypeInfoFactory.stringTypeInfo)) {
    return new Text(value);
  }
  if (ti.equals(TypeInfoFactory.varcharTypeInfo)) {
    String asString = new Text(value).toString();
    return new HiveVarcharWritable(new HiveVarchar(asString, -1));
  }
  if (ti.equals(TypeInfoFactory.binaryTypeInfo)) {
    return new BytesWritable(value);
  }
  return null;
}
public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException { BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults; if (!binarySortableReadHiveVarcharResults.isInit()) { binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); } HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable(); // Decode the bytes into our Text buffer, then truncate. BinarySortableSerDe.deserializeText(inputByteBuffer, columnSortOrderIsDesc[fieldIndex], hiveVarcharWritable.getTextValue()); hiveVarcharWritable.enforceMaxLength(binarySortableReadHiveVarcharResults.getMaxLength()); readHiveVarcharResults.bytes = hiveVarcharWritable.getTextValue().getBytes(); readHiveVarcharResults.start = 0; readHiveVarcharResults.length = hiveVarcharWritable.getTextValue().getLength(); }
HiveDecimalWritable vDecimal = ((HiveDecimalObjectInspector)inputOI). getPrimitiveWritableObject(parameters[0]); int startIdx = vDecimal.toBytes(scratchBuffer); bf.addBytes(scratchBuffer, startIdx, scratchBuffer.length - startIdx); break; DateWritable vDate = ((DateObjectInspector)inputOI). getPrimitiveWritableObject(parameters[0]); bf.addLong(vDate.getDays()); break; case TIMESTAMP: case CHAR: Text vChar = ((HiveCharObjectInspector)inputOI). getPrimitiveWritableObject(parameters[0]).getStrippedValue(); bf.addBytes(vChar.getBytes(), 0, vChar.getLength()); break; case VARCHAR: Text vVarChar = ((HiveVarcharObjectInspector)inputOI). getPrimitiveWritableObject(parameters[0]).getTextValue(); bf.addBytes(vVarChar.getBytes(), 0, vVarChar.getLength()); break; case STRING:
Text t; if (value instanceof String) { t = new Text((String) value); } else { t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value); bv.setVal(i, t.getBytes(), 0, t.getLength()); } else if (outputOI instanceof WritableHiveCharObjectInspector) { WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI; hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value); Text t = hiveCharWritable.getTextValue(); hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value); Text t = hiveVarcharWritable.getTextValue(); StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength); ts = ((WritableDateObjectInspector) outputOI).getPrimitiveJavaObject(value); long l = DateWritable.dateToDays(ts); lv.vector[i] = l; } else if (outputOI instanceof WritableBooleanObjectInspector) {
Text r = reuse == null ? new Text() : (Text) reuse; return deserializeText(buffer, invert, r); reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse; deserializeText(buffer, invert, r.getTextValue()); r.enforceMaxLength(getCharacterMaxLength(type)); return r; reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse; deserializeText(buffer, invert, r.getTextValue()); r.enforceMaxLength(getCharacterMaxLength(type)); return r; BytesWritable bw = new BytesWritable() ; bw.set(buffer.getData(), start, length); } else { bw.set(buffer.getData(), start, length); HiveDecimalWritable bdw = (reuse == null ? new HiveDecimalWritable() : (HiveDecimalWritable) reuse); bdw.set(bd); return bdw;
private Object parseAsPrimitive(JsonNode value, TypeInfo typeInfo) throws SerDeException { switch (TypeInfoFactory.getPrimitiveTypeInfo(typeInfo.getTypeName()).getPrimitiveCategory()) { case TIMESTAMP: TimestampWritable timestampWritable = new TimestampWritable(); timestampWritable.setTime(TS_PARSER.get().parseMillis(value.textValue())); return timestampWritable; return (new DoubleWritable(value.doubleValue())); case DECIMAL: return (new HiveDecimalWritable(HiveDecimal.create(value.decimalValue()))); case CHAR: return (new HiveCharWritable(new HiveChar(value.textValue(), ((CharTypeInfo) typeInfo).getLength()))); case VARCHAR: return (new HiveVarcharWritable(new HiveVarchar(value.textValue(), ((CharTypeInfo) typeInfo).getLength()))); case STRING: return (new Text(value.textValue())); case BOOLEAN: return (new BooleanWritable(value.isBoolean() ? value.booleanValue() : Boolean.valueOf(value.textValue())));
return new LongWritable(0); case TIMESTAMP: return new TimestampWritable(new Timestamp(0)); case DATE: return new DateWritable(new Date(0)); case FLOAT: return new FloatWritable(0); return new DoubleWritable(0); case BINARY: return new BytesWritable(ArrayUtils.EMPTY_BYTE_ARRAY); case STRING: return new Text(ArrayUtils.EMPTY_BYTE_ARRAY); case VARCHAR: return new HiveVarcharWritable(new HiveVarchar(StringUtils.EMPTY, -1)); case CHAR: return new HiveCharWritable(new HiveChar(StringUtils.EMPTY, -1)); case DECIMAL: return new HiveDecimalWritable(); case INTERVAL_YEAR_MONTH: return new HiveIntervalYearMonthWritable();
case TIMESTAMP: ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( batchIndex, ((TimestampWritable) object).getTimestamp()); break; case DATE: ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = ((DateWritable) object).getDays(); break; case FLOAT: batchIndex, bw.getBytes(), 0, bw.getLength()); batchIndex, tw.getBytes(), 0, tw.getLength()); hiveVarchar = (HiveVarchar) object; } else { hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar(); hiveChar = (HiveChar) object; } else { hiveChar = ((HiveCharWritable) object).getHiveChar();
byte[] bytes = value.getBytes(); int byteLength = value.getLength(); serializeWrite.writeString(bytes, 0, byteLength); case CHAR: HiveChar value = ((HiveCharWritable) object).getHiveChar(); serializeWrite.writeHiveChar(value); case VARCHAR: HiveVarchar value = ((HiveVarcharWritable) object).getHiveVarchar(); serializeWrite.writeHiveVarchar(value); case DECIMAL: HiveDecimal value = ((HiveDecimalWritable) object).getHiveDecimal(); DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo; serializeWrite.writeHiveDecimal(value, decTypeInfo.scale()); byte[] binaryBytes = byteWritable.getBytes(); int length = byteWritable.getLength(); serializeWrite.writeBinary(binaryBytes, 0, length);
/**
 * Evaluates {@code GenericUDFLikeAny} with the value "abc" and the patterns
 * "%b%" (as {@code Text}) and "a%" (as {@code HiveVarcharWritable}), and expects
 * the result to be {@code true}.
 */
@Test
public void testTrue() throws HiveException {
  udf = new GenericUDFLikeAny();

  // All three arguments are declared with the writable string inspector.
  ObjectInspector stringOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  udf.initialize(new ObjectInspector[] { stringOI, stringOI, stringOI });

  HiveVarchar varcharPattern = new HiveVarchar();
  varcharPattern.setValue("a%");

  GenericUDF.DeferredJavaObject[] args = new GenericUDF.DeferredJavaObject[] {
      new DeferredJavaObject(new Text("abc")),
      new DeferredJavaObject(new Text("%b%")),
      new GenericUDF.DeferredJavaObject(new HiveVarcharWritable(varcharPattern))
  };

  BooleanWritable result = (BooleanWritable) udf.evaluate(args);
  assertEquals(true, result.get());
}
return null; case BOOLEAN: t.set(((BooleanObjectInspector) inputOI).get(input) ? trueBytes : falseBytes); return t; t.set(((HiveCharObjectInspector) inputOI).getPrimitiveWritableObject(input) .getStrippedValue()); } else { t.set(((HiveCharObjectInspector) inputOI).getPrimitiveJavaObject(input).getStrippedValue()); if (inputOI.preferWritable()) { t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveWritableObject(input) .toString()); } else { t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(input).toString()); if (binaryOI.preferWritable()) { BytesWritable bytes = binaryOI.getPrimitiveWritableObject(input); t.set(bytes.getBytes(), 0, bytes.getLength()); } else { t.set(binaryOI.getPrimitiveJavaObject(input));
ByteBuffer b = Text.encode(String.valueOf(f)); out.write(b.array(), 0, b.limit()); break; ByteBuffer b = Text.encode(String.valueOf(d)); out.write(b.array(), 0, b.limit()); break; writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); break; Text t = hc.getPaddedValue(); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); Text t = hc.getTextValue(); writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, needsEscape); byte[] toEncode = new byte[bw.getLength()]; System.arraycopy(bw.getBytes(), 0,toEncode, 0, bw.getLength()); byte[] toWrite = Base64.encodeBase64(toEncode); out.write(toWrite, 0, toWrite.length);