@Test public void testFindText() throws Exception { // http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_locate Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobarbar"), new Text("foo"), 0)); Assert.assertEquals(3, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 0)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobarbar"), new Text("xbar"), 0)); Assert.assertEquals(6, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 5)); Assert.assertEquals(6, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 5)); Assert.assertEquals(6, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 6)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 7)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobarbar"), new Text("bar"), 10)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text(""), new Text("bar"), 0)); Assert.assertEquals(0, GenericUDFUtils.findText(new Text(""), new Text(""), 0)); Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 0)); Assert.assertEquals(0, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 6)); Assert.assertEquals(-1, GenericUDFUtils.findText(new Text("foobar"), new Text(""), 7)); //Unicode case. Assert.assertEquals(4, GenericUDFUtils.findText(new Text("НАСТРОЕние"), new Text("Р"), 0)); Assert.assertEquals(15, GenericUDFUtils.findText(new Text("НАСТРОЕние НАСТРОЕние"), new Text("Р"), 11)); //surrogate pair case Assert.assertEquals(3, GenericUDFUtils.findText(new Text("123\uD801\uDC00456"), new Text("\uD801\uDC00"), 0)); Assert.assertEquals(4, GenericUDFUtils.findText(new Text("123\uD801\uDC00456"), new Text("4"), 0)); } }
/**
 * Validates the arguments of FIELD(str, str1, str2, ...) and declares its
 * return type.
 *
 * @param arguments object inspectors for the call's arguments; stored in
 *        {@code argumentOIs} for later use
 * @return the writable int object inspector
 * @throws UDFArgumentException if fewer than two arguments are supplied
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length < 2) {
    throw new UDFArgumentException(
        "The function FIELD(str, str1, str2, ...) needs at least two arguments.");
  }

  argumentOIs = arguments;

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function FIELD is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
public Text evaluate(Text s) { if (s == null) { return null; } // set() will only allocate memory if the buffer of result is smaller than // s.getLength() and will never resize the buffer down. result.set(s); // Now do an in-place reversal in result.getBytes(). First, reverse every // character, then reverse the whole string. byte[] data = result.getBytes(); int prev = 0; // The index where the current char starts for (int i = 1; i < result.getLength(); i++) { if (GenericUDFUtils.isUtfStartByte(data[i])) { reverse(data, prev, i - 1); prev = i; } } reverse(data, prev, result.getLength() - 1); reverse(data, 0, result.getLength() - 1); return result; } }
return colTi; // Handled later, only struct will be supported. TypeInfo opTi = GenericUDFUtils.deriveInType(children); if (opTi == null || opTi.getCategory() != Category.PRIMITIVE) { throw new HiveException("Cannot vectorize IN() - common type is " + opTi);
/**
 * Derives a single type for the given expressions by folding their types
 * together with {@code FunctionRegistry.getCommonClass}.
 *
 * <p>Children whose type is the primitive VOID category are ignored. The first
 * remaining type seeds the result; every later type that is not the very same
 * instance is merged in, and the merged type is passed through
 * {@code updateCommonTypeForDecimal}.
 *
 * @param children the expressions whose types are to be combined
 * @return the derived type; {@code null} if no common class exists for some
 *         pair, or if there were no non-VOID children at all
 */
public static TypeInfo deriveInType(List<ExprNodeDesc> children) {
  TypeInfo derived = null;
  for (ExprNodeDesc child : children) {
    TypeInfo current = child.getTypeInfo();

    boolean isVoid = current.getCategory() == Category.PRIMITIVE
        && ((PrimitiveTypeInfo) current).getPrimitiveCategory() == PrimitiveCategory.VOID;
    if (isVoid) {
      continue;
    }

    if (derived == null) {
      // First non-VOID type seen: it becomes the starting point.
      derived = current;
    } else if (derived != current) {
      TypeInfo merged = FunctionRegistry.getCommonClass(derived, current);
      if (merged == null) {
        return null;
      }
      derived = updateCommonTypeForDecimal(merged, current, derived);
    }
  }
  return derived;
}
return colTi; // Handled later, only struct will be supported. TypeInfo opTi = GenericUDFUtils.deriveInType(children); if (opTi == null || opTi.getCategory() != Category.PRIMITIVE) { throw new HiveException("Cannot vectorize IN() - common type is " + opTi);
commonTypeInfo = updateCommonTypeForDecimal(commonTypeInfo, oiTypeInfo, rTypeInfo);
/**
 * Validates the arguments of FIELD(str, str1, str2, ...) and declares its
 * return type.
 *
 * @param arguments object inspectors for the call's arguments; stored in
 *        {@code argumentOIs} for later use
 * @return the writable int object inspector
 * @throws UDFArgumentException if fewer than two arguments are supplied
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length < 2) {
    throw new UDFArgumentException(
        "The function FIELD(str, str1, str2, ...) needs at least two arguments.");
  }

  argumentOIs = arguments;

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function FIELD is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
 * Computes the one-based position of the second argument within the first.
 *
 * @param arguments {@code arguments[0]} is the text to search in and
 *        {@code arguments[1]} the text to search for
 * @return {@code null} if either argument evaluates to {@code null};
 *         otherwise the reused {@code intWritable} field, set to the result of
 *         {@code GenericUDFUtils.findText} (searching from offset 0) plus one
 * @throws HiveException propagated from evaluating the deferred arguments
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0].get() == null) {
    return null;
  }
  if (arguments[1].get() == null) {
    return null;
  }

  Text haystack = (Text) converters[0].convert(arguments[0].get());
  Text needle = (Text) converters[1].convert(arguments[1].get());

  // findText reports a zero-based position; shift it to one-based.
  int zeroBased = GenericUDFUtils.findText(haystack, needle, 0);
  intWritable.set(zeroBased + 1);
  return intWritable;
}
public Text evaluate(Text s) { if (s == null) { return null; } // set() will only allocate memory if the buffer of result is smaller than // s.getLength() and will never resize the buffer down. result.set(s); // Now do an in-place reversal in result.getBytes(). First, reverse every // character, then reverse the whole string. byte[] data = result.getBytes(); int prev = 0; // The index where the current char starts for (int i = 1; i < result.getLength(); i++) { if (GenericUDFUtils.isUtfStartByte(data[i])) { reverse(data, prev, i - 1); prev = i; } } reverse(data, prev, result.getLength() - 1); reverse(data, 0, result.getLength() - 1); return result; } }
/**
 * Validates the arguments of INSTR, prepares string converters for both of
 * them, and declares the return type.
 *
 * @param arguments object inspectors for the call's arguments
 * @return the writable int object inspector
 * @throws UDFArgumentLengthException if the argument count is not exactly 2
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length != 2) {
    throw new UDFArgumentLengthException(
        "The function INSTR accepts exactly 2 arguments.");
  }

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function INSTR is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  // Both arguments are converted to writable strings at evaluation time.
  converters = new ObjectInspectorConverters.Converter[arguments.length];
  for (int i = 0; i < arguments.length; i++) {
    converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }

  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
 * Computes the one-based position of the second argument within the first.
 *
 * @param arguments {@code arguments[0]} is the text to search in and
 *        {@code arguments[1]} the text to search for
 * @return {@code null} if either argument evaluates to {@code null};
 *         otherwise the reused {@code intWritable} field, set to the result of
 *         {@code GenericUDFUtils.findText} (searching from offset 0) plus one
 * @throws HiveException propagated from evaluating the deferred arguments
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0].get() == null) {
    return null;
  }
  if (arguments[1].get() == null) {
    return null;
  }

  Text haystack = (Text) converters[0].convert(arguments[0].get());
  Text needle = (Text) converters[1].convert(arguments[1].get());

  // findText reports a zero-based position; shift it to one-based.
  int zeroBased = GenericUDFUtils.findText(haystack, needle, 0);
  intWritable.set(zeroBased + 1);
  return intWritable;
}
if (GenericUDFUtils.isUtfStartByte(data[i])) { len++;
/**
 * Validates the arguments of INSTR, prepares string converters for both of
 * them, and declares the return type.
 *
 * @param arguments object inspectors for the call's arguments
 * @return the writable int object inspector
 * @throws UDFArgumentLengthException if the argument count is not exactly 2
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length != 2) {
    throw new UDFArgumentLengthException(
        "The function INSTR accepts exactly 2 arguments.");
  }

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function INSTR is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  // Both arguments are converted to writable strings at evaluation time.
  converters = new ObjectInspectorConverters.Converter[arguments.length];
  for (int i = 0; i < arguments.length; i++) {
    converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
  }

  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
 * Computes the one-based position of the first argument within the second,
 * optionally starting the search at a one-based offset given as a third
 * argument.
 *
 * @param arguments {@code arguments[0]} is the text to search for,
 *        {@code arguments[1]} the text to search in, and the optional
 *        {@code arguments[2]} the one-based start position (defaults to 1)
 * @return {@code null} if either of the first two arguments evaluates to
 *         {@code null}; otherwise the reused {@code intWritable} field, set to
 *         0 when the start position converts to {@code null}, or else to the
 *         result of {@code GenericUDFUtils.findText} plus one
 * @throws HiveException propagated from evaluating the deferred arguments
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0].get() == null) {
    return null;
  }
  if (arguments[1].get() == null) {
    return null;
  }

  Text needle = (Text) converters[0].convert(arguments[0].get());
  Text haystack = (Text) converters[1].convert(arguments[1].get());

  int oneBasedStart;
  if (arguments.length == 3) {
    IntWritable startArg = (IntWritable) converters[2].convert(arguments[2].get());
    if (startArg == null) {
      intWritable.set(0);
      return intWritable;
    }
    oneBasedStart = startArg.get();
  } else {
    oneBasedStart = 1;
  }

  // findText works with zero-based offsets in both directions.
  int zeroBasedHit = GenericUDFUtils.findText(haystack, needle, oneBasedStart - 1);
  intWritable.set(zeroBasedHit + 1);
  return intWritable;
}
if (GenericUDFUtils.isUtfStartByte(data[i])) { len++;
/**
 * Validates the arguments of LOCATE, prepares a converter per argument, and
 * declares the return type.
 *
 * @param arguments object inspectors for the call's arguments
 * @return the writable int object inspector
 * @throws UDFArgumentLengthException if the argument count is neither 2 nor 3
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length != 2 && arguments.length != 3) {
    throw new UDFArgumentLengthException(
        "The function LOCATE accepts exactly 2 or 3 arguments.");
  }

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function LOCATE is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  // Arguments 0 and 1 are converted to writable strings; the optional
  // argument 2 is converted to a writable int.
  converters = new ObjectInspectorConverters.Converter[arguments.length];
  for (int i = 0; i < arguments.length; i++) {
    if (i == 0 || i == 1) {
      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    } else if (i == 2) {
      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
          PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    }
  }

  return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
}
/**
 * Computes the one-based position of the first argument within the second,
 * optionally starting the search at a one-based offset given as a third
 * argument.
 *
 * @param arguments {@code arguments[0]} is the text to search for,
 *        {@code arguments[1]} the text to search in, and the optional
 *        {@code arguments[2]} the one-based start position (defaults to 1)
 * @return {@code null} if either of the first two arguments evaluates to
 *         {@code null}; otherwise the reused {@code intWritable} field, set to
 *         0 when the start position converts to {@code null}, or else to the
 *         result of {@code GenericUDFUtils.findText} plus one
 * @throws HiveException propagated from evaluating the deferred arguments
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  if (arguments[0].get() == null) {
    return null;
  }
  if (arguments[1].get() == null) {
    return null;
  }

  Text needle = (Text) converters[0].convert(arguments[0].get());
  Text haystack = (Text) converters[1].convert(arguments[1].get());

  int oneBasedStart;
  if (arguments.length == 3) {
    IntWritable startArg = (IntWritable) converters[2].convert(arguments[2].get());
    if (startArg == null) {
      intWritable.set(0);
      return intWritable;
    }
    oneBasedStart = startArg.get();
  } else {
    oneBasedStart = 1;
  }

  // findText works with zero-based offsets in both directions.
  int zeroBasedHit = GenericUDFUtils.findText(haystack, needle, oneBasedStart - 1);
  intWritable.set(zeroBasedHit + 1);
  return intWritable;
}
if (GenericUDFUtils.isUtfStartByte(data[i])) { len++;
/**
 * Validates the arguments of ELT(N,str1,str2,str3,...), prepares a converter
 * per argument, and declares the return type.
 *
 * @param arguments object inspectors for the call's arguments
 * @return the writable string object inspector
 * @throws UDFArgumentLengthException if fewer than two arguments are supplied
 * @throws UDFArgumentTypeException if any argument is not of a primitive category
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length < 2) {
    throw new UDFArgumentLengthException(
        "The function ELT(N,str1,str2,str3,...) needs at least two arguments.");
  }

  for (int i = 0; i < arguments.length; i++) {
    Category category = arguments[i].getCategory();
    if (category != Category.PRIMITIVE) {
      // Lower-case with Locale.ROOT so the message does not depend on the
      // JVM default locale (e.g. Turkish would turn 'I' into a dotless 'ı').
      throw new UDFArgumentTypeException(i, "The "
          + GenericUDFUtils.getOrdinal(i + 1)
          + " argument of function ELT is expected to a "
          + Category.PRIMITIVE.toString().toLowerCase(java.util.Locale.ROOT)
          + " type, but "
          + category.toString().toLowerCase(java.util.Locale.ROOT)
          + " is found");
    }
  }

  // Argument 0 (N) is converted to a writable int; every following argument
  // is converted to a writable string.
  converters = new ObjectInspectorConverters.Converter[arguments.length];
  for (int i = 0; i < arguments.length; i++) {
    if (i == 0) {
      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
          PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    } else {
      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }
  }

  return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}