/**
 * Copies this wrapper's state into {@code newInstance}.
 *
 * <p>After the superclass copy, nothing else is done when this wrapper holds no UDF
 * ({@code _stdUdf == null}). Otherwise the object inspectors, factory and UDF reference are
 * handed to the new wrapper, its nullable-argument flags are recomputed, the UDF is
 * initialized with the factory, the required-files flag is cleared and
 * {@code createStdData()} is invoked on the new wrapper.
 *
 * @param newInstance the target instance; it is cast to {@code StdUdfWrapper}
 * @throws UDFArgumentException declared by the overridden method
 */
@Override
public void copyToNewInstance(Object newInstance) throws UDFArgumentException {
  super.copyToNewInstance(newInstance);
  if (_stdUdf != null) {
    StdUdfWrapper copy = (StdUdfWrapper) newInstance;
    copy._inputObjectInspectors = _inputObjectInspectors;
    copy._stdFactory = _stdFactory;
    copy._stdUdf = _stdUdf;
    copy._nullableArguments = _stdUdf.getAndCheckNullableArguments();
    copy._stdUdf.init(_stdFactory);
    // The copy has not handed required files to the UDF yet.
    copy._requiredFilesProcessed = false;
    copy.createStdData();
  }
}
/**
 * Passes the required files to the wrapped UDF unless {@code _requiredFilesProcessed} is already set.
 *
 * <p>Each entry of {@code requiredFiles} is wrapped in a {@code Path}, run through
 * {@code Path.getPathWithoutSchemeAndAuthority} and converted back to a string before the
 * resulting array is given to {@code _stdUdf.processRequiredFiles}. The flag is set to
 * {@code true} afterwards so later calls return without doing anything. The method is
 * {@code synchronized} on this wrapper.
 *
 * @param requiredFiles paths of the files the UDF requires
 */
private synchronized void processRequiredFiles(String[] requiredFiles) { if (!_requiredFilesProcessed) { _stdUdf.processRequiredFiles(Arrays.stream(requiredFiles) .map(path -> Path.getPathWithoutSchemeAndAuthority(new Path(path)).toString()) .toArray(String[]::new)); _requiredFilesProcessed = true; } } }
/**
 * Fetches the nullability flags from {@link #getNullableArguments()} and verifies that there
 * is exactly one flag per input argument.
 *
 * @return the array produced by {@link #getNullableArguments()}
 * @throws RuntimeException if the array length differs from {@code numberOfArguments()}
 */
public final boolean[] getAndCheckNullableArguments() {
  boolean[] flags = getNullableArguments();
  if (flags.length == numberOfArguments()) {
    return flags;
  }
  throw new RuntimeException(
      "Unexpected number of nullable arguments. Expected:" + numberOfArguments() + " Received:" + flags.length);
}
/**
 * Builds one type-variable constraint per distinct generic type element that appears in the
 * UDF's input parameter signatures or in its output parameter signature.
 *
 * @param stdUdf the UDF whose signatures are parsed
 * @return the constraints, one for each distinct generic element found
 */
@VisibleForTesting
static List<TypeVariableConstraint> getTypeVariableConstraintsForStdUdf(StdUDF stdUdf) {
  // A set is used so that a generic element used in several signatures is counted once.
  Set<GenericTypeSignatureElement> genericElements = new HashSet<>();
  stdUdf.getInputParameterSignatures().forEach(inputSignature ->
      genericElements.addAll(
          com.linkedin.transport.typesystem.TypeSignature.parse(inputSignature).getGenericTypeSignatureElements()));

  String outputSignature = stdUdf.getOutputParameterSignature();
  genericElements.addAll(
      com.linkedin.transport.typesystem.TypeSignature.parse(outputSignature).getGenericTypeSignatureElements());

  return genericElements.stream()
      .map(GenericTypeSignatureElement::toString)
      .map(name -> typeVariable(name))
      .collect(Collectors.toList());
}
/**
 * Returns one boolean per input argument telling whether that argument is nullable.
 *
 * <p>A nullable argument may receive a null value, and the UDF implementation must then handle
 * null explicitly. For a non-nullable argument, the UDF returns null if the argument is null.
 * The returned array should have as many entries as there are input arguments. This default
 * implementation marks every argument as non-nullable.
 *
 * @return a new array of length {@code numberOfArguments()} whose entries are all {@code false}
 */
public boolean[] getNullableArguments() {
  // A freshly allocated boolean array is all false, i.e. every argument is non-nullable.
  boolean[] allNonNullable = new boolean[numberOfArguments()];
  return allNonNullable;
}
private StdData[] wrapArguments(StdUDF stdUDF, Type[] types, Object[] arguments) { StdFactory stdFactory = stdUDF.getStdFactory(); StdData[] stdData = new StdData[arguments.length]; // TODO: Reuse wrapper objects by creating them once upon initialization and reuse them here // along the same lines of what we do in Hive implementation. // JIRA: https://jira01.corp.linkedin.com:8443/browse/LIHADOOP-34894 for (int i = 0; i < stdData.length; i++) { stdData[i] = PrestoWrapper.createStdData(arguments[i], types[i], stdFactory); } return stdData; }
protected StdUdfWrapper(StdUDF stdUDF) { super(new Signature(((TopLevelStdUDF) stdUDF).getFunctionName(), FunctionKind.SCALAR, getTypeVariableConstraintsForStdUdf(stdUDF), ImmutableList.of(), parseTypeSignature(stdUDF.getOutputParameterSignature()), stdUDF.getInputParameterSignatures() .stream() .map(TypeSignature::parseTypeSignature) .collect(Collectors.toList()), false)); _functionDescription = ((TopLevelStdUDF) stdUDF).getFunctionDescription(); }
/**
 * Returns one boolean per input argument telling whether that argument is nullable.
 *
 * <p>A nullable argument may receive a null value, and the UDF implementation must then handle
 * null explicitly. For a non-nullable argument, the UDF returns null if the argument is null.
 * The returned array should have as many entries as there are input arguments. This default
 * implementation marks every argument as non-nullable.
 *
 * @return a new array of length {@code numberOfArguments()} whose entries are all {@code false}
 */
public boolean[] getNullableArguments() {
  // A freshly allocated boolean array is all false, i.e. every argument is non-nullable.
  boolean[] allNonNullable = new boolean[numberOfArguments()];
  return allNonNullable;
}
private StdData[] wrapArguments(StdUDF stdUDF, Type[] types, Object[] arguments) { StdFactory stdFactory = stdUDF.getStdFactory(); StdData[] stdData = new StdData[arguments.length]; // TODO: Reuse wrapper objects by creating them once upon initialization and reuse them here // along the same lines of what we do in Hive implementation. // JIRA: https://jira01.corp.linkedin.com:8443/browse/LIHADOOP-34894 for (int i = 0; i < stdData.length; i++) { stdData[i] = PrestoWrapper.createStdData(arguments[i], types[i], stdFactory); } return stdData; }
/**
 * Copies this wrapper's state into {@code newInstance}.
 *
 * <p>After the superclass copy, nothing else is done when this wrapper holds no UDF
 * ({@code _stdUdf == null}). Otherwise the object inspectors, factory and UDF reference are
 * handed to the new wrapper, its nullable-argument flags are recomputed, the UDF is
 * initialized with the factory, the required-files flag is cleared and
 * {@code createStdData()} is invoked on the new wrapper.
 *
 * @param newInstance the target instance; it is cast to {@code StdUdfWrapper}
 * @throws UDFArgumentException declared by the overridden method
 */
@Override
public void copyToNewInstance(Object newInstance) throws UDFArgumentException {
  super.copyToNewInstance(newInstance);
  if (_stdUdf != null) {
    StdUdfWrapper copy = (StdUdfWrapper) newInstance;
    copy._inputObjectInspectors = _inputObjectInspectors;
    copy._stdFactory = _stdFactory;
    copy._stdUdf = _stdUdf;
    copy._nullableArguments = _stdUdf.getAndCheckNullableArguments();
    copy._stdUdf.init(_stdFactory);
    // The copy has not handed required files to the UDF yet.
    copy._requiredFilesProcessed = false;
    copy.createStdData();
  }
}
protected StdUdfWrapper(StdUDF stdUDF) { super(new Signature(((TopLevelStdUDF) stdUDF).getFunctionName(), FunctionKind.SCALAR, getTypeVariableConstraintsForStdUdf(stdUDF), ImmutableList.of(), parseTypeSignature(stdUDF.getOutputParameterSignature()), stdUDF.getInputParameterSignatures() .stream() .map(TypeSignature::parseTypeSignature) .collect(Collectors.toList()), false)); _functionDescription = ((TopLevelStdUDF) stdUDF).getFunctionDescription(); }
/**
 * Resolves every entry of {@code _distributedCacheFiles} through {@code getLocalFilePath} and
 * passes the resulting path strings to the wrapped UDF, unless
 * {@code _requiredFilesProcessed} is already set.
 *
 * <p>The flag is set to {@code true} once the UDF has received the files. The method is
 * {@code synchronized} on this wrapper.
 *
 * @throws RuntimeException wrapping the {@code IOException} if a path cannot be resolved
 */
private synchronized void processRequiredFiles() {
  if (_requiredFilesProcessed) {
    return;
  }
  String[] resolvedPaths = Arrays.stream(_distributedCacheFiles)
      .map(cacheFile -> {
        try {
          return getLocalFilePath(cacheFile).toString();
        } catch (IOException e) {
          // Lambdas cannot throw checked exceptions here, so rethrow unchecked with the cause.
          throw new RuntimeException("Failed to resolve path: [" + cacheFile + "].", e);
        }
      })
      .toArray(String[]::new);
  _stdUdf.processRequiredFiles(resolvedPaths);
  _requiredFilesProcessed = true;
}
/**
 * Fetches the nullability flags from {@link #getNullableArguments()} and verifies that there
 * is exactly one flag per input argument.
 *
 * @return the array produced by {@link #getNullableArguments()}
 * @throws RuntimeException if the array length differs from {@code numberOfArguments()}
 */
public final boolean[] getAndCheckNullableArguments() {
  boolean[] flags = getNullableArguments();
  if (flags.length == numberOfArguments()) {
    return flags;
  }
  throw new RuntimeException(
      "Unexpected number of nullable arguments. Expected:" + numberOfArguments() + " Received:" + flags.length);
}
@Override public ScalarFunctionImplementation specialize(BoundVariables boundVariables, int arity, TypeManager typeManager, FunctionRegistry functionRegistry) { StdFactory stdFactory = new PrestoFactory(boundVariables, typeManager, functionRegistry); StdUDF stdUDF = getStdUDF(); stdUDF.init(stdFactory); // Subtract a small jitter value so that refresh is triggered on first call // Do not add extra delay, if refresh time was set to lower value by an earlier specialize long initialJitter = getRefreshIntervalMillis() / JITTER_FACTOR; int initialJitterInt = initialJitter > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) initialJitter; _requiredFilesNextRefreshTime = Math.min(_requiredFilesNextRefreshTime, System.currentTimeMillis() - (new Random()).nextInt(initialJitterInt)); boolean[] nullableArguments = stdUDF.getAndCheckNullableArguments(); return new ScalarFunctionImplementation(true, getNullConventionForArguments(nullableArguments), getMethodHandle(stdUDF, typeManager, boundVariables, nullableArguments), isDeterministic()); }
/**
 * Builds one type-variable constraint per distinct generic type element that appears in the
 * UDF's input parameter signatures or in its output parameter signature.
 *
 * @param stdUdf the UDF whose signatures are parsed
 * @return the constraints, one for each distinct generic element found
 */
@VisibleForTesting
static List<TypeVariableConstraint> getTypeVariableConstraintsForStdUdf(StdUDF stdUdf) {
  // A set is used so that a generic element used in several signatures is counted once.
  Set<GenericTypeSignatureElement> genericElements = new HashSet<>();
  stdUdf.getInputParameterSignatures().forEach(inputSignature ->
      genericElements.addAll(
          com.linkedin.transport.typesystem.TypeSignature.parse(inputSignature).getGenericTypeSignatureElements()));

  String outputSignature = stdUdf.getOutputParameterSignature();
  genericElements.addAll(
      com.linkedin.transport.typesystem.TypeSignature.parse(outputSignature).getGenericTypeSignatureElements());

  return genericElements.stream()
      .map(GenericTypeSignatureElement::toString)
      .map(name -> typeVariable(name))
      .collect(Collectors.toList());
}
/**
 * Resolves every entry of {@code _distributedCacheFiles} through {@code getLocalFilePath} and
 * passes the resulting path strings to the wrapped UDF, unless
 * {@code _requiredFilesProcessed} is already set.
 *
 * <p>The flag is set to {@code true} once the UDF has received the files. The method is
 * {@code synchronized} on this wrapper.
 *
 * @throws RuntimeException wrapping the {@code IOException} if a path cannot be resolved
 */
private synchronized void processRequiredFiles() {
  if (_requiredFilesProcessed) {
    return;
  }
  String[] resolvedPaths = Arrays.stream(_distributedCacheFiles)
      .map(cacheFile -> {
        try {
          return getLocalFilePath(cacheFile).toString();
        } catch (IOException e) {
          // Lambdas cannot throw checked exceptions here, so rethrow unchecked with the cause.
          throw new RuntimeException("Failed to resolve path: [" + cacheFile + "].", e);
        }
      })
      .toArray(String[]::new);
  _stdUdf.processRequiredFiles(resolvedPaths);
  _requiredFilesProcessed = true;
}
@Override public ScalarFunctionImplementation specialize(BoundVariables boundVariables, int arity, TypeManager typeManager, FunctionRegistry functionRegistry) { StdFactory stdFactory = new PrestoFactory(boundVariables, typeManager, functionRegistry); StdUDF stdUDF = getStdUDF(); stdUDF.init(stdFactory); // Subtract a small jitter value so that refresh is triggered on first call // Do not add extra delay, if refresh time was set to lower value by an earlier specialize long initialJitter = getRefreshIntervalMillis() / JITTER_FACTOR; int initialJitterInt = initialJitter > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) initialJitter; _requiredFilesNextRefreshTime = Math.min(_requiredFilesNextRefreshTime, System.currentTimeMillis() - (new Random()).nextInt(initialJitterInt)); boolean[] nullableArguments = stdUDF.getAndCheckNullableArguments(); return new ScalarFunctionImplementation(true, getNullConventionForArguments(nullableArguments), getMethodHandle(stdUDF, typeManager, boundVariables, nullableArguments), isDeterministic()); }
boolean atLeastOneInputParametersSignaturesBindingSuccess = false; for (StdUDF stdUdf: stdUdfImplementations) { List<String> inputParameterSignatures = stdUdf.getInputParameterSignatures(); if (inputParameterSignatures.size() != dataTypes.length) { continue; TypeSignature.parse(_stdUdf.getOutputParameterSignature()), boundVariables );
private synchronized void processRequiredFiles(StdUDF stdUDF, String[] requiredFiles) { if (_requiredFilesNextRefreshTime < System.currentTimeMillis()) { try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) { String[] copiedFiles = new String[requiredFiles.length]; FileSystemClient client = new FileSystemClient(); for (int i = 0; i < requiredFiles.length; i++) { String localFilename = client.copyToLocalFile(requiredFiles[i]); copiedFiles[i] = localFilename; } stdUDF.processRequiredFiles(copiedFiles); // Determine how many times _refreshIntervalMillis needs to be added to go above currentTimeMillis int refreshIntervalFactor = (int) Math.ceil( (System.currentTimeMillis() - _requiredFilesNextRefreshTime) / (double) getRefreshIntervalMillis()); _requiredFilesNextRefreshTime += getRefreshIntervalMillis() * Math.max(1, refreshIntervalFactor); } } }
/**
 * Matches the input schemas against the expected type signatures and finds bindings for the
 * generic parameters. Once the bindings are known, the output type (in the form of an Avro
 * schema) is inferred by substituting the bound values into the output type signature.
 *
 * <p>The resolved input schemas, factory and UDF are stored on this wrapper; the UDF's
 * nullable-argument flags are fetched, the UDF is initialized with the factory, the
 * required-files flag is cleared and {@code createStdData()} is invoked.
 *
 * @param arguments Input Avro Schemas of UDF parameters.
 * @return Inferred output Avro Schema.
 */
public Schema initialize(Schema[] arguments) {
  AvroTypeInference inference = new AvroTypeInference();
  inference.compile(arguments, getStdUdfImplementations(), getTopLevelUdfClass());

  _inputSchemas = inference.getInputDataTypes();
  _stdFactory = inference.getStdFactory();
  _stdUdf = inference.getStdUdf();

  _nullableArguments = _stdUdf.getAndCheckNullableArguments();
  _stdUdf.init(_stdFactory);
  // Required files have not been handed to the freshly resolved UDF yet.
  _requiredFilesProcessed = false;
  createStdData();

  Schema outputSchema = inference.getOutputDataType();
  return outputSchema;
}