/**
 * Enqueues the {@code gen_numbers} kernel on the given queue.
 * The kernel object is created on first use and then reused; its two arguments
 * are rebound to {@code seeds} and {@code output} on every call.
 * @param commandQueue queue the kernel is enqueued on
 * @param seeds buffer bound to the first kernel argument
 * @param output buffer bound to the second kernel argument
 * @param globalWorkSizes global work sizes forwarded to {@code enqueueNDRange}
 * @param localWorkSizes local work sizes forwarded to {@code enqueueNDRange}
 * @param eventsToWaitFor events forwarded to {@code enqueueNDRange}
 * @return the event returned by {@code enqueueNDRange}
 * @throws CLBuildException declared for kernel creation
 */
public synchronized CLEvent gen_numbers(CLQueue commandQueue, CLBuffer<Integer > seeds, CLBuffer<Integer > output, int globalWorkSizes[], int localWorkSizes[], CLEvent... eventsToWaitFor) throws CLBuildException {
    // Lazily create the kernel the first time this wrapper is invoked.
    if (gen_numbers_kernel == null) {
        gen_numbers_kernel = createKernel("gen_numbers");
    }
    gen_numbers_kernel.setArgs(seeds, output);
    CLEvent completion = gen_numbers_kernel.enqueueNDRange(commandQueue, globalWorkSizes, localWorkSizes, eventsToWaitFor);
    return completion;
}
/** <i>native declaration : com/nativelibs4java/opencl/util/XORShiftRandom.c</i> */
/**
 * Appends details about the kernel argument being set to {@code message}:
 * the kernel's function name, its argument count and the argument index, and,
 * when the kernel's program and its source are available, the program source
 * (indented by one tab) between {@code <<<} and {@code >>>} markers.
 * @param kernel kernel whose argument was being set
 * @param argIndex index of the argument being set
 */
void setKernelArg(CLKernel kernel, int argIndex) {
    message += " (kernel name = " + kernel.getFunctionName()
            + ", num args = " + kernel.getNumArgs()
            + ", arg index = " + argIndex;
    CLProgram program = kernel.getProgram();
    String source = program == null ? null : program.getSource();
    if (source != null) {
        // Only emit the closing ">>>" marker together with its opening "<<<":
        // previously a dangling "\n>>>" was appended even without any source.
        message += ", source = <<<\n\t" + source.replaceAll("\n", "\n\t") + "\n>>>";
    }
    message += " )";
}
public String toString() { return getFunctionName() + " {args: " + getNumArgs() + "}";//, workGroupSize = " + getWorkGroupSize() + ", localMemSize = " + getLocalMemSize() + "}"; }
CLImage2D finalImageOut = null; for (CLKernel kernel : kernels) { setProgress("Running kernel '" + kernel.getFunctionName() + "'..."); try { kernel.setArgs(imageIn, imageOut); finalImageOut = imageOut; imageOut = imageIn; imageIn = finalImageOut; lastEvent = kernel.enqueueNDRange(queue, new int[] { width, height }, lastEvent); } catch (CLException ex) { throw new RuntimeException("Error occurred while running kernel '" + kernel.getFunctionName() + "': " + ex, ex); imageOut.release(); for (CLKernel kernel : kernels) kernel.release(); program.release(); queue.release();
/**
 * Stress loop around kernel creation and argument setting.
 * Creates a context with {@code JavaCL.createBestContext} (GPU feature) and prints it, then
 * repeats 100000 times: create a default queue, a {@code 4 * n}-byte output buffer viewed as
 * integers, a program defining kernel {@code f} and the kernel itself; set its arguments and
 * enqueue it 100 times over {@code n} work-items; finish the queue and release the queue,
 * kernel, program and buffer. The context is released after the loop.
 *
 * NOTE(review): this excerpt has lost its line breaks, so the {@code //} comments below
 * swallow the rest of the line as written; it is also unclear whether the
 * {@code System.out.print(".")} call was commented out together with its {@code if} —
 * confirm against the original multi-line source before restructuring.
 */
public static void main(String[] args) { CLContext context = JavaCL.createBestContext(CLPlatform.DeviceFeature.GPU); System.out.println(context); int n = 128;// * 128; // Pointer<Integer> p = Pointer.allocateInts(n); for (int i = 0; i < 100000; i++) { // if ((i & 0xff) == 0xff) System.out.print("."); CLQueue queue = context.createDefaultQueue(); CLBuffer<Integer> buffer = context.createByteBuffer(CLMem.Usage.Output, 4 * n).as(Integer.class);//p); CLProgram program = context.createProgram("kernel void f(global int* input, int n) {\n" + "int i = get_global_id(0);\n" + "if (i >= n) return;\n" + "input[i] = i;\n" + "}"); CLKernel kernel = program.createKernel("f"); for (int j = 0; j < 100; j++) { kernel.setArgs(buffer, n); kernel.enqueueNDRange(queue, new int[] { n }); } queue.finish(); queue.release(); kernel.release(); program.release(); buffer.release(); } context.release(); } }
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueNDRangeKernel.html">clEnqueueNDRangeKernel</a>.<br>
 * Enqueues a command to execute this kernel on a device with explicit work-group sizes and no
 * global offsets (see {@link CLKernel#enqueueNDRange(CLQueue, int[], int[], int[], CLEvent[])}).
 * @param queue This kernel will be queued for execution on the device associated with that queue.
 * @param globalWorkSizes Number of global work-items per dimension. The total number of global
 *        work-items is globalWorkSizes[0] * ... * globalWorkSizes[globalWorkSizes.length - 1].
 * @param localWorkSizes Number of work-items per work-group in each dimension. The total
 *        (localWorkSizes[0] * ... * localWorkSizes[localWorkSizes.length - 1]) must be less than
 *        or equal to CL_DEVICE_MAX_WORK_GROUP_SIZE, and each element must be less than or equal
 *        to the corresponding value of CLDevice.getMaxWorkItemSizes()[dimensionIndex]. When
 *        specified, each globalWorkSizes[dimensionIndex] must be evenly divisible by
 *        localWorkSizes[dimensionIndex]. May be null, in which case the OpenCL implementation
 *        will choose good values.
 * @param eventsToWaitFor Events that need to complete before this particular command can be
 *        executed. Special value {@link CLEvent#FIRE_AND_FORGET} can be used to avoid returning
 *        a CLEvent.
 * @return Event object that identifies this command and can be used to query or queue a wait
 *         for the command to complete, or null if eventsToWaitFor contains
 *         {@link CLEvent#FIRE_AND_FORGET}.
 */
public CLEvent enqueueNDRange(CLQueue queue /*, int[] globalOffsets*/, int[] globalWorkSizes, int[] localWorkSizes, CLEvent... eventsToWaitFor) {
    // This overload does not expose global offsets; pass none to the full variant.
    final int[] noGlobalOffsets = null;
    return enqueueNDRange(queue, noGlobalOffsets, globalWorkSizes, localWorkSizes, eventsToWaitFor);
}
// Bind clInBuff and clOutBuff to kernel arguments 0 and 1, enqueue the kernel on q with a
// single global work size of `size`, then wait on the event returned by the enqueue call.
kernel.setArg(0, clInBuff); kernel.setArg(1, clOutBuff); CLEvent completion = kernel.enqueueNDRange(q, new int[] {size}); completion.waitFor();
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateKernel.html">clCreateKernel</a>.<br>
 * Finds a kernel by its function name, and optionally binds some arguments to it.
 * The program is built first if it has not been built yet, and the native call is
 * repeated for as long as {@code failedForLackOfMemory} asks for another attempt.
 * @param name function name of the kernel to create
 * @param args optional arguments to bind to the new kernel; nothing is bound when
 *        this is null or empty
 * @return the newly created kernel
 * @throws CLBuildException declared for the implicit build of the program
 */
public CLKernel createKernel(String name, Object... args) throws CLBuildException {
    synchronized (this) {
        if (!built)
            build();
    }
    ReusablePointers ptrs = ReusablePointers.get();
    Pointer<Integer> pErr = ptrs.pErr;
    Pointer<Byte> pName = pointerToCString(name);
    long kernel;
    try {
        int previousAttempts = 0;
        do {
            kernel = CL.clCreateKernel(getEntity(), getPeer(pName), getPeer(pErr));
        } while (failedForLackOfMemory(pErr.getInt(), previousAttempts++));
    } finally {
        // Release the native copy of the name even when the native call or the
        // error check throws, so a failed lookup does not skip the release.
        Pointer.release(pName);
    }
    CLKernel kn = new CLKernel(this, name, kernel);
    // A caller may pass an explicit null array for the varargs parameter.
    if (args != null && args.length != 0)
        kn.setArgs(args);
    return kn;
}
}
Runnable setWithSetArgs = new Runnable() { public void run() { kernel.setArgs(a, b, (short)1, 1, (byte)1, 1.0f); }}; Runnable setWithSpecializedSetArg = new Runnable() { public void run() {
/**
 * Checks that a kernel can tell {@code CLKernel.NULL_POINTER_KERNEL_ARGUMENT} from a real
 * buffer: the kernel writes {@code !in} to its output, which is expected to read back as
 * true for the null argument and false when a buffer is passed.
 */
@Test
public void nullArg() {
    CLBuffer<Byte> out = context.createByteBuffer(Usage.InputOutput, 2);
    String kernelSource =
        "kernel void isInputNull(global int* in, global bool* out) {\n" +
        "*out = !in;\n" +
        "}";
    CLKernel isInputNull = context.createProgram(kernelSource).createKernel("isInputNull");

    // First run: null pointer as input.
    isInputNull.setArgs(CLKernel.NULL_POINTER_KERNEL_ARGUMENT, out);
    boolean reportedNullForNullInput =
        out.read(queue, isInputNull.enqueueTask(queue)).as(Boolean.class).get();
    assertTrue(reportedNullForNullInput);

    // Second run: a real buffer as input.
    isInputNull.setArgs(out, out);
    boolean reportedNullForBufferInput =
        out.read(queue, isInputNull.enqueueTask(queue)).as(Boolean.class).get();
    assertFalse(reportedNullForBufferInput);
}
/**
 * Returns the kernel function name.
 * The value is read from CL_KERNEL_FUNCTION_NAME when {@code name} is not set yet and is
 * then kept in {@code name} for later calls.
 */
@InfoName("CL_KERNEL_FUNCTION_NAME")
public String getFunctionName() {
    if (name != null) {
        return name;
    }
    name = infos.getString(getEntity(), CL_KERNEL_FUNCTION_NAME);
    return name;
}
}
/**
 * Releases every kernel held in {@code kernels}, then releases {@code clProgram}.
 */
void release() {
    // Kernels first, the program that produced them last.
    for (CLKernel heldKernel : kernels.values())
        heldKernel.release();

    clProgram.release();
}
}
// Fixture state (excerpt):
//   b        - 4-element Integer buffer created with CLMem.Usage.Output
//   nArgs    - argument count reported by the kernel
//   aPeer, bPeer, kEntity - getEntity() values of a, b and the kernel, captured once
//              (presumably native handles — confirm against CLBuffer/CLKernel)
//   tmp      - 8-byte scratch allocation; tmpBuf is its ByteBuffer view
// NOTE(review): line breaks were lost in this excerpt, so the trailing `//` comment after
// allocateBytes(8) swallows the tmpBuf declaration as written.
final CLBuffer<Integer> b=context.createBuffer(CLMem.Usage.Output, Integer.class, 4); int nArgs = kernel.getNumArgs(); private final long aPeer = a.getEntity(); private final long bPeer = b.getEntity(); private final long kEntity = kernel.getEntity(); private final Pointer<?> tmp = allocateBytes(8);//.withoutValidityInformation(); private final ByteBuffer tmpBuf = tmp.getByteBuffer();
/**
 * Creates the program from {@code PRAGMA_DOUBLE_SUPPORT} followed by the given source, then
 * creates all of its kernels and registers each one in {@code kernels} under its function name.
 * @param opencl OpenCL source appended after {@code PRAGMA_DOUBLE_SUPPORT}
 */
ProgramImpl(String opencl) {
    clProgram = clContext.createProgram(PRAGMA_DOUBLE_SUPPORT + opencl);
    // Index every kernel of the program by name for later lookup.
    for (CLKernel created : clProgram.createKernels())
        kernels.put(created.getFunctionName(), created);
}
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueNDRangeKernel.html">clEnqueueNDRangeKernel</a>.<br>
 * Enqueues a command to execute this kernel on a device, with no global offsets and with
 * local work sizes chosen by the OpenCL implementation.
 * See {@link CLKernel#enqueueNDRange(CLQueue, int[], int[], int[], CLEvent[])}
 * @param queue This kernel will be queued for execution on the device associated with that queue.
 * @param globalWorkSizes Number of global work-items per dimension. The total number of global
 *        work-items is globalWorkSizes[0] * ... * globalWorkSizes[globalWorkSizes.length - 1].
 * @param eventsToWaitFor Events that need to complete before this particular command can be
 *        executed. Special value {@link CLEvent#FIRE_AND_FORGET} can be used to avoid returning
 *        a CLEvent.
 * @return Event object that identifies this command and can be used to query or queue a wait
 *         for the command to complete, or null if eventsToWaitFor contains
 *         {@link CLEvent#FIRE_AND_FORGET}.
 */
public CLEvent enqueueNDRange(CLQueue queue /*, int[] globalOffsets*/, int[] globalWorkSizes, CLEvent... eventsToWaitFor) {
    // Neither offsets nor work-group sizes are exposed here; both are passed as null.
    final int[] noGlobalOffsets = null;
    final int[] implementationChosenLocalSizes = null;
    return enqueueNDRange(queue, noGlobalOffsets, globalWorkSizes, implementationChosenLocalSizes, eventsToWaitFor);
}
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateKernel.html">clCreateKernel</a>.<br>
 * Finds a kernel by its function name, and optionally binds some arguments to it.
 * The program is built first if it has not been built yet, and the native call is
 * repeated for as long as {@code failedForLackOfMemory} asks for another attempt.
 * @param name function name of the kernel to create
 * @param args optional arguments to bind to the new kernel; nothing is bound when
 *        this is null or empty
 * @return the newly created kernel
 * @throws CLBuildException declared for the implicit build of the program
 */
public CLKernel createKernel(String name, Object... args) throws CLBuildException {
    synchronized (this) {
        if (!built)
            build();
    }
    ReusablePointers ptrs = ReusablePointers.get();
    Pointer<Integer> pErr = ptrs.pErr;
    Pointer<Byte> pName = pointerToCString(name);
    long kernel;
    try {
        int previousAttempts = 0;
        do {
            kernel = CL.clCreateKernel(getEntity(), getPeer(pName), getPeer(pErr));
        } while (failedForLackOfMemory(pErr.getInt(), previousAttempts++));
    } finally {
        // Release the native copy of the name even when the native call or the
        // error check throws, so a failed lookup does not skip the release.
        Pointer.release(pName);
    }
    CLKernel kn = new CLKernel(this, name, kernel);
    // A caller may pass an explicit null array for the varargs parameter.
    if (args != null && args.length != 0)
        kn.setArgs(args);
    return kn;
}
}
/** * Return the number of arguments to kernel. */ @InfoName("CL_KERNEL_NUM_ARGS") public int getNumArgs() { int numArgs = infos.getInt(getEntity(), CL_KERNEL_NUM_ARGS); //System.out.println("numArgs = " + numArgs); return numArgs; }
public String toString() { return getFunctionName() + " {args: " + getNumArgs() + "}";//, workGroupSize = " + getWorkGroupSize() + ", localMemSize = " + getLocalMemSize() + "}"; }
/**
 * Repeatedly creates and releases a context, queue, program and kernel — 100 rounds with
 * program caching disabled, then 100 rounds with it enabled — requesting a garbage
 * collection after each round. Prints {@code tot} (never modified, so 0) at the end.
 */
@Test
public void sweatTest() {
    long tot = 0;
    boolean[] cachingModes = { false, true };
    for (int mode = 0; mode < cachingModes.length; mode++) {
        boolean cached = cachingModes[mode];
        int time = 0;
        while (time < 100) {
            CLContext context = JavaCL.createBestContext(CLPlatform.DeviceFeature.GPU);
            CLQueue queue = context.createDefaultQueue();
            CLProgram program = context.createProgram("kernel void f(global int* a) { a[0] = 1; }");
            program.setCached(cached);
            program.build();
            CLKernel kernel = program.createKernel("f");

            // Tear everything down again, innermost object first.
            kernel.release();
            program.release();
            queue.release();
            context.release();
            System.gc();
            time++;
        }
    }
    System.out.println(tot);
}
}