/**
 * Returns the ids of the devices that the configuration currently lists as available.
 *
 * @return a newly created {@link HashSet} populated from {@code configuration.getAvailableDevices()};
 *         a fresh set is built on every call
 */
@Override
public Set<Integer> getAvailableDevices() {
    final Set<Integer> deviceIds = new HashSet<>(configuration.getAvailableDevices());
    return deviceIds;
}
/**
 * Returns the ids of the devices that the configuration currently lists as available.
 *
 * @return a newly created {@link HashSet} populated from {@code configuration.getAvailableDevices()};
 *         a fresh set is built on every call
 */
@Override
public Set<Integer> getAvailableDevices() {
    final Set<Integer> deviceIds = new HashSet<>(configuration.getAvailableDevices());
    return deviceIds;
}
/**
 * Pairs the specified thread with the specified device by recording the mapping in the affinity map.
 *
 * @param threadId id of the thread to map
 * @param deviceId id of the device the thread is mapped to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    // Only the number of available devices is needed for the log line, so ask for the size
    // directly instead of copying the whole list on every call.
    int deviceCount = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size();
    logger.trace("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
                    deviceCount);
    affinityMap.put(threadId, deviceId);
}
/**
 * Pairs the specified thread with the specified device by recording the mapping in the affinity map.
 *
 * @param threadId id of the thread to map
 * @param deviceId id of the device the thread is mapped to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    // Only the number of available devices is needed for the log line, so ask for the size
    // directly instead of copying the whole list on every call.
    int deviceCount = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size();
    logger.debug("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
                    deviceCount);
    affinityMap.put(threadId, deviceId);
}
/**
 * Collects the allocation counters into a single table.
 *
 * The table holds one {@code HOST} entry under column {@code 0} with the current value of
 * {@code zeroUseCounter}, plus one {@code DEVICE} entry per device id reported by
 * {@code configuration.getAvailableDevices()}, holding the result of
 * {@code getAllocatedDeviceMemory(deviceId)}.
 *
 * @return a newly created table keyed by allocation status and device id
 */
@Override
public Table<AllocationStatus, Integer, Long> getAllocationStatistics() {
    Table<AllocationStatus, Integer, Long> stats = HashBasedTable.create();

    // host side: single entry stored under column 0
    Long hostAllocated = zeroUseCounter.get();
    stats.put(AllocationStatus.HOST, 0, hostAllocated);

    // device side: one entry per available device
    for (Integer id : configuration.getAvailableDevices()) {
        Long deviceAllocated = getAllocatedDeviceMemory(id);
        stats.put(AllocationStatus.DEVICE, id, deviceAllocated);
    }

    return stats;
}
/**
 * Collects the allocation counters into a single table.
 *
 * The table holds one {@code HOST} entry under column {@code 0} with the current value of
 * {@code zeroUseCounter}, plus one {@code DEVICE} entry per device id reported by
 * {@code configuration.getAvailableDevices()}, holding the result of
 * {@code getAllocatedDeviceMemory(deviceId)}.
 *
 * @return a newly created table keyed by allocation status and device id
 */
@Override
public Table<AllocationStatus, Integer, Long> getAllocationStatistics() {
    Table<AllocationStatus, Integer, Long> stats = HashBasedTable.create();

    // host side: single entry stored under column 0
    Long hostAllocated = zeroUseCounter.get();
    stats.put(AllocationStatus.HOST, 0, hostAllocated);

    // device side: one entry per available device
    for (Integer id : configuration.getAvailableDevices()) {
        Long deviceAllocated = getAllocatedDeviceMemory(id);
        stats.put(AllocationStatus.DEVICE, id, deviceAllocated);
    }

    return stats;
}
/**
 * Rough timing run: prints the number of available devices, fills a 100000 x 100 matrix with
 * random doubles, then prints how many milliseconds it takes to create an {@code INDArray}
 * from that matrix and call {@code transpose()} on it 100 times.
 *
 * The measured interval includes the array creation as well as the transposes.
 */
public static void nd4jTest() {
    System.out.println(
                    "Device count:" + CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size());

    ThreadLocalRandom random = ThreadLocalRandom.current();
    double[][] values = new double[100000][100];
    // column bound is taken from the first row, as all rows share the same length
    final int columns = values[0].length;
    for (double[] row : values) {
        for (int col = 0; col < columns; col++) {
            row[col] = random.nextDouble();
        }
    }

    final long startedAt = System.currentTimeMillis();
    INDArray array = Nd4j.create(values);
    for (int iteration = 0; iteration < 100; iteration++) {
        array.transpose();
    }
    final long elapsed = System.currentTimeMillis() - startedAt;

    System.out.println(elapsed + " ms for 100 iterations");
}
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
/**
 * Populates the context pool for every available device.
 *
 * For each device id in the configured list this method switches the native layer to that
 * device, installs a fresh queue for it in {@code pool}, creates one cuBLAS handle and one
 * solver handle, and then creates {@code numResources} contexts that all share those two
 * handles and are added to the device's queue.
 *
 * @param numResources  number of contexts to create per device
 * @param restoreDevice when {@code true}, the device id reported by the allocator before the
 *                      pool is filled is set again once filling has finished
 */
protected synchronized void fillPoolWithResources(int numResources, boolean restoreDevice) {
    List<Integer> availableDevices = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices();

    // remember the device that was active on entry, if the caller wants it put back afterwards
    int previousDevice = restoreDevice ? AtomicAllocator.getInstance().getDeviceId() : 0;

    NativeOps ops = NativeOpsHolder.getInstance().getDeviceNativeOps();

    for (Integer deviceId : availableDevices) {
        ops.setDevice(new CudaPointer(deviceId));
        pool.put(deviceId, new LinkedBlockingQueue<CudaContext>());

        // one pair of handles per device, shared by every context created below
        cublasHandle_t blasHandle = createNewCublasHandle();
        cusolverDnHandle_t dnSolverHandle = createNewSolverHandle();

        int created = 0;
        while (created < numResources) {
            CudaContext ctx = createNewStream(deviceId);
            ctx.initOldStream();
            getDeviceBuffers(ctx, deviceId);
            ctx.setHandle(blasHandle);
            ctx.setSolverHandle(dnSolverHandle);
            ctx.syncOldStream();

            pool.get(deviceId).add(ctx);
            created++;
        }
    }

    if (restoreDevice) {
        ops.setDevice(new CudaPointer(previousDevice));
    }
}
/**
 * Populates the context pool for every available device.
 *
 * For each device id in the configured list this method switches the native layer to that
 * device, installs a fresh queue for it in {@code pool}, creates one cuBLAS handle and one
 * solver handle, and then creates {@code numResources} contexts that all share those two
 * handles and are added to the device's queue.
 *
 * @param numResources  number of contexts to create per device
 * @param restoreDevice when {@code true}, the device id reported by the allocator before the
 *                      pool is filled is set again once filling has finished
 */
protected synchronized void fillPoolWithResources(int numResources, boolean restoreDevice) {
    List<Integer> availableDevices = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices();

    // remember the device that was active on entry, if the caller wants it put back afterwards
    int previousDevice = restoreDevice ? AtomicAllocator.getInstance().getDeviceId() : 0;

    NativeOps ops = NativeOpsHolder.getInstance().getDeviceNativeOps();

    for (Integer deviceId : availableDevices) {
        ops.setDevice(new CudaPointer(deviceId));
        pool.put(deviceId, new LinkedBlockingQueue<CudaContext>());

        // one pair of handles per device, shared by every context created below
        cublasHandle_t blasHandle = createNewCublasHandle();
        cusolverDnHandle_t dnSolverHandle = createNewSolverHandle();

        int created = 0;
        while (created < numResources) {
            CudaContext ctx = createNewStream(deviceId);
            ctx.initOldStream();
            getDeviceBuffers(ctx, deviceId);
            ctx.setHandle(blasHandle);
            ctx.setSolverHandle(dnSolverHandle);
            ctx.syncOldStream();

            pool.get(deviceId).add(ctx);
            created++;
        }
    }

    if (restoreDevice) {
        ops.setDevice(new CudaPointer(previousDevice));
    }
}