/**
 * Blocks until the job completes (effectively without a time limit).
 *
 * @return true if the job completed; false if the wait was interrupted
 *         or the timed overload reported failure
 */
@Override
public boolean awaitCompletion() {
  try {
    // Long.MAX_VALUE days is effectively "wait forever".
    return awaitCompletion(Long.MAX_VALUE, TimeUnit.DAYS);
  } catch (InterruptedException e) {
    // Restore the interrupt status so callers further up the stack can
    // still observe that this thread was interrupted (the original code
    // silently swallowed it).
    Thread.currentThread().interrupt();
    return false;
  }
}
public void stop() { stopped.set(true); if ( threadPool != null ) threadPool.shutdownNow(); if(jobEndTime == null) jobEndTime = Calendar.getInstance(); if ( query != null ) { for ( AtomicBoolean isDone : forestIsDone.values() ) { // if even one isn't done, log a warning if ( isDone.get() == false ) { logger.warn("QueryBatcher instance \"{}\" stopped before all results were retrieved", getJobName()); break; } } } else { if ( iterator != null && iterator.hasNext() ) { logger.warn("QueryBatcher instance \"{}\" stopped before all results were processed", getJobName()); } } closeAllListeners(); }
/**
 * Starts the job for the given ticket. Idempotent: a second call only
 * logs a warning. Normalizes an invalid batch size to 1, initializes the
 * thread pool, notifies listeners, then begins querying or iterating.
 *
 * @param ticket the ticket issued for this job
 */
synchronized void start(JobTicket ticket) {
  // Guard against double-start: the thread pool only exists once started.
  if ( threadPool != null ) {
    logger.warn("startJob called more than once");
    return;
  }
  if ( getBatchSize() <= 0 ) {
    withBatchSize(1);
    logger.warn("batchSize should be 1 or greater--setting batchSize to 1");
  }
  jobTicket = ticket;
  initialize();
  // Give every listener a chance to set itself up before any batch fires.
  for ( QueryBatchListener listener : urisReadyListeners ) {
    listener.initializeListener(this);
  }
  jobStartTime = Calendar.getInstance();
  started.set(true);
  if ( query != null ) {
    startQuerying();
  } else {
    startIterating();
  }
}
private synchronized void initialize() { if ( threadCountSet == false ) { if ( query != null ) { Forest[] forests = getForestConfig().listForests(); logger.warn("threadCount not set--defaulting to number of forests ({})", forests.length); withThreadCount(forests.length); } else { int hostCount = clientList.get().size(); logger.warn("threadCount not set--defaulting to number of hosts ({})", hostCount); withThreadCount( hostCount ); } // now we've set the threadCount threadCountSet = true; } // If we are iterating and if we have the thread count to 1, we have a single thread acting as both // consumer and producer of the ThreadPoolExecutor queue. Hence, we produce till the maximum and start // consuming and produce again. Since the thread count is 1, there is no worry about thread utilization. if(getThreadCount() == 1) { isSingleThreaded = true; } logger.info("Starting job batchSize={}, threadCount={}, onUrisReady listeners={}, failure listeners={}", getBatchSize(), getThreadCount(), urisReadyListeners.size(), failureListeners.size()); threadPool = new QueryThreadPoolExecutor(getThreadCount(), this); }
private synchronized void calucluateDeltas(Set<Forest> oldForests, Forest[] forests) { // the forests we haven't known about yet Set<Forest> addedForests = new HashSet<>(); // the forests that we knew about but they were black-listed and are no longer black-listed Set<Forest> restartedForests = new HashSet<>(); // any known forest might now be black-listed Set<Forest> blackListedForests = new HashSet<>(oldForests); for ( Forest forest : forests ) { if ( ! oldForests.contains(forest) ) { // we need to do special handling since we're adding this new forest after we're started addedForests.add(forest); } // if we have blackListedTasks for this forest, let's restart them if ( blackListedTasks.get(forest) != null ) restartedForests.add(forest); // this forest is not black-listed blackListedForests.remove(forest); } if ( blackListedForests.size() > 0 ) { DataMovementManagerImpl moveMgrImpl = getMoveMgr(); String primaryHost = moveMgrImpl.getPrimaryClient().getHost(); if ( getHostNames(blackListedForests).contains(primaryHost) ) { int randomPos = Math.abs(primaryHost.hashCode()) % clientList.get().size(); moveMgrImpl.setPrimaryClient(clientList.get().get(randomPos)); } } cleanupExistingTasks(addedForests, restartedForests, blackListedForests); }
private synchronized void cleanupExistingTasks(Set<Forest> addedForests, Set<Forest> restartedForests, Set<Forest> blackListedForests) { if ( blackListedForests.size() > 0 ) { logger.warn("removing jobs related to hosts [{}] from the queue", getHostNames(blackListedForests)); logger.warn("adding jobs for forests [{}] to the queue", getForestNames(addedForests)); threadPool.execute(new QueryTask(getMoveMgr(), this, forest, query, 1, 1)); logger.warn("re-adding jobs related to forests [{}] to the queue", getForestNames(restartedForests));
/**
 * Retries a failed query event against the forest's currently-available
 * host. Looks the forest up again in the live forest configuration so the
 * retry uses up-to-date host info, then re-runs the query task in-line
 * (on the calling thread) starting just past the results already fetched.
 *
 * @param queryEvent        the event describing the failed batch
 * @param callFailListeners whether the re-run task should invoke failure
 *                          listeners if it fails again
 * @throws IllegalStateException if the event's forest is no longer in the
 *                               current forest configuration
 */
private void retry(QueryEvent queryEvent, boolean callFailListeners) {
  if ( isStopped() == true ) {
    logger.warn("Job is now stopped, aborting the retry");
    return;
  }
  // Re-resolve the forest from the current configuration rather than
  // trusting the (possibly stale) forest stored on the event.
  Forest retryForest = null;
  for ( Forest forest : getForestConfig().listForests() ) {
    if ( forest.equals(queryEvent.getForest()) ) {
      // while forest and queryEvent.getForest() have equivalent forest id,
      // we expect forest to have the currently available host info
      retryForest = forest;
      break;
    }
  }
  if ( retryForest == null ) {
    throw new IllegalStateException("Forest for queryEvent (" + queryEvent.getForest().getForestName() +
      ") is not in current getForestConfig()");
  }
  // we're obviously not done with this forest
  forestIsDone.get(retryForest).set(false);
  retryForestMap.get(retryForest).incrementAndGet();
  // Resume one past the results already retrieved for this forest.
  long start = queryEvent.getForestResultsSoFar() + 1;
  logger.trace("retryForest {} on retryHost {} at start {}", retryForest.getForestName(),
    retryForest.getPreferredHost(), start);
  QueryTask runnable = new QueryTask(getMoveMgr(), this, retryForest, query,
    queryEvent.getForestBatchNumber(), start, queryEvent.getJobBatchNumber(), callFailListeners);
  // Run synchronously on this thread (not the pool) so the retry happens
  // immediately in the caller's context.
  runnable.run();
}
/*
/**
 * Applies a (possibly updated) forest configuration: registers tracking
 * state for newly-seen forests, rebuilds the per-host client list, and —
 * if the job is already running — reconciles queued work against the
 * forest changes.
 *
 * @param forestConfig the new forest configuration; every forest must
 *                     have a non-null preferred host
 * @return this batcher, for chaining
 */
@Override
public synchronized QueryBatcher withForestConfig(ForestConfiguration forestConfig) {
  super.withForestConfig(forestConfig);
  Forest[] forests = forestConfig.listForests();
  Set<Forest> oldForests = new HashSet<>(forestResults.keySet());
  Map<String,Forest> hosts = new HashMap<>();
  for ( Forest forest : forests ) {
    String preferredHost = forest.getPreferredHost();
    if ( preferredHost == null ) {
      throw new IllegalStateException("Hostname must not be null for any forest");
    }
    hosts.put(preferredHost, forest);
    // Lazily create per-forest tracking state the first time we see a forest.
    forestResults.putIfAbsent(forest, new AtomicLong());
    forestIsDone.putIfAbsent(forest, new AtomicBoolean(false));
    retryForestMap.putIfAbsent(forest, new AtomicInteger(0));
  }
  logger.info("(withForestConfig) Using forests on {} hosts for \"{}\"", hosts.keySet(), forests[0].getDatabaseName());
  // One client per distinct preferred host.
  List<DatabaseClient> newClientList = new ArrayList<>();
  for ( Map.Entry<String,Forest> entry : hosts.entrySet() ) {
    newClientList.add(getMoveMgr().getForestClient(entry.getValue()));
  }
  clientList.set(newClientList);
  // If the job is already running and the forest set changed, repair the queue.
  boolean alreadyStarted = (threadPool != null);
  if ( alreadyStarted && ! oldForests.isEmpty() ) {
    calucluateDeltas(oldForests, forests);
  }
  return this;
}
private synchronized void startQuerying() { boolean consistentSnapshotFirstQueryHasRun = false; for ( Forest forest : getForestConfig().listForests() ) { QueryTask runnable = new QueryTask(getMoveMgr(), this, forest, query, 1, 1); if ( consistentSnapshot == true && consistentSnapshotFirstQueryHasRun == false ) { // let's run this first time in-line so we'll have the serverTimestamp set // before we launch all the parallel threads runnable.run(); consistentSnapshotFirstQueryHasRun = true; } else { threadPool.execute(runnable); } } }
.withClient(client) .withTimestamp(queryStart) .withJobTicket(getJobTicket()) .withForestBatchNumber(forestBatchNum) .withForest(forest); queryMgr.setPageLength(getBatchSize()); UrisHandle handle = new UrisHandle(); if ( consistentSnapshot == true && serverTimestamp.get() > -1 ) { uris.add( uri ); if ( uris.size() == getBatchSize() ) { if ( uris.size() != getBatchSize() ) {
/**
 * Creates a QueryBatcher driven by an iterator of URIs.
 *
 * @param iterator the source of URIs to process; must not be null
 * @return the configured batcher
 * @throws IllegalArgumentException if iterator is null
 */
@Override
public QueryBatcher newQueryBatcher(Iterator<String> iterator) {
  if ( iterator == null ) {
    throw new IllegalArgumentException("iterator must not be null");
  }
  QueryBatcherImpl batcherImpl = new QueryBatcherImpl(iterator, this, getForestConfig());
  return newQueryBatcher(batcherImpl);
}
/**
 * Submits the query task for the next batch of this forest, unless the
 * job has been stopped or this forest has already been fully read.
 */
private void launchNextTask() {
  if ( stopped.get() == true ) {
    // we're stopping, so don't do anything more
    return;
  }
  AtomicBoolean isDone = forestIsDone.get(forest);
  // we made it to the end, so don't launch anymore tasks
  if ( isDone.get() == true ) return;
  // Next batch begins one batchSize beyond where this task started.
  long nextStart = start + getBatchSize();
  threadPool.execute(new QueryTask(moveMgr, batcher, forest, query, forestBatchNum + 1, nextStart));
}
// closes the enclosing anonymous class and statement
};
/**
 * Safety net at garbage collection: warns when the job was never cleanly
 * stopped so leaked jobs surface in the logs.
 *
 * NOTE(review): finalize() is deprecated since Java 9 and unreliable;
 * consider migrating to java.lang.ref.Cleaner — TODO confirm the
 * project's minimum supported Java version first.
 */
protected void finalize() {
  if ( stopped.get() == false ) {
    logger.warn("QueryBatcher instance \"{}\" was never cleanly stopped. You should call dataMovementManager.stopJob.", getJobName());
  }
}
/**
 * Creates a QueryBatcher driven by a query definition.
 *
 * @param query the query selecting URIs to process; must not be null
 * @return the configured batcher
 * @throws IllegalArgumentException if query is null
 */
private QueryBatcher newQueryBatcherImpl(QueryDefinition query) {
  if ( query == null ) {
    throw new IllegalArgumentException("query must not be null");
  }
  QueryBatcherImpl batcherImpl = new QueryBatcherImpl(query, this, getForestConfig());
  return newQueryBatcher(batcherImpl);
}
try { boolean lastBatch = false; List<String> uriQueue = new ArrayList<>(getBatchSize()); while (iterator.hasNext()) { uriQueue.add(iterator.next()); if(!iterator.hasNext()) lastBatch = true; if (uriQueue.size() == getBatchSize() || !iterator.hasNext()) { final List<String> uris = uriQueue; final boolean finalLastBatch = lastBatch; uriQueue = new ArrayList<>(getBatchSize()); Runnable processBatch = new Runnable() { public void run() {