com.marklogic.client.datamovement.impl.WriteBatcherImpl java code examples

/**
 * Queues a document for writing without any metadata.
 *
 * Delegates to the three-argument overload, passing {@code null} as the
 * metadata handle.
 *
 * @param uri the URI of the document to write
 * @param contentHandle the handle holding the document content
 * @return this batcher, to allow call chaining
 */
@Override
public WriteBatcher add(String uri, AbstractWriteHandle contentHandle) {
 return add(uri, null, contentHandle);
}

/**
 * Queues arbitrary content for writing without any metadata.
 *
 * Delegates to the three-argument {@code addAs} overload with a
 * {@code null} metadata handle.
 *
 * @param uri the URI of the document to write
 * @param content the content object to write
 * @return the batcher returned by the three-argument overload
 */
@Override
public WriteBatcher addAs(String uri, Object content) {
 DocumentMetadataWriteHandle noMetadata = null;
 return addAs(uri, noMetadata, content);
}

/**
 * Blocks the calling thread by delegating to
 * {@code awaitCompletion(Long.MAX_VALUE, TimeUnit.DAYS)}, i.e. with an
 * effectively unbounded timeout.
 *
 * If the wait is interrupted, the thread's interrupt status is restored
 * before returning so that callers (and any later blocking calls) can still
 * observe that an interrupt was requested.
 *
 * @return the result of the timed {@code awaitCompletion} call, or
 *         {@code false} if the calling thread was interrupted while waiting
 */
@Override
public boolean awaitCompletion() {
 try {
  return awaitCompletion(Long.MAX_VALUE, TimeUnit.DAYS);
 } catch(InterruptedException e) {
  // catching InterruptedException clears the interrupt flag; set it again
  // rather than silently swallowing the interruption
  Thread.currentThread().interrupt();
  logger.debug("awaitCompletion caught InterruptedException");
  return false;
 }
}

private void flush(boolean waitForCompletion) {
 requireInitialized();
 requireNotStopped();
 boolean forceNewTransaction = true;
 for ( int i=0; iter.hasNext(); i++ ) {
  if ( isStopped() == true ) {
   logger.warn("Job is now stopped, preventing the flush of {} queued docs", docs.size() - i);
   if ( waitForCompletion == true ) awaitCompletion();
   return;
  BatchWriteSet writeSet = newBatchWriteSet(forceNewTransaction);
  int j=0;
  for ( ; j < getBatchSize() && iter.hasNext(); j++ ) {
   DocumentWriteOperation doc = iter.next();
   writeSet.getWriteSet().add(doc);
 if ( waitForCompletion == true ) awaitCompletion();
  Runnable cleanupTransactions = () -> {
   cleanupUnfinishedTransactions();
    while ( (transactionInfo = host.getTransactionInfoAndDrainPermits()) != null ) {
     TransactionInfo transactionInfoCopy = transactionInfo;
     completeTransaction(transactionInfoCopy);

/**
 * Performs one-time setup of this batcher: normalizes batchSize and
 * threadCount, decides whether transactions are in use, creates the thread
 * pool, and records the job start time.
 *
 * The {@code initialized} flag is checked once before entering the
 * synchronized block and once again inside it, so the setup body runs at
 * most once and later calls return without taking the lock.
 */
public void initialize() {
 if ( initialized ) {
  return;
 }
 synchronized(this) {
  if ( initialized ) {
   return;
  }

  // a non-positive batch size is replaced with 1
  if ( getBatchSize() <= 0 ) {
   withBatchSize(1);
   logger.warn("batchSize should be 1 or greater--setting batchSize to 1");
  }

  // transactions are only used when more than one batch shares a transaction
  if ( transactionSize > 1 ) {
   usingTransactions = true;
  }

  // a non-positive thread count falls back to one thread per host
  if ( getThreadCount() <= 0 ) {
   withThreadCount( hostInfos.length );
   logger.warn("threadCount should be 1 or greater--setting threadCount to number of hosts ({})", hostInfos.length);
  }

  // pool with core size == max size == threadCount, a one minute keep-alive,
  // and a task queue bounded at threadCount * 3; core threads may time out too
  threadPool = new CompletableThreadPoolExecutor(
   getThreadCount(),
   getThreadCount(),
   1,
   TimeUnit.MINUTES,
   new LinkedBlockingQueue<Runnable>(getThreadCount() * 3));
  threadPool.allowCoreThreadTimeOut(true);

  initialized = true;

  logger.info("threadCount={}", getThreadCount());
  logger.info("batchSize={}", getBatchSize());
  if ( usingTransactions ) {
   logger.info("transactionSize={}", transactionSize);
  }

  jobStartTime = Calendar.getInstance();
  started.set(true);
 }
}

/**
 * Queues a single write operation and, when this call brings the running
 * record count to a multiple of the batch size, drains up to one batch from
 * the queue and submits it to the thread pool.
 *
 * @param writeOperation the operation to queue; its URI and content must be non-null
 * @return this batcher, to allow call chaining
 * @throws IllegalArgumentException if the operation's URI or content is null
 */
@Override
public WriteBatcher add(DocumentWriteOperation writeOperation) {
 if ( writeOperation.getUri() == null ) {
  throw new IllegalArgumentException("uri must not be null");
 }
 if ( writeOperation.getContent() == null ) {
  throw new IllegalArgumentException("contentHandle must not be null");
 }
 initialize();
 requireNotStopped();
 queue.add(writeOperation);
 logger.trace("add uri={}", writeOperation.getUri());

 // only the call whose counter value is a multiple of batchSize writes a batch
 long recordNum = batchCounter.incrementAndGet();
 if ( (recordNum % getBatchSize()) != 0 ) {
  return this;
 }

 BatchWriteSet batch = newBatchWriteSet(false);
 for ( int taken = 0; taken < getBatchSize(); taken++ ) {
  DocumentWriteOperation next = queue.poll();
  if ( next == null ) {
   // strange, there should have been a full batch of docs in the queue...
   break;
  }
  batch.getWriteSet().add(next);
 }
 if ( batch.getWriteSet().size() > 0 ) {
  threadPool.submit( new BatchWriter(batch) );
 }
 return this;
}

/**
 * Creates a new {@code WriteBatcherImpl} bound to this manager and its
 * current forest configuration, with the default listeners registered.
 *
 * Failure listeners are registered in this order: a
 * {@code HostAvailabilityListener}, the job report listener, and a
 * {@code NoResponseListener}. The same job report listener instance is also
 * registered as a success listener.
 *
 * @return the newly created batcher
 */
@Override
public WriteBatcher newWriteBatcher() {
 WriteBatcherImpl created = new WriteBatcherImpl(this, getForestConfig());
 WriteJobReportListener reportListener = new WriteJobReportListener();

 created.onBatchFailure(new HostAvailabilityListener(this));
 created.onBatchFailure(reportListener);
 created.onBatchFailure(new NoResponseListener(this));
 created.onBatchSuccess(reportListener);

 return created;
}

 Forest forest = hosts.get(host);
 newHostInfos[i].client = getMoveMgr().getForestClient(forest);
 if (getMoveMgr().getConnectionType() == DatabaseClient.ConnectionType.DIRECT) {
  logger.info("Adding DatabaseClient on port {} for host \"{}\" to the rotation",
     newHostInfos[i].client.getPort(), host);
DataMovementManagerImpl moveMgrImpl = getMoveMgr();
String primaryHost = moveMgrImpl.getPrimaryClient().getHost();
if ( removedHostInfos.containsKey(primaryHost) ) {
   BatchWriteSet writeSet = newBatchWriteSet(forceNewTransaction, writerTask.writeSet.getBatchNumber());
   writeSet.onFailure(throwable -> {
    if ( throwable instanceof RuntimeException ) throw (RuntimeException) throwable;
 cleanupUnfinishedTransactions(removedHostInfo);

/**
 * Closes every registered success listener and then every registered failure
 * listener that implements {@link AutoCloseable}. A failure to close one
 * listener is logged and does not prevent the remaining listeners from
 * being closed.
 */
private void closeAllListeners() {
 for (WriteBatchListener successListener : getBatchSuccessListeners()) {
  closeListenerIfCloseable(successListener, "onBatchSuccess listener cannot be closed");
 }
 for (WriteFailureListener failureListener : getBatchFailureListeners()) {
  closeListenerIfCloseable(failureListener, "onBatchFailure listener cannot be closed");
 }
}

/**
 * Closes the given listener when it is an {@link AutoCloseable}, logging
 * (rather than propagating) any exception raised by {@code close()}.
 *
 * @param listener the listener to close if possible
 * @param failureMessage the message to log when closing fails
 */
private void closeListenerIfCloseable(Object listener, String failureMessage) {
 if ( !(listener instanceof AutoCloseable) ) {
  return;
 }
 try {
  ((AutoCloseable) listener).close();
 } catch (Exception e) {
  logger.error(failureMessage, e);
 }
}

/**
 * Flushes queued documents by calling {@code flush} with
 * {@code waitForCompletion} set to {@code true}.
 */
@Override
public void flushAndWait() {
 final boolean waitForCompletion = true;
 flush(waitForCompletion);
}

/**
 * Runs the per-host unfinished-transaction cleanup for every entry in
 * {@code hostInfos}.
 */
private void cleanupUnfinishedTransactions() {
 // read the array reference once, as the enhanced for loop would
 HostInfo[] currentHosts = hostInfos;
 for ( int index = 0; index < currentHosts.length; index++ ) {
  cleanupUnfinishedTransactions(currentHosts[index]);
 }
}

/**
 * Stops this batcher: records the job end time, sets the stopped flag,
 * calls {@code shutdownNow()} on the thread pool if one was created, and
 * finally closes all closeable listeners.
 */
public void stop() {
 jobEndTime = Calendar.getInstance();
 stopped.set(true);
 // the pool only exists once initialize() has run
 if ( threadPool != null ) {
  threadPool.shutdownNow();
 }
 closeAllListeners();
}

/**
 * Walks the given host's unfinished transactions and disposes of each one
 * according to its state:
 * - no longer alive: removed from the collection
 * - already queued for cleanup: left alone
 * - still in process on some thread: left alone for a later pass
 * - idle and never written: removed from the collection
 * - idle and written: marked as queued and handed to the thread pool, which
 *   calls {@code completeTransaction}; on success the transaction is removed,
 *   otherwise the queued mark is cleared so a later cleanup retries it
 *
 * @param host the host whose unfinished transactions are examined
 */
private void cleanupUnfinishedTransactions(HostInfo host) {
 Iterator<TransactionInfo> pending = host.unfinishedTransactions.iterator();
 while ( pending.hasNext() ) {
  TransactionInfo txn = pending.next();

  if ( !txn.alive.get() ) {
   pending.remove();
   continue;
  }
  if ( txn.queuedForCleanup.get() ) {
   // skip this one, it's already queued
   continue;
  }
  if ( txn.inProcess.get() > 0 ) {
   // some thread is still working with it; look again next cleanup
   continue;
  }
  if ( !txn.written.get() ) {
   pending.remove();
   continue;
  }

  txn.queuedForCleanup.set(true);
  threadPool.submit( () -> {
   if ( completeTransaction(txn) ) {
    host.unfinishedTransactions.remove(txn);
   } else {
    // let's try again next cleanup
    txn.queuedForCleanup.set(false);
   }
  });
 }
}

/**
 * Flushes queued documents by calling {@code flush} with
 * {@code waitForCompletion} set to {@code false}.
 */
@Override
public void flushAsync() {
 final boolean waitForCompletion = false;
 flush(waitForCompletion);
}

transactionInfo.inProcess.incrementAndGet();
host.addTransactionInfo(transactionInfo);
cleanupUnfinishedTransactions();
return transactionInfo;

/**
 * Queues a document write built from the given URI, metadata handle and
 * content handle.
 *
 * Wraps the arguments in a {@code DocumentWriteOperationImpl} of type
 * {@code DOCUMENT_WRITE} and passes it to {@code add(DocumentWriteOperation)}.
 *
 * @param uri the URI of the document to write
 * @param metadataHandle the handle holding the document metadata
 * @param contentHandle the handle holding the document content
 * @return this batcher, to allow call chaining
 */
@Override
public WriteBatcher add(String uri, DocumentMetadataWriteHandle metadataHandle, AbstractWriteHandle contentHandle) {
 DocumentWriteOperation operation =
  new DocumentWriteOperationImpl(OperationType.DOCUMENT_WRITE, uri, metadataHandle, contentHandle);
 add(operation);
 return this;
}

/**
 * Queues arbitrary content for writing, wrapping it in a handle when needed.
 *
 * Content that already is an {@code AbstractWriteHandle} is used directly.
 * Any other content is placed into a handle obtained from the handle
 * registry for the content's runtime class.
 *
 * @param uri the URI of the document to write
 * @param metadataHandle the handle holding the document metadata
 * @param content the content to write; must not be null
 * @return the batcher returned by the three-argument {@code add}
 * @throws IllegalArgumentException if {@code content} is null
 */
@Override
public WriteBatcher addAs(String uri, DocumentMetadataWriteHandle metadataHandle,
             Object content) {
 if (content == null) {
  throw new IllegalArgumentException("content must not be null");
 }

 // content is non-null here, so instanceof matches the runtime-class check
 if (content instanceof AbstractWriteHandle) {
  return add(uri, metadataHandle, (AbstractWriteHandle) content);
 }

 Class<?> contentType = content.getClass();
 ContentHandle<?> wrapper = DatabaseClientFactory.getHandleRegistry().makeHandle(contentType);
 Utilities.setHandleContent(wrapper, content);
 AbstractWriteHandle handle = wrapper;
 return add(uri, metadataHandle, handle);
}

/**
 * Queues every given write event, in argument order, using each event's
 * target URI, metadata and content.
 *
 * @param docs the write events to queue
 * @return this batcher, to allow call chaining
 */
@Override
public WriteBatcher add(WriteEvent... docs) {
 for ( int index = 0; index < docs.length; index++ ) {
  WriteEvent event = docs[index];
  add( event.getTargetUri(), event.getMetadata(), event.getContent() );
 }
 return this;
}

Javadoc

The implementation of WriteBatcher. Features - multiple threads can concurrently call add/addAs - we don't manage these threads, they're outside this - no synchronization or unnecessary delays while queueing - won't launch extra threads until a batch is ready to write - (warning) we don't proactively read streams, so don't leave them in the queue too long - topology-aware by calling /v1/forestinfo - get list of hosts which have writeable forests - each write hits the next writeable host for round-robin network calls - manage an internal threadPool of size threadCount for network calls - when batchSize reached, writes a batch - using a thread from threadPool - no synchronization or unnecessary delays while emptying queue - and calls each successListener (if not using transactions) - if usingTransactions (transactionSize > 1) - opens transactions as needed - using a thread from threadPool - but not before, lest we increase likelihood of transaction timeout - threads needing a transaction will open one then make it available to others up to transactionSize - after each batch write, check if transactionSize reached and if so commit the transaction - don't check before write to avoid race condition where the last batch writes and commits before the second to last batch writes - don't commit if another thread is in process with the transaction - instead queue the transaction for commit later - if commit is successful call each successListener for each transaction batch - when a batch fails, calls each failureListener - and calls rollback (if using transactions) - using a thread from threadPool - then calls each failureListener for each transaction batch - flush() writes all queued documents whether the last batch is full or not - and commits the transaction for each batch so nothing is left uncommitted (ignores transactionSize) - and resets counter so the next batch will be a normal batch size - and finishes any unfinished transactions - those without error are committed 
- those with error are made to rollback - awaitCompletion allows the calling thread to block until all tasks queued to that point are finished writing batches or committing transactions (or calling rollback) Design - think asynchronously - so that many external threads and many internal threads can be constantly updating state without creating conflict - avoid race conditions and logic which depends on state remaining unchanged from one statement to the next - when triggering periodic processing such as writing a batch, opening a transaction, or choosing the next host to use - use logic where multiple concurrent threads can arrive at the same point and see the same state yet only one of the threads will perform the processing - do this by using AtomicLong.incrementAndGet() so each thread gets a different number, then trigger the logic with the thread that gets the correct number - for example, we decide to write a batch by timeToWriteBatch = (recordNum % getBatchSize()) == 0; - in other words, when we reach a recordNum which is a multiple of getBatchSize - only one thread will get the correct number and that thread will have timeToWriteBatch == true - we don't reset recordNum at each batch as that would introduce a race condition - however, when flush is called we want subsequent batches to start over, so in that case we reset recordNum to 0 - use classes from java.util.concurrent and java.util.concurrent.atomic - so external threads don't block when calling add/addAs - so internal state doesn't get confused by race conditions - avoid deadlock - don't ask threads to block - use non-blocking queues where possible - we use a blocking queue for the thread pool since that's required and it makes sense for threads to block while awaiting more tasks - we use a blocking queue for the DocumentWriteOperation main queue just so we can have the atomic drainTo method used by flush. 
But LinkedBlockingQueue is unbounded so nothing should block on put() and we use poll() to get things so we don't block there either. - we only use one synchronized block inside initialize() to ensure it only runs once - after the first call is complete, calls to initialize() won't hit the synchronized block - try to do what's expected - try to write documents in the order they are sent to add/addAs - accepting that asynchronous threads will proceed unpredictably - for example, thread A might start before thread B and perform less work, but thread B might still complete first - try to match batch sizes to batchSize - except when flush is called, then immediately write all queued docs - try to match number of batches in each transaction to transactionSize - except when any batch fails, then stop writing to that transaction - except when flush is called, then commit all open transactions - when awaitCompletion is called, block until existing tasks are complete but ignore any tasks added after awaitCompletion is called - for more on the design of awaitCompletion, see comments above CompletableThreadPoolExecutor and CompletableRejectedExecutionHandler - track - one queue of DocumentWriteOperation - batchCounter to decide if it's time to write a batch - flush resets this so after flush batch sizes will be normal - batchNumber to decide which host to use next (round-robin) - initialized to ensure configuration doesn't change after add/addAs are called - threadPool of threadCount size for most calls to the server - not calls during forestinfo or flush - each host - host name - client (contains http connection pool) - auth challenge once per client - number of batches - used to kick off a transaction each time we hit transactionSize - current transactions (transactionInfos object) - with batches already written - unfinishedTransactions - ready to commit or rollback, but waiting for all threads to stop processing it first - each transaction - host - inProcess == true if any 
thread is currently working in the transaction - transactionPermits track how many more batches can use the transaction - batchesFinished tracks number of batches written (after they're done) - so we can commit only after batchesFinished = transactionSize - written == true if any batches have started writing with this transaction - so we won't commit or rollback an unwritten transaction - throwable if an error occurred but rollback couldn't be called immediately because another thread was still processing - alive = false if the transaction has been finished (commit / rollback) - queuedForCleanup tracks if the transaction is now in unfinishedTransactions - any batches waiting for finish (commit/rollback) before calling successListeners or failureListeners - each task (Runnable) in the thread pool task queue - so we can know which tasks to monitor when awaitCompletion is called - we remove each task when it's complete - for more details, see comments above CompletableThreadPoolExecutor and CompletableRejectedExecutionHandler Known issues - does not guarantee minimal batch loss on transaction failure - if two batches attempt to write at the same time and one fails, the other will be part of the rollback whether it fails or not - however, any subsequent batches that attempt to write will be in a new transaction

Most used methods

Popular in Java

Making http post requests using okhttp
getContentResolver (Context)
addToBackStack (FragmentTransaction)
setContentView (Activity)
IOException (java.io)
Signals a general, I/O-related error. Error details may be specified when calling the constructor, a
TreeMap (java.util)
Walk the nodes of the tree left-to-right or right-to-left. Note that in descending iterations, next
TreeSet (java.util)
TreeSet is an implementation of SortedSet. All optional operations (adding and removing) are support
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
JCheckBox (javax.swing)
Scheduler (org.quartz)
This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
Top Sublime Text plugins

How to use WriteBatcherImpl in com.marklogic.client.datamovement.impl

Best Java code snippets using com.marklogic.client.datamovement.impl.WriteBatcherImpl (Showing top 18 results out of 315)

How to use
WriteBatcherImpl
in
com.marklogic.client.datamovement.impl