/**
 * Builds a Mongo equality filter matching a document by its stored reference.
 * @param reference the document reference
 * @return filter on the reference field
 */
private Bson referenceFilter(String reference) {
    // Apply the same truncation used when storing the reference
    // (Mongo indexed fields are limited to 1024 characters).
    String storedRef = StringUtil.truncateWithHash(reference, 1024, "!");
    return eq(IMongoSerializer.FIELD_REFERENCE, storedRef);
}
/**
 * Validates the action supplied on the command line.
 * @param cmd parsed command line
 * @return <code>true</code> if the action is start, resume, or stop
 */
private boolean isActionValid(CommandLine cmd) {
    String action = cmd.getOptionValue(ARG_ACTION);
    if (!ARG_ACTION_CHECKCONFIG.equals(action)) {
        return EqualsUtil.equalsAny(
                action, ARG_ACTION_START, ARG_ACTION_RESUME, ARG_ACTION_STOP);
    }
    // "checkcfg" moved to its own flag; reject it with a migration hint.
    System.err.println("-action checkcfg is deprecated, "
            + "use -k or --checkcfg instead.");
    return false;
}
@Override
public void run() {
    monitoring = true;
    // Poll once per second for the stop file until monitoring is turned off
    // (stopMonitoring() is expected to clear the flag). A do/while is
    // equivalent here because "monitoring" is always true on entry.
    do {
        if (stopFile.exists()) {
            stopMonitoring();
            stopSuite();
        }
        // Sleep every iteration, including right after a stop request.
        Sleeper.sleepSeconds(1);
    } while (monitoring);
}
/** * Commits documents and delete them from queue when done. * @param batch the bath of operations to commit. */ private void commitAndCleanBatch(List<ICommitOperation> batch) { int numTries = 0; boolean success = false; while (!success) { try { commitBatch(batch); success = true; } catch (Exception e) { if (numTries < maxRetries) { LOG.error("Could not commit batched operations.", e); Sleeper.sleepMillis(maxRetryWait); numTries++; } else { throw (RuntimeException) e; } } } // Delete queued documents after commit for (ICommitOperation op : batch) { op.delete(); } batch.clear(); }
@Override public Document toDocument(Stage stage, ICrawlData crawlData) { Document doc = new Document(); // "reference" is a Mongo indexed field, which is limited to 1024. // So if too long we truncate it, trying to keep it unique, // while storing the original in a separate field. String ref = StringUtil.truncateWithHash( crawlData.getReference(), 1024, "!"); doc.put(FIELD_REFERENCE, ref); if (!Objects.equals(ref, crawlData.getReference())) { doc.put(FIELD_REFERENCE_EXCESSIVE, crawlData.getReference()); } doc.put(FIELD_PARENT_ROOT_REFERENCE, crawlData.getParentRootReference()); doc.put(FIELD_IS_ROOT_PARENT_REFERENCE, crawlData.isRootParentReference()); doc.put(FIELD_CRAWL_STATE, crawlData.getState().toString()); doc.put(FIELD_META_CHECKSUM, crawlData.getMetaChecksum()); doc.put(FIELD_CONTENT_CHECKSUM, crawlData.getContentChecksum()); doc.put(FIELD_IS_VALID, crawlData.getState().isGoodState()); doc.put(FIELD_STAGE, stage.toString()); if (crawlData.getContentType() != null) { doc.put(FIELD_CONTENT_TYPE, crawlData.getContentType().toString()); } doc.put(FIELD_CRAWL_DATE, crawlData.getCrawlDate()); return doc; }
/**
 * Recursively appends one progress line for the given job, then one for
 * each of its children, indented one level deeper per recursion.
 * @param b builder receiving the textual representation
 * @param jobId identifier of the job to render
 * @param depth current indentation level (root is 0)
 */
private void toString(StringBuilder b, String jobId, int depth) {
    IJobStatus status = getJobStatus(jobId);
    // Indent proportionally to depth, then left-pad the percentage so
    // progress values line up in a column.
    b.append(StringUtils.repeat(' ', depth * TO_STRING_INDENT));
    b.append(StringUtils.leftPad(new PercentFormatter().format(
            status.getProgress()), TO_STRING_INDENT));
    b.append(" ").append(status.getJobId());
    b.append(System.lineSeparator());
    for (IJobStatus child : getChildren(jobId)) {
        toString(b, child.getJobId(), depth + 1);
    }
}
}
private synchronized File createFile(String suffix) throws IOException { // Create date directory File dateDir = new File(directory, DateFormatUtils.format( System.currentTimeMillis(), "yyyy/MM-dd/hh/mm/ss")); if (!dateDir.exists()) { try { FileUtils.forceMkdir(dateDir); } catch (IOException e) { throw new CommitterException( "Cannot create commit directory: " + dateDir, e); } } // Create file return new File(dateDir, TimeIdGenerator.next() + suffix); }
/**
 * Tests equality on command, temp directory, patterns, metadata
 * input/output formats, and environment variables.
 * <p>
 * NOTE(review): environment variables are compared outside the
 * EqualsBuilder via EqualsUtil.equalsMap — presumably for map-specific
 * comparison semantics; confirm against EqualsUtil.
 * @param other object to compare with
 * @return <code>true</code> if both objects are equal
 */
@Override
public boolean equals(final Object other) {
    if (!(other instanceof ExternalTransformer)) {
        return false;
    }
    ExternalTransformer castOther = (ExternalTransformer) other;
    return new EqualsBuilder()
            .appendSuper(super.equals(castOther))
            .append(getCommand(), castOther.getCommand())
            .append(getTempDir(), castOther.getTempDir())
            .append(patterns, castOther.patterns)
            .append(metadataInputFormat, castOther.metadataInputFormat)
            .append(metadataOutputFormat, castOther.metadataOutputFormat)
            .isEquals()
            && EqualsUtil.equalsMap(getEnvironmentVariables(),
                    castOther.getEnvironmentVariables());
}
@Override
+ attemptCount + " of " + maxRerunAttempts + ").", exception); Sleeper.sleepMillis(sleepTime);
/**
 * Truncates a value exceeding the maximum length using the configured
 * strategy: hash-suffix truncation, suffix abbreviation, or a plain cut.
 * @param value the value to truncate (may be <code>null</code>)
 * @return the possibly truncated value, or <code>null</code> if the
 *         value was <code>null</code>
 */
private String truncate(String value) {
    // Consistency fix: use the getMaxLength() accessor everywhere
    // instead of mixing direct field access ("maxLength") with accessor
    // calls, so an overriding accessor applies uniformly.
    if (value == null || value.length() <= getMaxLength()) {
        return value;
    }
    if (isAppendHash()) {
        // Hash suffix keeps truncated values unique.
        return StringUtil.truncateWithHash(
                value, getMaxLength(), getSuffix());
    }
    if (StringUtils.isNotEmpty(getSuffix())) {
        return StringUtils.abbreviate(value, getSuffix(), getMaxLength());
    }
    return StringUtils.truncate(value, getMaxLength());
}
@Override public void tagApplicableDocument( String reference, InputStream document, ImporterMetadata metadata, boolean parsed) throws ImporterHandlerException { for (String fieldName : fieldCases.keySet()) { CaseChangeDetails d = fieldCases.get(fieldName); boolean validApplyTo = false; String newField = fieldName; // Do field if (EqualsUtil.equalsAny(d.applyTo, APPLY_FIELD, APPLY_BOTH)) { newField = changeFieldCase(fieldName, d, metadata); validApplyTo = true; } // Do values if (StringUtils.isBlank(d.applyTo) || EqualsUtil.equalsAny( d.applyTo, APPLY_VALUE, APPLY_BOTH)) { changeValuesCase(newField, d, metadata); validApplyTo = true; } if (!validApplyTo) { LOG.warn("Unsupported \"applyTo\": " + d.applyTo); } } }
/**
 * Requests a job to stop, blocks until it is no longer running, and
 * fires the applicable life-cycle events once it reports being stopped.
 * @param job the job to stop
 * @param status the job status (must be a MutableJobStatus)
 */
private void stopJob(final IJob job, final IJobStatus status) {
    ((MutableJobStatus) status).setStopRequested(true);
    job.stop(status, suite);

    // Block until the job acknowledges the stop request.
    while (status.isRunning()) {
        Sleeper.sleepSeconds(STOP_WAIT_DELAY);
    }

    // Listeners are notified only when the job actually reports STOPPED
    // (it may have ended in another state).
    if (status.getState() != JobState.STOPPED) {
        return;
    }
    for (IJobLifeCycleListener l : suite.getJobLifeCycleListeners()) {
        l.jobStopped(status);
    }
    // Stopping the root job means the whole suite is stopped.
    boolean isRootJob = job.getId().equals(suite.getRootJob().getId());
    if (isRootJob) {
        for (ISuiteLifeCycleListener l
                : suite.getSuiteLifeCycleListeners()) {
            l.suiteStopped(suite);
        }
    }
}
@Override
public void run() {
    try {
        // Periodically "touch" every job status until terminated, so
        // the store reflects recent activity for each job.
        while (!terminate) {
            for (MutableJobStatus status : statuses) {
                long touched = suite.getJobStatusStore().touch(
                        suite.getId(), status.getJobId());
                status.setLastActivity(new Date(touched));
            }
            Sleeper.sleepMillis(HEARTBEAT_INTERVAL);
        }
    } catch (IOException e) {
        throw new JEFException("Cannot update status heartbeat.", e);
    }
}
return false; Sleeper.sleepMillis(MINIMUM_DELAY);