private boolean shouldSkipPolicyBecauseAlreadyRunWithDiagnostics(com.github.cafdataprocessing.corepolicy.common.Document doc, Long policyId) {
    return doc.getMetadata().containsKey(PolicyWorkerConstants.POLICYWORKER_FAILURE_POLICY + policyId);
}
/**
 * Re-adds temporary working data that was previously removed from a document.
 * @param document The Document to restore temporary working data to.
 * @param temporaryData The temporary data to re-apply.
 */
public static void reapplyTemporaryWorkingData(com.github.cafdataprocessing.corepolicy.common.Document document, Multimap<String, String> temporaryData) {
    Multimap<String, String> docData = document.getMetadata();
    for (String propName : temporaryData.keySet()) {
        docData.putAll(propName, temporaryData.get(propName));
    }
}
private String getStorageReference(Document document) {
    Collection<String> storageReferences = document.getMetadata().get("storageReference");
    if (storageReferences.size() != 1) {
        throw new RuntimeException("Expected exactly one storageReference value but found " + storageReferences.size());
    }
    return storageReferences.stream().findAny().get();
}
private static Collection<Long> getPoliciesYetToBeExecuted(com.github.cafdataprocessing.corepolicy.common.Document document, Long collectionSequenceId, Collection<Long> resolvedPolicies) {
    Collection<Long> policiesExecutedAlready = DocumentFields.getPoliciesAlreadyExecutedForStringMap(collectionSequenceId, document.getMetadata());
    Collection<Long> policiesYetToBeExecuted = new ArrayList<>(resolvedPolicies);
    policiesYetToBeExecuted.removeAll(policiesExecutedAlready);
    return policiesYetToBeExecuted;
}
@Override
public Long extract(Document document) throws Exception {
    Optional<String> collectionSequenceIdFieldValue = document.getMetadata().get(DocumentFields.CollectionSequence).stream().findFirst();
    if (!collectionSequenceIdFieldValue.isPresent()) {
        throw new Exception("No " + DocumentFields.CollectionSequence + " field");
    }
    return getCollectionSequenceId(collectionSequenceIdFieldValue.get());
}
public static final List<Long> getCollectionSequencesAlreadyStarted(com.github.cafdataprocessing.corepolicy.common.Document document) {
    Multimap<String, String> metadata = document.getMetadata();
    // get ids from the "POLICYWORKER_COLLECTIONSEQUENCE" property - a multi-value field of sequence ids.
    return getIdsFromMetadataPropertyStringMap(metadata, ApiStrings.POLICYWORKER_COLLECTION_SEQUENCE);
}
public static final List<Long> getCollectionSequencesAlreadyCompleted(com.github.cafdataprocessing.corepolicy.common.Document document) {
    Multimap<String, String> metadata = document.getMetadata();
    // get ids from the "POLICYWORKER_COLLECTIONSEQUENCE_COMPLETED" property - a multi-value field of sequence ids.
    return getIdsFromMetadataPropertyStringMap(metadata, ApiStrings.POLICYWORKER_COLLECTION_SEQUENCES_COMPLETED);
}
public static void setReference(Document document, String reference) {
    setReference(document.getMetadata(), reference);
}
/**
 * Adds the collection sequence id once all resolved policies for its classification have finished executing.
 * @param document The Document to record the completed collection sequence on.
 * @param collectionSequenceId The id of the collection sequence that has completed.
 */
public static final void addCollectionSequenceCompletedInfo(com.github.cafdataprocessing.corepolicy.common.Document document, Long collectionSequenceId) {
    Multimap<String, String> metadata = document.getMetadata();

    // get all completed collection sequences - POLICYWORKER_COLLECTION_SEQUENCES_COMPLETED - a multi-value field.
    List<Long> csIds = getIdsFromMetadataPropertyStringMap(metadata, ApiStrings.POLICYWORKER_COLLECTION_SEQUENCES_COMPLETED);

    // if the specified sequence is already in this list there is nothing to do.
    if (csIds.contains(collectionSequenceId)) {
        return;
    }

    // append the collection sequence id to the multi-value list.
    metadata.put(ApiStrings.POLICYWORKER_COLLECTION_SEQUENCES_COMPLETED, collectionSequenceId.toString());
}
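// Hedged usage sketch (not from the original source): shows how the completed-sequence helpers above
// might be combined to decide whether a collection sequence still has to be evaluated for a document.
// The method name "hasSequenceCompleted" is illustrative only.
private static boolean hasSequenceCompleted(com.github.cafdataprocessing.corepolicy.common.Document document, Long collectionSequenceId) {
    // getCollectionSequencesAlreadyCompleted reads the POLICYWORKER_COLLECTION_SEQUENCES_COMPLETED multi-value field.
    return getCollectionSequencesAlreadyCompleted(document).contains(collectionSequenceId);
}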
/**
 * Removes fields that are added to track working progress through Collection Sequence execution.
 * @param document The Document to remove temporary working data from.
 * @return The temporary data that was removed.
 */
public static Multimap<String, String> removeTemporaryWorkingData(com.github.cafdataprocessing.corepolicy.common.Document document) {
    // use the same multimap type as the one used inside the document object.
    Multimap<String, String> temporaryData = ArrayListMultimap.create();
    Multimap<String, String> docData = document.getMetadata();

    getListOfKnownTemporaryData(docData).stream()
        .filter(propName -> docData.containsKey(propName))
        .forEach(propName -> {
            temporaryData.putAll(propName, docData.get(propName));
            docData.removeAll(propName);
        });

    return temporaryData;
}
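// Hedged usage sketch (not from the original source): illustrates the intended remove/reapply round trip,
// stripping the temporary tracking fields before handing the document to some external processing step and
// restoring them afterwards. "sendToExternalWorker" is a hypothetical placeholder, not a real API.
public static void processWithoutTemporaryData(com.github.cafdataprocessing.corepolicy.common.Document document) {
    Multimap<String, String> temporaryData = removeTemporaryWorkingData(document);
    try {
        sendToExternalWorker(document); // hypothetical call that should not see the tracking fields
    } finally {
        // restore the tracking fields so Collection Sequence execution can resume where it left off.
        reapplyTemporaryWorkingData(document, temporaryData);
    }
}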
/**
 * Removes any existing temporary metadata fields (e.g. the validation hash) from the document and adds
 * new ones that represent the supplied condition engine result.
 * @param conditionEngineResult The evaluation result to persist onto the document.
 * @param document The document whose metadata should be updated.
 */
public void applyTemporaryMetadataToDocument(ConditionEngineResult conditionEngineResult, Document document) {
    try {
        // remove all existing temporary field values.
        document.getMetadata().removeAll(DocumentFields.EvaluationInformationBlob);
        document.getMetadata().removeAll(DocumentFields.MetadataHash);

        // now generate new values and add them to the document.
        String sourceJson = mapper.writeValueAsString(conditionEngineResult);
        String compressedBlob = ZipUtils.compressStringAndEncode(sourceJson);
        String securityHash = generateSecurityHash(conditionEngineResult);

        document.getMetadata().put(DocumentFields.EvaluationInformationBlob, compressedBlob);
        document.getMetadata().put(DocumentFields.MetadataHash, securityHash);
    } catch (IOException e) {
        CpeException cpeException = new BackEndRequestFailedCpeException(BackEndRequestFailedErrors.UnableToAddTemporaryMetadata, e);
        logger.error("Error adding temporary evaluation metadata to a document.", cpeException);
        throw cpeException;
    }
}
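// Hedged usage sketch (not from the original source): after a condition engine evaluation the result can be
// persisted onto the document as a compressed blob plus a security hash, so a later worker can validate and
// reuse it instead of re-evaluating. "conditionEngine.evaluate" is an illustrative call, not a confirmed API.
public void persistEvaluation(Document document) {
    ConditionEngineResult conditionEngineResult = conditionEngine.evaluate(document); // hypothetical evaluation call
    // replaces any stale EvaluationInformationBlob / MetadataHash fields with fresh values.
    applyTemporaryMetadataToDocument(conditionEngineResult, document);
}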
public static String getReference(Document document) {
    return getReference(document.getMetadata(), !document.getDocuments().isEmpty());
}
private void applyFieldActions(Document document, Collection<FieldAction> fieldActions) {
    for (FieldAction fieldAction : fieldActions) {
        FieldAction.Action action = fieldAction.getAction();

        // N.B. Weird javac behaviour whereby if a switch is used below, two ProcessDocument class files are
        // generated, one called ProcessDocument$1.class and one called ProcessDocument.class, which stops us
        // from correctly running javah on this class.
        if (action == FieldAction.Action.SET_FIELD_VALUE) {
            document.getMetadata().get(fieldAction.getFieldName()).clear();
            document.getMetadata().put(fieldAction.getFieldName(), fieldAction.getFieldValue());
        } else if (action == FieldAction.Action.ADD_FIELD_VALUE) {
            document.getMetadata().put(fieldAction.getFieldName(), fieldAction.getFieldValue());
        }
    }
}
protected void applyFieldActions(Document document, Collection<FieldAction> fieldActions) {
    for (FieldAction fieldAction : fieldActions) {
        FieldAction.Action action = fieldAction.getAction();

        // N.B. Weird javac behaviour whereby if a switch is used below, two ProcessDocument class files are
        // generated, one called ProcessDocument$1.class and one called ProcessDocument.class, which stops us
        // from correctly running javah on this class.
        if (action == FieldAction.Action.SET_FIELD_VALUE) {
            document.getMetadata().get(fieldAction.getFieldName()).clear();
            document.getMetadata().put(fieldAction.getFieldName(), fieldAction.getFieldValue());
        } else if (action == FieldAction.Action.ADD_FIELD_VALUE) {
            document.getMetadata().put(fieldAction.getFieldName(), fieldAction.getFieldValue());
        }
    }
}
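// Hedged usage sketch (not from the original source): demonstrates the difference between the two field
// actions handled above. SET_FIELD_VALUE clears any existing values for the field before writing, while
// ADD_FIELD_VALUE appends to the multi-value field. The FieldAction setter names are assumed from the
// getters used above and may differ in the real class.
private void demonstrateFieldActions(Document document) {
    document.getMetadata().put("STATUS", "old");

    FieldAction set = new FieldAction();
    set.setFieldName("STATUS");
    set.setAction(FieldAction.Action.SET_FIELD_VALUE);
    set.setFieldValue("new");

    FieldAction add = new FieldAction();
    add.setFieldName("STATUS");
    add.setAction(FieldAction.Action.ADD_FIELD_VALUE);
    add.setFieldValue("extra");

    applyFieldActions(document, java.util.Arrays.asList(set, add));
    // the STATUS field now holds ["new", "extra"]; the original "old" value has been replaced.
}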
private void setupPreevaluatedInformation(Document document) {
    // Check if we have any supplied metadata which has been evaluated before.
    ConditionEngineResult result = conditionEngineMetadata.createResult(document.getMetadata());
    applyEvaluationInfo(result, this);
}
private Multimap<String, ReferencedData> getRequestedFieldsData(Collection<String> fieldsRequested, Document document) {
    // gather source data fields for the Extract task.
    Multimap<String, ReferencedData> sourceData = ArrayListMultimap.create();

    // add metadata fields to source data (filtered to the list provided on the policy definition if required).
    for (Map.Entry<String, String> metadata : document.getMetadata().entries()) {
        String fieldName = metadata.getKey();
        if (fieldsRequested.contains(fieldName)) {
            sourceData.put(fieldName, ReferencedData.getWrappedData(metadata.getValue().getBytes()));
        }
    }

    WorkerResponseHolder workerResponseHolder = applicationContext.getBean(WorkerResponseHolder.class);
    com.github.cafdataprocessing.worker.policy.shared.Document taskDataDocumentToClassify = workerResponseHolder.getTaskData().getDocument();

    // add metadata reference fields to source data (filtered to the list provided on the policy definition if required).
    for (Map.Entry<String, ReferencedData> metadataReference : taskDataDocumentToClassify.getMetadataReferences().entries()) {
        String fieldName = metadataReference.getKey();
        if (fieldsRequested.contains(fieldName)) {
            sourceData.put(fieldName, metadataReference.getValue());
        }
    }

    return sourceData;
}
@Override
protected ProcessingAction handlePolicy(Document document, Policy policy, Long collectionSequenceId) {
    // Apply any tagging information set up by this policy.
    TagPolicy tagPolicy = getTagPolicy(policy);

    // apply any field actions defined by this policy to the document.
    applyFieldActions(document, tagPolicy.getFieldActions());

    Multimap<String, String> metadata = document.getMetadata();
    final boolean[] foundTmpFields = {false};

    // Now we also need to tag a field depending on whether or not we can find any of the
    // temporary metadata fields present on the metadata supplied.
    DocumentFields.getListOfKnownTemporaryData(metadata).stream()
        .filter(propName -> metadata.containsKey(propName))
        .forEach(propName -> foundTmpFields[0] = true);

    // add a field we know about to the document, based on the collection sequence id.
    metadata.put("CheckTempFieldsWorkerFoundMetadata_" + collectionSequenceId, Boolean.toString(foundTmpFields[0]));

    // we can use the same engine, and don't need to requeue this to happen.
    return ProcessingAction.CONTINUE_PROCESSING;
}
private Map<String, List<DocumentWorkerFieldValue>> getFieldsData(Set<String> policyDefFields, Document document) {
    Map<String, List<DocumentWorkerFieldValue>> fieldsMap = new HashMap<>();

    WorkerResponseHolder workerResponseHolder = applicationContext.getBean(WorkerResponseHolder.class);
    com.github.cafdataprocessing.worker.policy.shared.Document taskDataDocumentToClassify = workerResponseHolder.getTaskData().getDocument();

    // Add metadata fields to the Document Worker Task fields. If 'fields' is specified in the policy definition
    // then only add those fields to 'fieldsMap', otherwise add all fields.
    // This loops over a metadata map of String to String, and a metadata map of String to ReferencedData,
    // which is used for binary data and storage references.
    if (policyDefFields == null || policyDefFields.isEmpty()) {
        document.getMetadata().entries().stream()
            .forEach(metadata -> addToWorkerTaskFields(fieldsMap, metadata.getKey(), createWorkerData(metadata.getValue())));

        if (taskDataDocumentToClassify != null) {
            taskDataDocumentToClassify.getMetadataReferences().entries().stream()
                .forEach(metadataReference -> addToWorkerTaskFields(fieldsMap, metadataReference.getKey(), createWorkerData(metadataReference.getValue())));
        }
        return fieldsMap;
    }

    for (String fieldName : policyDefFields) {
        // Get the predicate for metadata key matching from the filter field name.
        final Predicate<String> doesFieldSpecMatch = getKeyToFilterFieldNameMatchPredicate(fieldName);

        // If the filter field name matches the metadata key, add the metadata entry to the worker task fields.
        document.getMetadata().entries().stream()
            .filter(metadata -> doesFieldSpecMatch.test(metadata.getKey()))
            .forEach(metadata -> addToWorkerTaskFields(fieldsMap, metadata.getKey(), createWorkerData(metadata.getValue())));

        if (taskDataDocumentToClassify != null) {
            taskDataDocumentToClassify.getMetadataReferences().entries().stream()
                .filter(metadataRef -> doesFieldSpecMatch.test(metadataRef.getKey()))
                .forEach(metadataReference -> addToWorkerTaskFields(fieldsMap, metadataReference.getKey(), createWorkerData(metadataReference.getValue())));
        }
    }
    return fieldsMap;
}
public DocumentUnderEvaluationImpl(Document document, ConditionEngineMetadata conditionEngineMetadata, ApiProperties apiProperties) {
    this(conditionEngineMetadata, apiProperties);

    // we hold onto a list of MetadataValues internally, so convert the metadata at this point so it is only done once.
    for (Map.Entry<String, String> entry : document.getMetadata().entries()) {
        this.metadata.put(entry.getKey(), new MetadataValue(apiProperties, entry.getValue()));
    }

    // likewise convert the streams up front so it is only done once.
    for (Map.Entry<String, InputStream> entry : document.getStreams().entries()) {
        this.streams.put(entry.getKey(), new MetadataValue(apiProperties, entry.getValue()));
    }

    this.addMetadataString(DocumentFields.ChildDocumentCount, String.valueOf(document.getDocuments().size()));

    final int depth = getDepth();
    this.documents = document.getDocuments().stream()
        .map(d -> {
            DocumentUnderEvaluation documentUnderEvaluation = new DocumentUnderEvaluationImpl(d, conditionEngineMetadata, apiProperties);
            documentUnderEvaluation.addMetadataString(DocumentFields.ChildDocumentDepth, String.valueOf(depth + 1));
            return documentUnderEvaluation;
        })
        .collect(Collectors.toList());

    setupPreevaluatedInformation(document);
}
newDocument.setReference(document.getReference());
if (document.getMetadata() != null) {
    for (Map.Entry<String, String> entry : document.getMetadata().entries()) {
        newDocument.getMetadata().put(entry.getKey(), entry.getValue());
    }
}