/**
 * Serializes the workflow token values set by the current node to JSON and writes them
 * to the output stream of the given URL connection. When the token is {@code null},
 * the stream is still opened and closed, but nothing is written.
 */
private void writeWorkflowToken(@Nullable BasicWorkflowToken workflowToken,
                                HttpURLConnection urlConn) throws IOException {
  try (Writer tokenWriter = new OutputStreamWriter(urlConn.getOutputStream(), Charsets.UTF_8)) {
    if (workflowToken == null) {
      // Nothing to send; try-with-resources still closes the connection's stream.
      return;
    }
    GSON.toJson(Maps.transformValues(workflowToken.getAllFromCurrentNode(),
                                     Functions.toStringFunction()),
                TOKEN_TYPE, tokenWriter);
  }
}
/**
 * Creates a deep copy of this {@link WorkflowToken}.
 *
 * @return a new {@link BasicWorkflowToken} initialized from this token's state
 */
public synchronized WorkflowToken deepCopy() {
  BasicWorkflowToken copy = new BasicWorkflowToken(this);
  return copy;
}
/**
 * Merges the values of the given token into this workflow's token.
 */
private void updateWorkflowToken(WorkflowToken workflowToken) throws Exception {
  BasicWorkflowToken target = (BasicWorkflowToken) token;
  target.mergeToken(workflowToken);
}
@Test
public void testNonUpdatableWorkflowToken() {
  // A token constructed with a 0 KB limit cannot hold any entry, so every put must fail.
  BasicWorkflowToken token = new BasicWorkflowToken(0);
  token.setCurrentNode("node");
  try {
    token.put("a", "b");
    Assert.fail("Workflow token update should fail because the token is non-updatable.");
  } catch (IllegalStateException expected) {
    assertSizeExceededErrorMessage(expected);
  }
}
@Test public void testMergeLargeWorkflowToken() { BasicWorkflowToken token1 = new BasicWorkflowToken(1); token1.setCurrentNode("node1"); // total size of token1 after this operation is 512KB token1.put(generateDataInKb(1), generateDataInKb(511)); // add an additional 2 bytes, just so we have size > max size (and not equal to max size) after merge token1.put("k", "v"); BasicWorkflowToken token2 = new BasicWorkflowToken(1); token2.setCurrentNode("node1"); // total size of token2 after this operation is 512KB token2.put(generateDataInKb(1), generateDataInKb(511)); // should succeed, because token1 already contains the NodeValue being merged token1.mergeToken(token2); // set a different node in token2 and add the same data token2.setCurrentNode("node2"); // token2 is at capacity after the following operation token2.put(generateDataInKb(1), generateDataInKb(511)); // merging should now fail, because token1 now does not contain the NodeValue being merged try { token1.mergeToken(token2); Assert.fail("Workflow token merging should fail because token size should have exceeded limit."); } catch (IllegalStateException e) { assertSizeExceededErrorMessage(e); } }
/**
 * Puts the given key/value pair into this token; delegates to the scoped put with
 * {@link Scope#USER}.
 */
@Override public synchronized void put(String key, Value value) { put(key, value, Scope.USER); }
@SuppressWarnings("unchecked") private Workflow initializeWorkflow() throws Exception { Class<?> clz = Class.forName(workflowSpec.getClassName(), true, program.getClassLoader()); if (!Workflow.class.isAssignableFrom(clz)) { throw new IllegalStateException(String.format("%s is not Workflow.", clz)); } Class<? extends Workflow> workflowClass = (Class<? extends Workflow>) clz; final Workflow workflow = new InstantiatorFactory(false).get(TypeToken.of(workflowClass)).create(); // set metrics Reflections.visit(workflow, workflow.getClass(), new MetricsFieldSetter(workflowContext.getMetrics())); if (!(workflow instanceof ProgramLifecycle)) { return workflow; } final TransactionControl txControl = Transactions.getTransactionControl(workflowContext.getDefaultTxControl(), Workflow.class, workflow, "initialize", WorkflowContext.class); basicWorkflowToken.setCurrentNode(workflowSpec.getName()); workflowContext.setState(new ProgramState(ProgramStatus.INITIALIZING, null)); workflowContext.initializeProgram((ProgramLifecycle) workflow, txControl, false); workflowStateWriter.setWorkflowToken(workflowRunId, basicWorkflowToken); return workflow; }
/**
 * Returns the value the given node set for the given key; delegates to the scoped get
 * with {@link Scope#USER}.
 */
@Override public synchronized Value get(String key, String nodeName) { return get(key, nodeName, Scope.USER); }
private void put(String key, Value value, Scope scope) { if (!putAllowed) { String msg = String.format("Failed to put key '%s' from node '%s' in the WorkflowToken. Put operation is not " + "allowed from the Mapper and Reducer classes and from Spark executor.", key, nodeName); throw new UnsupportedOperationException(msg); } Preconditions.checkNotNull(key, "Null key cannot be added in the WorkflowToken."); Preconditions.checkNotNull(value, String.format("Null value provided for the key '%s'.", key)); Preconditions.checkNotNull(value.toString(), String.format("Null value provided for the key '%s'.", key)); Preconditions.checkState(nodeName != null, "nodeName cannot be null."); List<NodeValue> nodeValueList = tokenValueMap.get(scope).get(key); if (nodeValueList == null) { nodeValueList = Lists.newArrayList(); tokenValueMap.get(scope).put(key, nodeValueList); } NodeValue nodeValueToAddUpdate = new NodeValue(nodeName, value); // Check if the current node already added the key to the token. // In that case replace that entry with the new one for (int i = 0; i < nodeValueList.size(); i++) { NodeValue existingNodeValue = nodeValueList.get(i); if (existingNodeValue.getNodeName().equals(nodeName)) { addOrUpdate(key, nodeValueToAddUpdate, nodeValueList, i); return; } } addOrUpdate(key, nodeValueToAddUpdate, nodeValueList, -1); }
/**
 * Returns the {@link WorkflowProgramInfo} when this program runs inside a Workflow,
 * or {@code null} when it does not. Puts on the embedded workflow token are disabled
 * before the info is returned.
 */
@Nullable
WorkflowProgramInfo getWorkflowProgramInfo() {
  String serialized = hConf.get(HCONF_ATTR_WORKFLOW_INFO);
  if (serialized == null) {
    return null;
  }
  WorkflowProgramInfo programInfo = GSON.fromJson(serialized, WorkflowProgramInfo.class);
  // Token updates are not permitted from within this execution context.
  programInfo.getWorkflowToken().disablePut();
  return programInfo;
}
/**
 * Copy constructor used by {@code deepCopy()}: the per-scope maps and per-key value
 * lists are copied, so structural changes to the copy do not affect the original;
 * the {@code NodeValue} elements themselves are reused.
 */
private BasicWorkflowToken(BasicWorkflowToken other) {
  for (Map.Entry<Scope, Map<String, List<NodeValue>>> scopeEntry : other.tokenValueMap.entrySet()) {
    Map<String, List<NodeValue>> copiedScope = new HashMap<>();
    for (Map.Entry<String, List<NodeValue>> keyEntry : scopeEntry.getValue().entrySet()) {
      copiedScope.put(keyEntry.getKey(), Lists.newArrayList(keyEntry.getValue()));
    }
    this.tokenValueMap.put(scopeEntry.getKey(), copiedScope);
  }
  this.nodeName = other.nodeName;
  if (other.mapReduceCounters != null) {
    this.mapReduceCounters = copyHadoopCounters(other.mapReduceCounters);
  }
  this.maxSizeBytes = other.maxSizeBytes;
  this.bytesLeft = other.bytesLeft;
}
@Test
public void testUpdateWithLargeRecord() {
  BasicWorkflowToken token = new BasicWorkflowToken(1);
  token.setCurrentNode("node");
  // A single put of a 1024KB value must push the token past its size limit.
  try {
    token.put("k", generateDataInKb(1024));
    Assert.fail("Workflow token update should fail because token size should have exceeded limit.");
  } catch (IllegalStateException expected) {
    assertSizeExceededErrorMessage(expected);
  }
}
/**
 * Puts the given key/value pair into this token; delegates to the scoped put with
 * {@link Scope#USER}.
 */
@Override public synchronized void put(String key, Value value) { put(key, value, Scope.USER); }
@SuppressWarnings("unchecked") private Workflow initializeWorkflow() throws Exception { Class<?> clz = Class.forName(workflowSpec.getClassName(), true, program.getClassLoader()); if (!Workflow.class.isAssignableFrom(clz)) { throw new IllegalStateException(String.format("%s is not Workflow.", clz)); } Class<? extends Workflow> workflowClass = (Class<? extends Workflow>) clz; final Workflow workflow = new InstantiatorFactory(false).get(TypeToken.of(workflowClass)).create(); // set metrics Reflections.visit(workflow, workflow.getClass(), new MetricsFieldSetter(workflowContext.getMetrics())); if (!(workflow instanceof ProgramLifecycle)) { return workflow; } final TransactionControl txControl = Transactions.getTransactionControl(workflowContext.getDefaultTxControl(), Workflow.class, workflow, "initialize", WorkflowContext.class); basicWorkflowToken.setCurrentNode(workflowSpec.getName()); workflowContext.setState(new ProgramState(ProgramStatus.INITIALIZING, null)); workflowContext.initializeProgram((ProgramLifecycle) workflow, txControl, false); workflowStateWriter.setWorkflowToken(workflowRunId, basicWorkflowToken); return workflow; }
/**
 * Returns the value for the given key; delegates to the scoped get with
 * {@link Scope#USER}.
 */
@Override public synchronized Value get(String key) { return get(key, Scope.USER); }
private void put(String key, Value value, Scope scope) { if (!putAllowed) { String msg = String.format("Failed to put key '%s' from node '%s' in the WorkflowToken. Put operation is not " + "allowed from the Mapper and Reducer classes and from Spark executor.", key, nodeName); throw new UnsupportedOperationException(msg); } Preconditions.checkNotNull(key, "Null key cannot be added in the WorkflowToken."); Preconditions.checkNotNull(value, String.format("Null value provided for the key '%s'.", key)); Preconditions.checkNotNull(value.toString(), String.format("Null value provided for the key '%s'.", key)); Preconditions.checkState(nodeName != null, "nodeName cannot be null."); List<NodeValue> nodeValueList = tokenValueMap.get(scope).get(key); if (nodeValueList == null) { nodeValueList = Lists.newArrayList(); tokenValueMap.get(scope).put(key, nodeValueList); } NodeValue nodeValueToAddUpdate = new NodeValue(nodeName, value); // Check if the current node already added the key to the token. // In that case replace that entry with the new one for (int i = 0; i < nodeValueList.size(); i++) { NodeValue existingNodeValue = nodeValueList.get(i); if (existingNodeValue.getNodeName().equals(nodeName)) { addOrUpdate(key, nodeValueToAddUpdate, nodeValueList, i); return; } } addOrUpdate(key, nodeValueToAddUpdate, nodeValueList, -1); }
/**
 * Returns the {@link WorkflowProgramInfo} when this program runs inside a Workflow,
 * or {@code null} when it does not. Puts on the embedded workflow token are disabled
 * before the info is returned.
 */
@Nullable
WorkflowProgramInfo getWorkflowProgramInfo() {
  String serialized = hConf.get(HCONF_ATTR_WORKFLOW_INFO);
  if (serialized == null) {
    return null;
  }
  WorkflowProgramInfo programInfo = GSON.fromJson(serialized, WorkflowProgramInfo.class);
  // Token updates are not permitted from within this execution context.
  programInfo.getWorkflowToken().disablePut();
  return programInfo;
}
/**
 * Copy constructor used by {@code deepCopy()}: the per-scope maps and per-key value
 * lists are copied, so structural changes to the copy do not affect the original;
 * the {@code NodeValue} elements themselves are reused.
 */
private BasicWorkflowToken(BasicWorkflowToken other) {
  for (Map.Entry<Scope, Map<String, List<NodeValue>>> scopeEntry : other.tokenValueMap.entrySet()) {
    Map<String, List<NodeValue>> copiedScope = new HashMap<>();
    for (Map.Entry<String, List<NodeValue>> keyEntry : scopeEntry.getValue().entrySet()) {
      copiedScope.put(keyEntry.getKey(), Lists.newArrayList(keyEntry.getValue()));
    }
    this.tokenValueMap.put(scopeEntry.getKey(), copiedScope);
  }
  this.nodeName = other.nodeName;
  if (other.mapReduceCounters != null) {
    this.mapReduceCounters = copyHadoopCounters(other.mapReduceCounters);
  }
  this.maxSizeBytes = other.maxSizeBytes;
  this.bytesLeft = other.bytesLeft;
}
@Test public void testRepeatedPutAtSameNode() { BasicWorkflowToken token = new BasicWorkflowToken(1); token.setCurrentNode("node1"); // after this put, size would be 512KB token.put(generateDataInKb(1), generateDataInKb(511)); // after another successful put at a different node, size would be 1024KB. Workflow token would be at capacity. token.setCurrentNode("node2"); token.put(generateDataInKb(1), generateDataInKb(511)); // should succeed because the entry at key k1 should be replaced token.put(generateDataInKb(1), generateDataInKb(511)); // now should fail, because even though we're updating node2's value, we're adding an extra KB try { token.put(generateDataInKb(1), generateDataInKb(512)); Assert.fail("Workflow token update at existing key should fail because token size should have exceeded limit."); } catch (IllegalStateException e) { assertSizeExceededErrorMessage(e); } }
/**
 * Serializes the workflow token values set by the current node to JSON and writes them
 * to the output stream of the given URL connection. When the token is {@code null},
 * the stream is still opened and closed, but nothing is written.
 */
private void writeWorkflowToken(@Nullable BasicWorkflowToken workflowToken,
                                HttpURLConnection urlConn) throws IOException {
  try (Writer tokenWriter = new OutputStreamWriter(urlConn.getOutputStream(), Charsets.UTF_8)) {
    if (workflowToken == null) {
      // Nothing to send; try-with-resources still closes the connection's stream.
      return;
    }
    GSON.toJson(Maps.transformValues(workflowToken.getAllFromCurrentNode(),
                                     Functions.toStringFunction()),
                TOKEN_TYPE, tokenWriter);
  }
}