/** * Drops messages to/from other members and then closes the channel. Note that this member won't get excluded from * the view until failure detection has kicked in and the new coord installed the new view */ public static void shutdown(JChannel ch) throws Exception { DISCARD discard=new DISCARD(); discard.setLocalAddress(ch.getAddress()); discard.setDiscardAll(true); ProtocolStack stack=ch.getProtocolStack(); TP transport=stack.getTransport(); stack.insertProtocol(discard,ProtocolStack.Position.ABOVE,transport.getClass()); //abruptly shutdown FD_SOCK just as in real life when member gets killed non gracefully FD_SOCK fd=ch.getProtocolStack().findProtocol(FD_SOCK.class); if(fd != null) fd.stopServerSocket(false); View view=ch.getView(); if(view != null) { ViewId vid=view.getViewId(); List<Address> members=Collections.singletonList(ch.getAddress()); ViewId new_vid=new ViewId(ch.getAddress(),vid.getId() + 1); View new_view=new View(new_vid,members); // inject view in which the shut down member is the only element GMS gms=stack.findProtocol(GMS.class); gms.installView(new_view); } Util.close(ch); }
/**
 * Intercepts the outbound START_STATE_TRANSFER request targeted at node 1 and, at that exact
 * moment, starts discarding all traffic on d1 and kills the c1 cache manager on a forked thread,
 * simulating the state provider crashing mid state transfer. All other commands pass through
 * to the superclass unchanged.
 */
@Override
public <T> CompletionStage<T> invokeCommand(Address target, ReplicableCommand command,
                                            ResponseCollector<T> collector, DeliverOrder deliverOrder,
                                            long timeout, TimeUnit unit) {
   if (command instanceof StateRequestCommand &&
         ((StateRequestCommand) command).getType() == StateRequestCommand.Type.START_STATE_TRANSFER &&
         target.equals(address(1))) {
      // Drop all messages from/to this node first so the kill looks like an abrupt crash
      d1.setDiscardAll(true);
      // Kill on a separate thread so this RPC invocation itself is not blocked
      fork((Callable<Void>) () -> {
         log.info("KILLING the c1 cache");
         TestingUtil.killCacheManagers(manager(c1));
         return null;
      });
   }
   return super.invokeCommand(target, command, collector, deliverOrder, timeout, unit);
}
}); // NOTE(review): closes an enclosing anonymous class not visible in this fragment
// NOTE(review): fragment — the method body is cut off here (no closing brace visible);
// presumably waits/assertions between the split and the heal were elided — confirm against full source.
public void testClusterRecoveryAfterThreeWaySplit() throws Exception {
   // Split the cluster into three singleton partitions: every node drops all traffic
   d1.setDiscardAll(true);
   d2.setDiscardAll(true);
   d3.setDiscardAll(true);
   // Heal the split: stop discarding so the three partitions can merge back
   d1.setDiscardAll(false);
   d2.setDiscardAll(false);
   d3.setDiscardAll(false);
// Split the two nodes into separate partitions by discarding all traffic on both...
d1.setDiscardAll(true);
d2.setDiscardAll(true);
// ...then heal the split so the partitions can merge again
// NOTE(review): fragment — waits/assertions between the split and the heal were
// presumably elided from this view; confirm against the full source.
d1.setDiscardAll(false);
d2.setDiscardAll(false);
// NOTE(review): fragment — the method body is cut off here (no closing brace visible).
public void testClusterRecoveryAfterSplitAndCoordLeave() throws Exception {
   // Partition all three nodes from each other
   d1.setDiscardAll(true);
   d2.setDiscardAll(true);
   d3.setDiscardAll(true);
   // Heal only d2 and d3; d1 keeps discarding, so that member effectively leaves the cluster
   d2.setDiscardAll(false);
   d3.setDiscardAll(false);
/**
 * Drops messages to/from other members and then closes the channel. Note that this member won't get excluded from
 * the view until failure detection has kicked in and the new coord installed the new view
 */
public static void shutdown(JChannel ch) throws Exception {
   // Silently drop all traffic by inserting DISCARD just above the transport protocol
   DISCARD discard = new DISCARD();
   discard.setLocalAddress(ch.getAddress());
   discard.setDiscardAll(true);
   ProtocolStack stack = ch.getProtocolStack();
   TP transport = stack.getTransport();
   stack.insertProtocol(discard, ProtocolStack.Position.ABOVE, transport.getClass());

   //abruptly shutdown FD_SOCK just as in real life when member gets killed non gracefully
   FD_SOCK fd = ch.getProtocolStack().findProtocol(FD_SOCK.class);
   if (fd != null)
      fd.stopServerSocket(false);

   View view = ch.getView();
   if (view != null) {
      ViewId vid = view.getViewId();
      // Build a view containing only this member, with a bumped view id
      List<Address> members = Collections.singletonList(ch.getAddress());
      ViewId new_vid = new ViewId(ch.getAddress(), vid.getId() + 1);
      View new_view = new View(new_vid, members);
      // inject view in which the shut down member is the only element
      GMS gms = stack.findProtocol(GMS.class);
      gms.installView(new_view);
   }
   Util.close(ch);
}
/**
 * Crashes a non-coordinator member (node 2) by discarding all of its traffic, then verifies
 * that the survivors converge on a new view, finish rebalancing, and still hold the data.
 */
public void testNodeCrash() {
   List<MagicKey> storedKeys = init();
   assertFalse(c2.getCacheManager().isCoordinator());

   // Simulate an abrupt crash: node 2 stops sending and receiving anything
   d2.setDiscardAll(true);

   TestingUtil.blockUntilViewsReceived(30000, false, c1, c3);
   TestingUtil.waitForNoRebalance(c1, c3);
   checkValuesInDC(storedKeys, c1, c3);
}
/**
 * Crashes the coordinator (node 1) by discarding all of its traffic, then verifies that the
 * survivors converge on a new view, finish rebalancing, and still hold the data.
 */
public void testCoordCrash() {
   List<MagicKey> storedKeys = init();
   assertTrue(c1.getCacheManager().isCoordinator());

   // Simulate an abrupt crash: the coordinator stops sending and receiving anything
   d1.setDiscardAll(true);

   TestingUtil.blockUntilViewsReceived(30000, false, c2, c3);
   TestingUtil.waitForNoRebalance(c2, c3);
   checkValuesInDC(storedKeys, c2, c3);
}
// Partition all three nodes from each other
d1.setDiscardAll(true);
d2.setDiscardAll(true);
d3.setDiscardAll(true);
// Re-enable traffic on the designated merge coordinator first...
if (mergeCoordIndex == 0) d1.setDiscardAll(false);
if (mergeCoordIndex == 1) d2.setDiscardAll(false);
if (mergeCoordIndex == 2) d3.setDiscardAll(false);
// ...then on all nodes (redundant for the merge coordinator itself).
// NOTE(review): fragment — presumably waits were elided between these two phases
// in the original source; confirm before treating the conditionals as dead code.
d1.setDiscardAll(false);
d2.setDiscardAll(false);
d3.setDiscardAll(false);
// Simulate a crash of the chosen non-owner node: drop all of its traffic
discard[nonOwner].setDiscardAll(true);
public void testClusterRecoveryAfterCoordLeave() throws Exception { // create the partitions log.debugf("Killing coordinator via discard"); d1.setDiscardAll(true); // wait for the partitions to form long startTime = System.currentTimeMillis(); TestingUtil.blockUntilViewsReceived(30000, false, c1); TestingUtil.blockUntilViewsReceived(30000, false, c2, c3); TestingUtil.waitForNoRebalance(c1); TestingUtil.waitForNoRebalance(c2, c3); long endTime = System.currentTimeMillis(); log.debugf("Recovery took %s", Util.prettyPrintTime(endTime - startTime)); assert endTime - startTime < 30000 : "Recovery took too long: " + Util.prettyPrintTime(endTime - startTime); // Check that a new node can join addClusterEnabledCacheManager(defaultConfig, new TransportFlags().withFD(true).withMerge(true)); Cache<Object, Object> c4 = cache(3, CACHE_NAME); TestingUtil.blockUntilViewsReceived(30000, true, c2, c3, c4); TestingUtil.waitForNoRebalance(c2, c3, c4); }
/**
 * Starts a cache join while the two nodes cannot communicate, then verifies each node ends up
 * in its own view and the joiner's cache still becomes usable.
 */
public void testCoordinatorCrashesDuringJoin() {
   // Cut the nodes off from each other before the join starts.
   // NOTE(review): from this fragment alone it is not visible which manager d2 belongs to — confirm.
   d2.setDiscardAll(true);
   manager(1).defineConfiguration(CACHE_NAME, clusteredConfig.build());
   // Start the cache asynchronously; it joins while the nodes cannot communicate
   fork((Callable<Object>) () -> cache(1, CACHE_NAME));
   // Each node should install a view without the other
   TestingUtil.blockUntilViewsReceived(30000, false, manager(0));
   TestingUtil.blockUntilViewsReceived(30000, false, manager(1));
   // The joiner's cache must still finish rebalancing on its own
   TestingUtil.waitForNoRebalance(cache(1, CACHE_NAME));
}
} // NOTE(review): closes the enclosing class, which is outside this fragment's view
/**
 * Simulates a node crash, discarding all the messages from/to this node and then stopping
 * the caches.
 */
public static void crashCacheManagers(EmbeddedCacheManager... cacheManagers) {
   for (EmbeddedCacheManager manager : cacheManagers) {
      JGroupsTransport jgroupsTransport =
            (JGroupsTransport) manager.getGlobalComponentRegistry().getComponent(Transport.class);
      JChannel channel = jgroupsTransport.getChannel();
      try {
         // Silently drop every message so the rest of the cluster sees an abrupt failure
         DISCARD discardProtocol = new DISCARD();
         discardProtocol.setDiscardAll(true);
         channel.getProtocolStack().insertProtocol(discardProtocol, ProtocolStack.Position.ABOVE, TP.class);
      } catch (Exception e) {
         log.warn("Problems inserting discard", e);
         throw new RuntimeException(e);
      }
      // Install a view containing only this node, mimicking what failure detection would do
      View singletonView = View.create(channel.getAddress(), 100, channel.getAddress());
      GMS gms = (GMS) channel.getProtocolStack().findProtocol(GMS.class);
      gms.installView(singletonView);
   }
   killCacheManagers(cacheManagers);
}
/**
 * Verifies cluster recovery when a member leaves abruptly while the new coordinator is still
 * waiting for GET_STATUS responses: node 1 dies, node 2 becomes coordinator and its GET_STATUS
 * handling is blocked via a checkpoint, node 3 is then crashed (discard + stop), and node 2
 * must still stabilize on its own once the checkpoint is released.
 */
public void testAbruptLeaveAfterGetStatus() throws TimeoutException, InterruptedException {
   // Block the GET_STATUS command on node 2
   final LocalTopologyManager localTopologyManager2 =
         TestingUtil.extractGlobalComponent(manager(1), LocalTopologyManager.class);
   final CheckPoint checkpoint = new CheckPoint();
   LocalTopologyManager spyLocalTopologyManager2 = spy(localTopologyManager2);
   // NOTE(review): initialTopology is captured but never used in this method
   final CacheTopology initialTopology = localTopologyManager2.getCacheTopology(CACHE_NAME);
   doAnswer(invocation -> {
      int viewId = (Integer) invocation.getArguments()[0];
      // Signal that GET_STATUS arrived, then park until node 3 has been taken down
      checkpoint.trigger("GET_STATUS_" + viewId);
      log.debugf("Blocking the GET_STATUS command on the new coordinator");
      checkpoint.awaitStrict("3 left", 10, TimeUnit.SECONDS);
      return invocation.callRealMethod();
   }).when(spyLocalTopologyManager2).handleStatusRequest(anyInt());
   TestingUtil.replaceComponent(manager(1), LocalTopologyManager.class, spyLocalTopologyManager2, true);

   // Node 1 (the coordinator) dies. Node 2 becomes coordinator and tries to call GET_STATUS
   log.debugf("Killing coordinator");
   manager(0).stop();
   TestingUtil.blockUntilViewsReceived(30000, false, manager(1), manager(2));

   // Wait for the GET_STATUS command and stop node 3 abruptly
   int viewId = manager(1).getTransport().getViewId();
   checkpoint.awaitStrict("GET_STATUS_" + viewId, 10, TimeUnit.SECONDS);
   d3.setDiscardAll(true);
   manager(2).stop();
   TestingUtil.blockUntilViewsReceived(30000, false, manager(1));
   checkpoint.triggerForever("3 left");

   // Wait for node 2 to install a view with only itself and unblock the GET_STATUS command
   TestingUtil.waitForNoRebalance(c2);
}
/**
 * Heals the network split and checks that the merge actually happens: both merge listeners
 * must fire, the merged cluster must stabilize, and a fresh write must be visible on both
 * members afterwards.
 */
public void testMergeViewHappens() {
   // Stop discarding so the two partitions can see each other and merge
   discard.setDiscardAll(false);
   TestingUtil.blockUntilViewsReceived(60000, cache(0), cache(1));
   TestingUtil.waitForNoRebalance(cache(0), cache(1));
   assert ml0.isMerged && ml1.isMerged;

   // A write after the merge must be replicated to both members
   final String mergedValue = "v2";
   cache(0).put("k", mergedValue);
   assertEquals(cache(0).get("k"), mergedValue);
   assertEquals(cache(1).get("k"), mergedValue);
}
private void testRestartOnlyMember(boolean crash) { // The coordinator stays up throughout the test, but the cache only runs on node 1 and then 2 manager(1).defineConfiguration(CACHE_NAME, clusteredConfig.build()); manager(1).getCache(CACHE_NAME); if (crash) { d2.setDiscardAll(true); } manager(1).stop(); TestingUtil.blockUntilViewsReceived(30000, false, manager(0)); addClusterEnabledCacheManager(clusteredConfig, new TransportFlags().withFD(true)); manager(2).getCache(CACHE_NAME); }
/**
 * Creates two cache managers that can never see each other (the first one discards all traffic
 * before the second starts), so each forms its own singleton partition and writes a conflicting
 * value for the same key — the precondition for the merge tests.
 */
@Override
protected void createCacheManagers() throws Throwable {
   addClusterEnabledCacheManager(getDefaultClusteredCacheConfig(CacheMode.REPL_SYNC, true),
         new TransportFlags().withMerge(true));
   ml0 = new MergeListener();
   manager(0).addListener(ml0);
   // Enable DISCARD on node 0 before node 1 starts, guaranteeing the split from the beginning
   discard = TestingUtil.getDiscardForCache(manager(0));
   discard.setDiscardAll(true);

   addClusterEnabledCacheManager(getDefaultClusteredCacheConfig(CacheMode.REPL_SYNC, true),
         new TransportFlags().withMerge(true));
   ml1 = new MergeListener();
   manager(1).addListener(ml1);

   // Each partition writes its own conflicting value for "k"
   cache(0).put("k", "v0");
   cache(1).put("k", "v1");

   // NOTE(review): fixed sleep before checking that each member only sees itself — flaky-prone;
   // a view-based wait would be more robust, but the code is kept as-is here
   Thread.sleep(2000);
   assert advancedCache(0).getRpcManager().getTransport().getMembers().size() == 1;
   assert advancedCache(1).getRpcManager().getTransport().getMembers().size() == 1;
}
// Simulate a crash of manager 1: drop all of its traffic, then wait until the
// remaining members have installed the resulting views
TestingUtil.getDiscardForCache(manager(1)).setDiscardAll(true);
TestingUtil.blockUntilViewsReceived(60000, true, cacheManagers);
// NOTE(review): fragment — the catch body is cut off after this view.
try {
   // Crash c1: first drop all of its traffic, then stop its cache manager.
   // NOTE(review): the local is named d3 but it holds c1's DISCARD protocol — confirm naming.
   DISCARD d3 = TestingUtil.getDiscardForCache(c1.getCacheManager());
   d3.setDiscardAll(true);
   TestingUtil.killCacheManagers(manager(c1));
} catch (Exception e) {