/**
 * Kill Tachyon Master by 'kill -9' command.
 *
 * <p>Runs a bash one-liner that lists processes with {@code ps}, keeps the lines that match
 * {@code [j]ava} (case-insensitively) and {@code tachyon.master.TachyonMaster}, and sends
 * {@code kill -9} to the pid printed in the first column of each match. Once the shell command
 * has returned, {@code CommonUtils.sleepMs(LOG, 1000)} is called before this method returns.
 *
 * <p>Failures are logged and not propagated. If the wait for the shell command is interrupted,
 * the thread's interrupt status is restored before logging.
 */
private static void killMaster() {
  // The bracketed "[j]" still matches "java" but keeps the grep command line itself,
  // which contains the literal text "[j]ava", out of the result.
  String[] killMasterCommand = new String[]{"/usr/bin/env", "bash", "-c",
      "for pid in `ps -Aww -o pid,command | grep -i \"[j]ava\" | grep "
          + "\"tachyon.master.TachyonMaster\" | awk '{print $1}'`; do kill -9 \"$pid\"; done"};
  try {
    Runtime.getRuntime().exec(killMasterCommand).waitFor();
    CommonUtils.sleepMs(LOG, 1000);
  } catch (InterruptedException e) {
    // waitFor() clears the interrupt status when it throws; put it back so that code further
    // up the stack can still observe the interruption.
    Thread.currentThread().interrupt();
    LOG.error("Error when killing Master", e);
  } catch (Exception e) {
    LOG.error("Error when killing Master", e);
  }
}
@Override public void run() { long lastCheckMs = System.currentTimeMillis(); while (mRunning) { // Check the time since last check, and wait until it is within check interval long lastIntervalMs = System.currentTimeMillis() - lastCheckMs; long toSleepMs = mCheckIntervalMs - lastIntervalMs; if (toSleepMs > 0) { CommonUtils.sleepMs(LOG, toSleepMs); } else { LOG.warn("Space reserver took: " + lastIntervalMs + ", expected: " + mCheckIntervalMs); } reserveSpace(); } }
/**
 * Start Tachyon Master by executing the launch script.
 *
 * <p>The script path is built from the {@code Constants.TACHYON_HOME} value of a fresh
 * {@code TachyonConf} followed by {@code /bin/tachyon-start.sh}, and is invoked with the single
 * argument {@code master}. The method waits for the script to exit and then calls
 * {@code CommonUtils.sleepMs(LOG, 1000)}.
 *
 * <p>Failures are logged and not propagated. If the wait is interrupted, the thread's interrupt
 * status is restored before logging.
 */
private static void startMaster() {
  // Hand the script and its argument to exec() as separate array elements.
  // Runtime.exec(String) splits the command line on whitespace, so a Tachyon home directory
  // containing a space would otherwise be broken into several bogus arguments.
  String[] startMasterCommand = new String[]{
      new TachyonConf().get(Constants.TACHYON_HOME) + "/bin/tachyon-start.sh", "master"};
  try {
    Runtime.getRuntime().exec(startMasterCommand).waitFor();
    CommonUtils.sleepMs(LOG, 1000);
  } catch (InterruptedException e) {
    // waitFor() clears the interrupt status when it throws; restore it for callers.
    Thread.currentThread().interrupt();
    LOG.error("Error when starting Master", e);
  } catch (Exception e) {
    LOG.error("Error when starting Master", e);
  }
}
/**
 * Starts and stops a local Tachyon cluster twice in a row, building a new
 * {@code LocalTachyonCluster} for each round and pausing with
 * {@code CommonUtils.sleepMs(Constants.SECOND_MS)} after every start and every stop.
 *
 * @param args command line arguments; not read
 * @throws Exception if starting or stopping the cluster fails
 */
public static void main(String[] args) throws Exception {
  // Two identical rounds, each on a freshly constructed cluster instance.
  for (int round = 0; round < 2; round++) {
    LocalTachyonCluster cluster = new LocalTachyonCluster(100, 8 * Constants.MB, Constants.GB);
    cluster.start();
    CommonUtils.sleepMs(Constants.SECOND_MS);
    cluster.stop();
    CommonUtils.sleepMs(Constants.SECOND_MS);
  }
}
/** * Main loop for the cleanup, continuously look for zombie sessions */ @Override public void run() { long lastCheckMs = System.currentTimeMillis(); while (mRunning) { // Check the time since last check, and wait until it is within check interval long lastIntervalMs = System.currentTimeMillis() - lastCheckMs; long toSleepMs = mCheckIntervalMs - lastIntervalMs; if (toSleepMs > 0) { CommonUtils.sleepMs(LOG, toSleepMs); } else { LOG.warn("Session cleanup took: " + lastIntervalMs + ", expected: " + mCheckIntervalMs); } // Check if any sessions have become zombies, if so clean them up mBlockDataManager.cleanupSessions(); lastCheckMs = System.currentTimeMillis(); } }
/**
 * Runs two consecutive start/stop cycles of a local Tachyon cluster. Every cycle constructs its
 * own {@code LocalTachyonCluster} and calls {@code CommonUtils.sleepMs(Constants.SECOND_MS)}
 * once after starting and once after stopping.
 *
 * @param args command line arguments; not read
 * @throws Exception if starting or stopping the cluster fails
 */
public static void main(String[] args) throws Exception {
  int cyclesLeft = 2;
  while (cyclesLeft > 0) {
    LocalTachyonCluster localCluster =
        new LocalTachyonCluster(100, 8 * Constants.MB, Constants.GB);

    localCluster.start();
    CommonUtils.sleepMs(Constants.SECOND_MS);

    localCluster.stop();
    CommonUtils.sleepMs(Constants.SECOND_MS);

    cyclesLeft--;
  }
}
/**
 * Stop the current Tachyon cluster. This is used for preparation and clean up.
 * To crash the Master, use <code>killMaster</code>.
 *
 * <p>The script path is built from the {@code Constants.TACHYON_HOME} value of a fresh
 * {@code TachyonConf} followed by {@code /bin/tachyon-stop.sh}. The method waits for the script
 * to exit and then calls {@code CommonUtils.sleepMs(LOG, 1000)}.
 *
 * <p>Failures are logged and not propagated. If the wait is interrupted, the thread's interrupt
 * status is restored before logging.
 */
private static void stopCluster() {
  // Use the array form of exec(): Runtime.exec(String) splits the command on whitespace, so a
  // Tachyon home directory containing a space would be cut into several bogus arguments.
  String[] stopClusterCommand = new String[]{
      new TachyonConf().get(Constants.TACHYON_HOME) + "/bin/tachyon-stop.sh"};
  try {
    Runtime.getRuntime().exec(stopClusterCommand).waitFor();
    CommonUtils.sleepMs(LOG, 1000);
  } catch (InterruptedException e) {
    // waitFor() clears the interrupt status when it throws; restore it for callers.
    Thread.currentThread().interrupt();
    LOG.error("Error when stop Tachyon cluster", e);
  } catch (Exception e) {
    LOG.error("Error when stop Tachyon cluster", e);
  }
}
}
long toSleepMs = mSyncIntervalMs - lastIntervalMs; if (toSleepMs > 0) { CommonUtils.sleepMs(LOG, toSleepMs); } else { LOG.warn("Sync took: " + lastIntervalMs + ", expected: " + mSyncIntervalMs); CommonUtils.sleepMs(LOG, Constants.SECOND_MS); if (System.currentTimeMillis() - lastSyncMs >= mSyncTimeoutMs) { throw new RuntimeException("Master sync timeout exceeded: " + mSyncTimeoutMs);
/** * Starts both a master and a worker using the configurations in {@link MasterContext} and * {@link WorkerContext} respectively. * * @throws IOException when the operation fails */ public void start() throws IOException { mTachyonHome = File.createTempFile("Tachyon", "U" + System.currentTimeMillis()).getAbsolutePath(); // Delete the temp dir by ufs, otherwise, permission problem may be encountered. UnderFileSystemUtils.deleteDir(mTachyonHome, MasterContext.getConf()); mWorkerDataFolder = "/datastore"; mLocalhostName = NetworkAddressUtils.getLocalHostName(100); // Disable hdfs client caching to avoid file system close() affecting other clients System.setProperty("fs.hdfs.impl.disable.cache", "true"); startMaster(); UnderFileSystemUtils.mkdirIfNotExists(mMasterConf.get(Constants.UNDERFS_ADDRESS), mMasterConf); CommonUtils.sleepMs(10); startWorker(); }
long toSleepMs = mHeartbeatIntervalMs - lastIntervalMs; if (toSleepMs > 0) { CommonUtils.sleepMs(LOG, toSleepMs); } else { LOG.warn("Heartbeat took: " + lastIntervalMs + ", expected: " + mHeartbeatIntervalMs); CommonUtils.sleepMs(LOG, Constants.SECOND_MS); if (System.currentTimeMillis() - lastHeartbeatMs >= mHeartbeatTimeoutMs) { throw new RuntimeException("Master heartbeat timeout exceeded: " + mHeartbeatTimeoutMs);
mJournalTailer = new JournalTailer(mMaster, mJournal); while (!mJournalTailer.checkpointExists()) { CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); if (mInitiateShutdown) { LOG.info("Journal tailer has been shutdown while waiting to load the checkpoint file."); CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); CommonUtils.sleepMs(LOG, mJournalTailerSleepTimeMs); } catch (IOException ioe) {
CommonUtils.sleepMs(LOG, toSleep, true); timeleft = deadline - System.currentTimeMillis();
mWorkerThread.start(); CommonUtils.sleepMs(100); ClientContext.reset(mWorkerConf);
CommonUtils.sleepMs(LOG, aliveTimeMs); System.out.println("Round " + rounds + " : Crash Master..."); killMaster();
CommonUtils.sleepMs(10);
mThriftServer.stop(); mThriftServerSocket.close(); CommonUtils.sleepMs(100);
CommonUtils.sleepMs(LOG, 100);
CommonUtils.sleepMs(100);