private FileStatus[] listSortedOutputShardDirs(Path outputReduceDir, FileSystem fs) throws FileNotFoundException, IOException { final String dirPrefix = SolrOutputFormat.getOutputName(job); FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(dirPrefix); } }); for (FileStatus dir : dirs) { if (!dir.isDirectory()) { throw new IllegalStateException("Not a directory: " + dir.getPath()); } } // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999 shards Arrays.sort(dirs, new Comparator<FileStatus>() { @Override public int compare(FileStatus f1, FileStatus f2) { return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName()); } }); return dirs; }
public static Path findSolrConfig(Configuration conf) throws IOException { // FIXME when mrunit supports the new cache apis //URI[] localArchives = context.getCacheArchives(); Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); for (Path unpackedDir : localArchives) { if (unpackedDir.getName().equals(SolrOutputFormat.getZipName(conf))) { LOG.info("Using this unpacked directory as solr home: {}", unpackedDir); return unpackedDir; } } throw new IOException(String.format(Locale.ENGLISH, "No local cache archives, where is %s:%s", SolrOutputFormat .getSetupOk(), SolrOutputFormat.getZipName(conf))); }
/**
 * Zips the given Solr home directory and registers the archive in the job's
 * distributed cache so each task can unpack its own copy.
 *
 * @param solrHomeDir local Solr home directory to ship
 * @param job the job whose distributed cache receives the archive
 * @throws IOException if zipping or cache registration fails
 */
public static void setupSolrHomeCache(File solrHomeDir, Job job) throws IOException {
  addSolrConfToDistributedCache(job, createSolrHomeZip(solrHomeDir));
}
/**
 * Creates a record writer that indexes key/value pairs into an embedded Solr
 * instance rooted at this task's default work file.
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Utils.getLogConfigFile(conf);
  Path shardWorkDir = getDefaultWorkFile(context, "");
  return new SolrRecordWriter<K, V>(context, shardWorkDir, getBatchSize(conf));
}
/**
 * Builds a record writer backed by an embedded Solr server writing into
 * {@code outputShardDir}, batching documents in groups of {@code batchSize}.
 * A heartbeat is kept alive during the (potentially slow) server startup so
 * the framework does not kill the task.
 */
public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
  this.batchSize = batchSize;
  this.batch = new ArrayList<>(batchSize);
  Configuration conf = context.getConfiguration();

  heartBeater = new HeartBeater(context);
  try {
    heartBeater.needHeartBeat();
    Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
    FileSystem shardFs = outputShardDir.getFileSystem(conf);
    EmbeddedSolrServer embeddedServer =
        createEmbeddedSolrServer(solrHomeDir, shardFs, outputShardDir);
    batchWriter = new BatchWriter(embeddedServer, batchSize,
        context.getTaskAttemptID().getTaskID(),
        SolrOutputFormat.getSolrWriterThreadCount(conf),
        SolrOutputFormat.getSolrWriterQueueSize(conf));
  } catch (Exception e) {
    throw new IllegalStateException(String.format(Locale.ENGLISH,
        "Failed to initialize record writer for %s, %s",
        context.getJobName(), conf.get("mapred.task.id")), e);
  } finally {
    heartBeater.cancelHeartBeat();
  }
}
SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job); } else { assert options.zkHost != null; String configName = PublicZooKeeperInspector.readConfigName(zkClient, options.collection); File tmpSolrHomeDir = PublicZooKeeperInspector.downloadConfigDir(zkClient, configName); SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job); LOG.debug("Using " + tmpSolrHomeDir + " as solr home"); options.solrHomeDir = tmpSolrHomeDir; String dirPrefix = SolrOutputFormat.getOutputName(job); Path srcPath = stats.getPath(); if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
private static File createSolrHomeZip(File solrHomeDir, boolean safeToModify) throws IOException { if (solrHomeDir == null || !(solrHomeDir.exists() && solrHomeDir.isDirectory())) { throw new IOException("Invalid solr home: " + solrHomeDir); } // solrconfig-site.xml may have an unneeded variable with no default value ${solr.host}, // which will cause the job to fail. Replace it. File solrConfigSite = new File(solrHomeDir, "conf" + File.separator + "solrconfig-site.xml"); if (solrConfigSite.isFile()) { if (!safeToModify) { // not safe to modify, let's copy the contents over to a temp directory File newTmpDir = Files.createTempDir(); newTmpDir.deleteOnExit(); LOG.debug("Creating temporary copy of solr home dir at: " + newTmpDir.getAbsolutePath()); FileUtils.copyDirectory(solrHomeDir, newTmpDir); return createSolrHomeZip(newTmpDir, true); } // find a suitable backup name File backup = new File(solrHomeDir, solrConfigSite.getName() + ".bak"); while (backup.exists()) { backup = new File(solrHomeDir, backup.getName() + ".bak"); } LOG.debug("Moving existing " + solrConfigSite.getName() + " to " + backup.getName()); Files.move(solrConfigSite, backup); writeReplacementSolrConfigSite(solrConfigSite); } File solrHomeZip = File.createTempFile("solr", ".zip"); createZip(solrHomeDir, solrHomeZip); return solrHomeZip; }
public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) throws IOException { // Make a reasonably unique name for the zip file in the distributed cache // to avoid collisions if multiple jobs are running. String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME; Configuration jobConf = job.getConfiguration(); jobConf.set(ZIP_NAME, hdfsZipName); Path zipPath = new Path("/tmp", getZipName(jobConf)); FileSystem fs = FileSystem.get(jobConf); fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath); final URI baseZipUrl = fs.getUri().resolve( zipPath.toString() + '#' + getZipName(jobConf)); DistributedCache.addCacheArchive(baseZipUrl, jobConf); LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives())); LOG.debug("Set zipPath: {}", zipPath); // Actually send the path for the configuration zip file jobConf.set(SETUP_OK, zipPath.toString()); }
/**
 * Zips the given Solr home directory into a temp file.
 *
 * @param solrHomeDir the Solr home directory to zip
 * @return the created temp zip file
 * @throws IOException if {@code solrHomeDir} is invalid or zipping fails
 */
public static File createSolrHomeZip(File solrHomeDir) throws IOException {
  // Callers' directories are never modified in place.
  final boolean safeToModify = false;
  return createSolrHomeZip(solrHomeDir, safeToModify);
}
/**
 * Zips {@code solrHomeDir} into a fresh temp file that is deleted on JVM exit.
 *
 * @param solrHomeDir the Solr home directory to zip; must exist and be a directory
 * @param safeToModify unused in this variant; presumably kept for signature
 *        compatibility — TODO confirm
 * @return the created temp zip file
 * @throws IOException if {@code solrHomeDir} is invalid or zipping fails
 */
private static File createSolrHomeZip(File solrHomeDir, boolean safeToModify) throws IOException {
  if (solrHomeDir == null || !solrHomeDir.exists() || !solrHomeDir.isDirectory()) {
    throw new IOException("Invalid solr home: " + solrHomeDir);
  }
  File zip = File.createTempFile("solr", ".zip");
  zip.deleteOnExit();
  createZip(solrHomeDir, zip);
  return zip;
}
/**
 * Builds a record writer backed by an embedded Solr server writing into
 * {@code outputShardDir}, batching documents in groups of {@code batchSize}.
 * A heartbeat is kept alive during the (potentially slow) server startup so
 * the framework does not kill the task.
 */
public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
  this.batchSize = batchSize;
  // Fix: diamond instead of a raw ArrayList (unchecked-assignment warning).
  this.batch = new ArrayList<>(batchSize);
  Configuration conf = context.getConfiguration();
  // setLogLevel("org.apache.solr.core", "WARN");
  // setLogLevel("org.apache.solr.update", "WARN");
  heartBeater = new HeartBeater(context);
  try {
    heartBeater.needHeartBeat();
    Path solrHomeDir = SolrRecordWriter.findSolrConfig(conf);
    FileSystem fs = outputShardDir.getFileSystem(conf);
    EmbeddedSolrServer solr = createEmbeddedSolrServer(solrHomeDir, fs, outputShardDir);
    batchWriter = new BatchWriter(solr, batchSize, context.getTaskAttemptID().getTaskID(),
        SolrOutputFormat.getSolrWriterThreadCount(conf),
        SolrOutputFormat.getSolrWriterQueueSize(conf));
  } catch (Exception e) {
    // Fix: explicit Locale, consistent with the other format() calls in this file
    // and locale-independent message formatting.
    throw new IllegalStateException(String.format(Locale.ENGLISH,
        "Failed to initialize record writer for %s, %s",
        context.getJobName(), conf.get("mapred.task.id")), e);
  } finally {
    heartBeater.cancelHeartBeat();
  }
}
SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job); } else { assert options.zkHost != null; String configName = PublicZooKeeperInspector.readConfigName(zkClient, options.collection); File tmpSolrHomeDir = PublicZooKeeperInspector.downloadConfigDir(zkClient, configName, options.useZkSolrConfig); SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job); LOG.debug("Using " + tmpSolrHomeDir + " as solr home"); options.solrHomeDir = tmpSolrHomeDir; String dirPrefix = SolrOutputFormat.getOutputName(job); Path srcPath = stats.getPath(); if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
/**
 * Creates a record writer that indexes key/value pairs into an embedded Solr
 * instance rooted at this task's default work file.
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Utils.getLogConfigFile(conf);
  return new SolrRecordWriter<K, V>(context, getDefaultWorkFile(context, ""), getBatchSize(conf));
}
public static void addSolrConfToDistributedCache(Job job, File solrHomeZip) throws IOException { // Make a reasonably unique name for the zip file in the distributed cache // to avoid collisions if multiple jobs are running. String hdfsZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME; Configuration jobConf = job.getConfiguration(); jobConf.set(ZIP_NAME, hdfsZipName); Path zipPath = new Path("/tmp", getZipName(jobConf)); FileSystem fs = FileSystem.get(jobConf); fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath); final URI baseZipUrl = fs.getUri().resolve( zipPath.toString() + '#' + getZipName(jobConf)); DistributedCache.addCacheArchive(baseZipUrl, jobConf); LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives())); LOG.debug("Set zipPath: {}", zipPath); // Actually send the path for the configuration zip file jobConf.set(SETUP_OK, zipPath.toString()); }
/**
 * Zips the given Solr home directory into a temp file, without modifying the
 * original directory.
 *
 * @param solrHomeDir the Solr home directory to zip
 * @return the created temp zip file
 * @throws IOException if {@code solrHomeDir} is invalid or zipping fails
 */
public static File createSolrHomeZip(File solrHomeDir) throws IOException {
  return createSolrHomeZip(solrHomeDir, /* safeToModify= */ false);
}
public static Path findSolrConfig(Configuration conf) throws IOException { // FIXME when mrunit supports the new cache apis //URI[] localArchives = context.getCacheArchives(); Path[] localArchives = DistributedCache.getLocalCacheArchives(conf); for (Path unpackedDir : localArchives) { if (unpackedDir.getName().equals(SolrOutputFormat.getZipName(conf))) { // copy solrHomeDir to ensure it isn't modified across multiple unit tests or multiple EmbeddedSolrServer instances File tmpDir = Files.createTempDir(); tmpDir.deleteOnExit(); FileUtils.copyDirectory(new File(unpackedDir.toString()), tmpDir); unpackedDir = new Path(tmpDir.getAbsolutePath()); LOG.info("Using this unpacked directory as solr home: {}", unpackedDir); return unpackedDir; } } throw new IOException(String.format(Locale.ENGLISH, "No local cache archives, where is %s:%s", SolrOutputFormat .getSetupOk(), SolrOutputFormat.getZipName(conf))); }
/**
 * Constructs a writer that buffers up to {@code batchSize} documents and flushes
 * them to an embedded Solr server rooted at {@code outputShardDir}. Server
 * startup can be slow, so a heartbeat runs for its duration to keep the task
 * alive.
 */
public SolrRecordWriter(TaskAttemptContext context, Path outputShardDir, int batchSize) {
  this.batchSize = batchSize;
  this.batch = new ArrayList<>(batchSize);
  Configuration taskConf = context.getConfiguration();

  heartBeater = new HeartBeater(context);
  try {
    heartBeater.needHeartBeat();
    Path localSolrHome = SolrRecordWriter.findSolrConfig(taskConf);
    FileSystem outputFs = outputShardDir.getFileSystem(taskConf);
    EmbeddedSolrServer server =
        createEmbeddedSolrServer(localSolrHome, outputFs, outputShardDir);
    batchWriter = new BatchWriter(server, batchSize,
        context.getTaskAttemptID().getTaskID(),
        SolrOutputFormat.getSolrWriterThreadCount(taskConf),
        SolrOutputFormat.getSolrWriterQueueSize(taskConf));
  } catch (Exception e) {
    throw new IllegalStateException(String.format(Locale.ENGLISH,
        "Failed to initialize record writer for %s, %s",
        context.getJobName(), taskConf.get("mapred.task.id")), e);
  } finally {
    heartBeater.cancelHeartBeat();
  }
}
SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job); } else { assert options.zkHost != null; String configName = zki.readConfigName(zkClient, options.collection); File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName); SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job); options.solrHomeDir = tmpSolrHomeDir; } finally { String dirPrefix = SolrOutputFormat.getOutputName(job); Path srcPath = stats.getPath(); if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs) throws FileNotFoundException, IOException { final String dirPrefix = SolrOutputFormat.getOutputName(job); FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith(dirPrefix); } }); for (FileStatus dir : dirs) { if (!dir.isDirectory()) { throw new IllegalStateException("Not a directory: " + dir.getPath()); } } // use alphanumeric sort (rather than lexicographical sort) to properly handle more than 99999 shards Arrays.sort(dirs, new Comparator<FileStatus>() { @Override public int compare(FileStatus f1, FileStatus f2) { return new PublicAlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName()); } }); return dirs; }
/**
 * Packages the Solr home directory as a zip and attaches it to the job's
 * distributed cache for tasks to unpack.
 *
 * @param solrHomeDir local Solr home directory to ship
 * @param job the job whose distributed cache receives the archive
 * @throws IOException if zipping or cache registration fails
 */
public static void setupSolrHomeCache(File solrHomeDir, Job job) throws IOException {
  File zippedHome = createSolrHomeZip(solrHomeDir);
  addSolrConfToDistributedCache(job, zippedHome);
}