/**
 * Writes {@code columnOutput} to the file
 * {@code <nerOutputDir>/<annotationFile>.ner.column.txt}.
 * When {@code columnOutput} is empty nothing is written and no directory is created,
 * so an empty output file is never produced.
 *
 * @param nerOutputDir directory to write the output file into; created if it does not exist
 * @param annotationFile used as the prefix of the new file's name
 * @param columnOutput the strings to print to the output file
 * @throws IOException propagated from the underlying write
 */
private static void printOut(String nerOutputDir, String annotationFile,
        List<String> columnOutput) throws IOException {
    final String target = nerOutputDir + "/" + annotationFile + ".ner.column.txt";

    // Nothing to print: leave the file system untouched.
    if (columnOutput.isEmpty()) {
        return;
    }

    if (!IOUtils.exists(nerOutputDir)) {
        IOUtils.mkdir(nerOutputDir);
    }
    LineIO.write(target, columnOutput);
}
/**
 * Loads the manifest found at {@code path}.
 * <p>
 * The path is looked up with {@code IOUtils.lsResources} relative to
 * {@code FeatureManifest.class}; a stream is opened on the first entry of the result and
 * handed to another constructor of this class.
 * <p>
 * Note: {@code get(0)} is called unconditionally, so a lookup that yields no entries
 * fails here rather than producing an empty manifest. The opened stream is not closed
 * in this constructor.
 *
 * @param path path to .fex file
 * @throws Exception if the lookup, opening the stream, or the delegated constructor fails
 */
public FeatureManifest(String path) throws Exception {
    this(IOUtils.lsResources(FeatureManifest.class, path).get(0).openStream());
}
/**
 * Creates a directory (including missing parents) if nothing exists at {@code dir}.
 *
 * @param dir path of the directory to create
 * @return if nothing existed at {@code dir}, the result of {@link File#mkdirs()};
 *         otherwise whether the existing path is a directory
 */
public static boolean mkdir(String dir) {
    // Something is already there: report whether it is usable as a directory.
    if (exists(dir)) {
        return isDirectory(dir);
    }
    final File target = new File(dir);
    return target.mkdirs();
}
/**
 * Returns the stem of {@code path}: the result of {@code getFileName(path)} passed
 * through {@code stripFileExtension}.
 *
 * @param path the path to take the stem of
 * @return the file name of {@code path} after extension stripping
 */
public static String getFileStem(String path) {
    final String fileName = getFileName(path);
    return stripFileExtension(fileName);
}
failed = new ArrayList<>(); failedFile = directory + File.separator + "failed.txt"; if (IOUtils.exists(directory) && IOUtils.isDirectory(directory)) { String[] files = new String[0]; try { if (IOUtils.exists(failedFile)) { failed = LineIO.read(failedFile, new ITransformer<String, Integer>() { @Override files = IOUtils.lsFiles(directory, new FilenameFilter() { @Override public boolean accept(File dir, String name) { (files.length + failed.size()), failed.size()); for (String file : files) { String taHashStr = IOUtils.stripFileExtension(IOUtils.getFileName(file)); IOUtils.mkdir(directory);
throw new IllegalArgumentException("Input directory and output directory must have different names ('" + inDir + "'"); if (!IOUtils.isDirectory(inDir)) { throw new IOException("input directory '" + inDir + "' is not a directory."); if (IOUtils.exists(outDir)) { if (!IOUtils.isDirectory(outDir)) throw new IOException("output directory '" + outDir + "' is not a directory."); IOUtils.mkdir(outDir); String[] inFiles = IOUtils.lsFilesRecursive(inDir, filter);
// Make sure the output directory is usable: create it when it is missing, and abort
// when something that is not a directory already occupies the path.
// (Braces are required here: without them the 'else' attaches to the inner 'if', so the
// directory was only "created" when it already existed and never when it was missing.)
if (!IOUtils.exists(conllDir)) {
    IOUtils.mkdir(conllDir);
} else if (!IOUtils.isDirectory(conllDir)) {
    System.err.println("Output directory '" + conllDir + "' exists and is not a directory.");
    System.exit(-1);
}
/**
 * Checks for the file {@code dbFile + ".mv.db"}.
 * <p>
 * NOTE: despite its name, this method returns {@code true} when that file does
 * <em>not</em> exist (i.e. when it would have to be created) and {@code false} when it
 * is already present.
 *
 * @param dbFile database path without the {@code .mv.db} suffix
 * @return {@code true} if {@code dbFile + ".mv.db"} is absent, {@code false} otherwise
 */
public static boolean dbFileExists(String dbFile) {
    final String storePath = dbFile + ".mv.db";
    return !IOUtils.exists(storePath);
}
/**
 * A workaround for the unit tests in Maven that move the relative path of the root directory
 * in {@link ESRLConfigurator} to the directory of each module. <b>NB:</b> This code assumes
 * the default data directory to be <i>$ROOT_DIR/data/</i>.
 * <p>
 * If {@code file} contains "data" and its parent directory does not exist, the path
 * component immediately preceding the first "data" is removed and the shortened path is
 * returned. The path is returned unchanged when it has no parent, when the parent exists,
 * or when there is no separator before "data" (so there is no component to remove).
 *
 * @param file The file/directory to be used
 * @return The same file/directory moved to the root dir of the main project, or
 *         {@code file} itself when no correction applies
 */
protected String getCorrectPath(String file) {
    if (!file.contains("data")) {
        return file;
    }
    String parent = new File(file).getParent();
    if (parent == null || IOUtils.exists(parent)) {
        return file;
    }

    // Index of the character just before "data" (normally the separator in front of it).
    int dataIndex = file.indexOf("data") - 1;
    int prevSlashIndex = file.lastIndexOf(File.separator, dataIndex - 1);
    if (prevSlashIndex < 0) {
        // e.g. "data/x" or "/data/x": nothing precedes "data" that could be stripped,
        // and substring(0, -1) would throw.
        return file;
    }

    String fileWithParentDir = file.substring(0, prevSlashIndex) + file.substring(dataIndex);
    logger.warn("{} doesn't exist, trying parent directory: {}.",
            IOUtils.getFileName(file), fileWithParentDir);
    return fileWithParentDir;
}
/** * A table is built from either a given source corpus file or source corpus directory by simply * counting the number of times that each form-POS association appear in a source corpus. * * @param home file name or directory name of the source corpus * @throws Exception **/ public void buildTable(String home) throws Exception { if (IOUtils.isFile(home)) this.buildTableHelper(home); else if (IOUtils.isDirectory(home)) { String[] files = IOUtils.lsFiles(home); for (String file : files) { // logger.info(file); this.buildTableHelper(home + "\\" + file); } } }
@CommandDescription( description = "Pre-extracts the features for the verb-sense model. Run this before training.", usage = "preExtract") public static void preExtract() throws Exception { SenseManager manager = getManager(true); ResourceManager conf = new VerbSenseConfigurator().getDefaultConfig(); // If models directory doesn't exist create it if (!IOUtils.isDirectory(conf.getString(conf .getString(VerbSenseConfigurator.MODELS_DIRECTORY)))) IOUtils.mkdir(conf.getString(conf.getString(VerbSenseConfigurator.MODELS_DIRECTORY))); int numConsumers = Runtime.getRuntime().availableProcessors(); Dataset dataset = Dataset.PTBTrainDev; log.info("Pre-extracting features"); ModelInfo modelInfo = manager.getModelInfo(); String featureSet = "" + modelInfo.featureManifest.getIncludedFeatures().hashCode(); String allDataCacheFile = VerbSenseConfigurator.getFeatureCacheFile(featureSet, dataset, rm); FeatureVectorCacheFile featureCache = preExtract(numConsumers, manager, dataset, allDataCacheFile); pruneFeatures(numConsumers, manager, featureCache, VerbSenseConfigurator.getPrunedFeatureCacheFile(featureSet, rm)); Lexicon lexicon = modelInfo.getLexicon().getPrunedLexicon(manager.getPruneSize()); log.info("Saving lexicon with {} features to {}", lexicon.size(), manager.getLexiconFileName()); log.info(lexicon.size() + " features in the lexicon"); lexicon.save(manager.getLexiconFileName()); }
files = IOUtils.lsResources(GazetteerViewGenerator.class, directory); else files = IOUtils.getListOfFilesInDir(directory); String file = IOUtils.getFileName(url.getPath());
/**
 * Writes the chunker's files into {@code dir}.
 * <p>
 * {@code IOUtils.mkdir(dir)} is called first (its result is not checked), then
 * {@code chunker.write} is given the two paths {@code dir/modelName.lc} and
 * {@code dir/modelName.lex}, and finally the location is logged.
 *
 * @param dir directory that receives the files
 * @param modelName base name shared by the {@code .lc} and {@code .lex} files
 */
public void writeModelsToDisk(String dir, String modelName){
    IOUtils.mkdir(dir);
    // presumably .lc is the learned classifier and .lex its lexicon -- confirm against chunker.write
    chunker.write(dir + File.separator + modelName + ".lc", dir + File.separator + modelName + ".lex");
    logger.info("Done training, models are in " + dir+File.separator+modelName+".lc (.lex)");
}

public static void main(String[] args) {
/**
 * Filters the files contained in a directory or in its subdirectory structure. Returns all
 * files (not directories) that pass the filter, as absolute paths.
 * <p>
 * Note that the filter is applied to every directory entry, including subdirectories:
 * a subdirectory is only descended into if its own name passes {@code filter}.
 *
 * @param directory the directory to start from
 * @param filter name filter applied to the entries of each visited directory
 * @return absolute paths of all matching regular files
 * @throws IOException if {@code directory} (or a visited subdirectory) cannot be listed,
 *         e.g. because it is not a directory or an I/O error occurs
 */
public static String[] lsFilesRecursive(String directory, FilenameFilter filter)
        throws IOException {
    File dir = new File(directory);

    // File.listFiles returns null (rather than an empty array) when the path is not a
    // directory or cannot be read; fail with a clear message instead of an NPE.
    File[] entries = dir.listFiles(filter);
    if (entries == null) {
        throw new IOException("Unable to list '" + directory
                + "': it is not a directory or an I/O error occurred.");
    }

    ArrayList<String> files = new ArrayList<>();
    for (File entry : entries) {
        String entryPath = entry.getAbsolutePath();
        if (isFile(entryPath)) {
            files.add(entryPath);
        } else if (isDirectory(entryPath)) {
            files.addAll(Arrays.asList(lsFilesRecursive(entryPath, filter)));
        }
    }
    return files.toArray(new String[0]);
}
// The lemma is whatever IOUtils.stripFileExtension yields for fileName
// (presumably the name without its extension -- confirm against IOUtils).
String lemma = IOUtils.stripFileExtension(fileName);
private void readPropbankFrameData(String dir) throws Exception { frameData = new HashMap<>(); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(false); dbf.setValidating(false); for (String file : IOUtils.lsFiles(dir, (dir1, name) -> name.endsWith("xml"))) { // IOUtils.getFileName(file) doesn't work in Windows int slashIndex = file.lastIndexOf(File.separator); String fileName = file.substring(slashIndex + 1); // A hack to deal with percent-sign in nombank. There is another // file called perc-sign that will fill this void. if (fileName.contains("percent-sign.xml")) continue; DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = db.parse(file); NodeList predicateElements = doc.getElementsByTagName("predicate"); for (int i = 0; i < predicateElements.getLength(); i++) { String lemma = IOUtils.stripFileExtension(fileName); FrameData fData = new FrameData(lemma); frameData.put(lemma, fData); NodeList roleSets = doc.getElementsByTagName("roleset"); addRoleSets(fileName, lemma, fData, roleSets); } } }
/**
 * Reads {@code fileName} line by line and appends one text annotation per line to
 * {@code textAnnotations}. Each annotation's id is the file's name (as returned by
 * {@code IOUtils.getFileName}) followed by ':' and the zero-based line index.
 *
 * @param fileName the file to read
 * @throws RuntimeException wrapping the {@link FileNotFoundException} if the file
 *         cannot be found
 */
public void readFile(String fileName) {
    try {
        int lineIndex = 0;
        for (String line : LineIO.read(fileName)) {
            String sentId = IOUtils.getFileName(fileName) + ":" + lineIndex;
            textAnnotations.add(createTextAnnotation(line, sentId));
            lineIndex++;
        }
    } catch (FileNotFoundException e) {
        logger.error("Could not read {}; unable to continue.", fileName);
        throw new RuntimeException(e);
    }
}