/**
 * Reads a file lazily, exposing its lines as an {@link Iterable}.
 *
 * The underlying reader is closed once the iterator has been fully consumed.
 * I/O failures surface as an (unchecked) RuntimeIOException.
 *
 * @param path Path of the file whose lines should be read.
 * @return An Iterable over the lines of the file.
 */
public static Iterable<String> readLines(String path) {
  // Delegate to the two-argument overload, leaving its second argument unset.
  final Iterable<String> lines = readLines(path, null);
  return lines;
}
/**
 * Reads a file lazily, exposing its lines as an {@link Iterable}.
 *
 * The underlying reader is closed once the iterator has been fully consumed.
 *
 * @param file The file whose lines should be read.
 * @return An Iterable over the lines of the file.
 */
public static Iterable<String> readLines(final File file) {
  // Delegate to the three-argument overload, leaving both optional arguments unset.
  final Iterable<String> lines = readLines(file, null, null);
  return lines;
}
/**
 * Reads a file lazily, exposing its lines as an {@link Iterable}, optionally
 * decorating the raw file stream with another stream class.
 *
 * The underlying reader is closed once the iterator has been fully consumed.
 *
 * @param file The file whose lines should be read.
 * @param fileInputStreamWrapper
 *          The class used to wrap the file's InputStream, e.g.
 *          GZIPInputStream. The class must offer a constructor that accepts
 *          an InputStream.
 * @return An Iterable over the lines of the file.
 */
public static Iterable<String> readLines(final File file,
                                         final Class<? extends InputStream> fileInputStreamWrapper) {
  // Delegate to the three-argument overload, leaving its last argument unset.
  final Iterable<String> lines = readLines(file, fileInputStreamWrapper, null);
  return lines;
}
/**
 * Fills {@code class2KeyMapping} from a file in which every line has the form
 * {@code <class name>###<key>}.
 *
 * @param file The mapping file to read.
 * @throws ClassNotFoundException If a class name on some line cannot be resolved.
 */
public static void setClass2KeyMapping(File file) throws ClassNotFoundException {
  for (String entry : IOUtils.readLines(file)) {
    final String[] parts = entry.split("###");
    // Resolve the class first so an unknown class is reported before the key is read.
    final Class<?> mappedClass = Class.forName(parts[0]);
    final String key = parts[1];
    class2KeyMapping.put(mappedClass, key);
  }
}
/**
 * Loads every line of the given file into a list, one element per line.
 *
 * @param sentencesFile Path of the file to read.
 * @return A new mutable list holding the lines in file order.
 */
public static List<String> readSentences(String sentencesFile) {
  logger.info("Reading sentences from " + sentencesFile);
  final List<String> collected = new ArrayList<>();
  IOUtils.readLines(sentencesFile).forEach(collected::add);
  return collected;
}
/**
 * Returns whether a file object both exists and has contents (i.e. the size
 * of the file is greater than 0).
 *
 * The check uses file-system metadata only, so no reader is opened (and
 * therefore none can be left unclosed). A file whose first line is empty but
 * which has further content counts as non-empty.
 *
 * @param file The file to inspect.
 * @return true if {@code file} is an existing regular file with a size
 *         greater than zero bytes; false otherwise (including when it is a
 *         directory or does not exist).
 */
public static boolean existsAndNonEmpty(File file) {
  // File.length() is 0 for a missing file; isFile() additionally rules out directories.
  return file.isFile() && file.length() > 0;
}
/**
 * Reads a properties file written as one {@code prop=value} entry per line
 * into an insertion-order-preserving map.
 *
 * Blank lines and lines starting with {@code #} are skipped. A line without
 * an {@code =} is treated as a flag and stored with the value "true". Keys
 * and values are trimmed. Entries from the file are put on top of a copy of
 * {@code existingArgs}, so a key that occurs in both ends up with the file's
 * value.
 *
 * @param filename A properties file to read
 * @param existingArgs Entries used to seed the result; this map itself is not modified
 * @return A new LinkedHashMap seeded with {@code existingArgs} followed by
 *         the entries of the props file in file order
 */
public static LinkedHashMap<String, String> propFileToLinkedHashMap(String filename, Map<String, String> existingArgs) {
  final LinkedHashMap<String, String> props = new LinkedHashMap<>(existingArgs);
  for (String rawLine : IOUtils.readLines(filename)) {
    final String entry = rawLine.trim();
    if (entry.isEmpty() || entry.startsWith("#")) {
      continue;
    }
    final int eq = entry.indexOf('=');
    if (eq < 0) {
      // Bare flag without a value.
      props.put(entry, "true");
      continue;
    }
    final String key = entry.substring(0, eq).trim();
    final String value = entry.substring(eq + 1).trim();
    props.put(key, value);
  }
  return props;
}
/**
 * Reads gold entity lists for one or more labels.
 *
 * The argument is a {@code ;}-separated list of {@code label,file} pairs.
 * Each file is read line by line: blank lines are skipped, a line ending in
 * {@code #} is stored (without the trailing {@code #}) with value false, and
 * every other line is stored with value true.
 *
 * @param goldEntitiesEvalFiles The {@code label,file;label,file;...} spec; may be null
 * @return A map from label to (word -> flag); empty when the argument is null
 */
static Map<String, Map<String, Boolean>> readGoldEntities(String goldEntitiesEvalFiles) {
  final Map<String, Map<String, Boolean>> goldByLabel = new HashMap<>();
  if (goldEntitiesEvalFiles == null) {
    return goldByLabel;
  }
  for (String spec : goldEntitiesEvalFiles.split(";")) {
    final String[] parts = spec.split(",");
    final String label = parts[0];
    final String goldPath = parts[1];
    final Map<String, Boolean> entries = new HashMap<>();
    for (String rawLine : IOUtils.readLines(goldPath)) {
      final String word = rawLine.trim();
      if (word.isEmpty()) {
        continue;
      }
      final boolean markedWithHash = word.endsWith("#");
      final String key = markedWithHash ? word.substring(0, word.length() - 1) : word;
      entries.put(key, !markedWithHash);
    }
    goldByLabel.put(label, entries);
  }
  return goldByLabel;
}
public static ArrayList<Person> readCharacterList(String filename) { ArrayList<Person> characterList = new ArrayList<>(); //format: name;Gender(M or F); aliases (everything semi-colon delimited) for(String line : IOUtils.readLines(new File(filename))) { String[] terms = line.split(";"); if(terms.length == 2) { characterList.add(new Person(terms[0], terms[1], null)); } else { ArrayList<String> aliases = new ArrayList<>(); for(int l = 2; l < terms.length; l++) { aliases.add(terms[l]); } aliases.add(terms[0]); characterList.add(new Person(terms[0], terms[1], aliases)); } } return characterList; }
/**
 * Loads the word-class cluster file and the negative word lists.
 *
 * Every line of {@code wordClassClusterFile} is split on whitespace into a
 * word and an integer cluster id; the pair is recorded in both
 * {@code clusterIds} (word to id) and {@code clusters} (id to word).
 * {@code negativeWordsFiles}, when set, is a list of files separated by
 * {@code ,} or {@code ;} whose lines are all added to {@code negativeWords}.
 */
public void setUp() {
  assert (wordClassClusterFile != null);

  // Cluster file: "<word> <clusterId>" per line.
  if (wordClassClusterFile != null) {
    for (String entry : IOUtils.readLines(wordClassClusterFile)) {
      final String[] cols = entry.split("\\s+");
      final int clusterId = Integer.parseInt(cols[1]);
      final String word = cols[0];
      clusterIds.put(word, clusterId);
      clusters.add(clusterId, word);
    }
  }

  // Negative word lists.
  if (negativeWordsFiles == null) {
    return;
  }
  final String[] listFiles = negativeWordsFiles.split("[,;]");
  for (String listFile : listFiles) {
    negativeWords.addAll(IOUtils.linesFromFile(listFile));
  }
  System.out.println("number of negative words from lists " + negativeWords.size());
}
/**
 * Parses a comma-separated Twitter sentiment file into a stream of data.
 *
 * For each line, column 1 holds the integer sentiment and the text starts at
 * column 3; because the text may itself contain commas, columns 3 and onward
 * are re-joined with {@code ","}. Rows that cannot be used (fewer than four
 * columns, the header row whose column 1 is "Sentiment", or an empty column 3)
 * are mapped to a neutral placeholder datum rather than dropped.
 *
 * The stream is created as a parallel stream.
 *
 * @param path Path of the file to read
 * @return One SentimentDatum per line of the file
 */
private static Stream<SentimentDatum> twitter(String path) {
  return StreamSupport.stream(IOUtils.readLines(path).spliterator(), true).map(line -> {
    List<String> fields = Arrays.asList(line.split(","));
    // Column index 3 is read below, so at least four columns are required.
    // (The previous "< 3" guard let three-column rows through and then failed on get(3).)
    if (fields.size() < 4 || "Sentiment".equalsIgnoreCase(fields.get(1)) || fields.get(3).equals("")) {
      return new SentimentDatum("Cats have tails", SentimentClass.NEUTRAL);
    } else {
      int sentiment = Integer.parseInt(fields.get(1));
      String text = StringUtils.join(fields.subList(3, fields.size()), ",");
      return new SentimentDatum(text, SentimentClass.fromInt(sentiment));
    }
  });
}
/**
 * Loads train/test/dev file lists from {@code prefix + ".train"},
 * {@code prefix + ".test"} and {@code prefix + ".dev"}.
 *
 * Each of those files is read line by line and its lines are stored as a set
 * in {@code splitFileLists} under the corresponding split. On success
 * {@code useSplit} is set to true.
 *
 * NOTE(review): {@code splitFileLists} is replaced before the existence
 * checks, so when false is returned it may be empty or only partly filled.
 *
 * @param prefix Common path prefix of the three list files
 * @return true if all three list files exist and were loaded; false as soon
 *         as one of them is found missing
 */
public boolean useSplit(String prefix) {
  final Map<Split, File> listFileBySplit = Generics.newHashMap();
  listFileBySplit.put(Split.Train, new File(prefix + ".train"));
  listFileBySplit.put(Split.Test, new File(prefix + ".test"));
  listFileBySplit.put(Split.Dev, new File(prefix + ".dev"));

  splitFileLists = Generics.newHashMap();
  for (Map.Entry<Split, File> pair : listFileBySplit.entrySet()) {
    final File listFile = pair.getValue();
    if (!listFile.exists()) {
      return false;
    }
    final Set<String> names = Generics.newHashSet();
    for (String name : IOUtils.readLines(listFile)) {
      names.add(name);
    }
    splitFileLists.put(pair.getKey(), names);
  }

  useSplit = true;
  return true;
}
/**
 * Parses a tab-separated sentiment file into a stream of data.
 *
 * For each line, column 2 holds the text and column 3 the integer sentiment.
 * Rows that cannot be used (fewer than four columns, the header row whose
 * column 3 is "Sentiment", or an empty text column) are mapped to a neutral
 * placeholder datum rather than dropped.
 *
 * The stream is created as a parallel stream.
 *
 * @param path Path of the file to read
 * @return One SentimentDatum per line of the file
 */
private static Stream<SentimentDatum> stanford(String path) {
  final Iterable<String> rows = IOUtils.readLines(path);
  return StreamSupport.stream(rows.spliterator(), true).map(row -> {
    final String[] cols = row.split("\t");
    final boolean usable = cols.length >= 4
        && !"Sentiment".equalsIgnoreCase(cols[3])
        && !cols[2].equals("");
    if (!usable) {
      return new SentimentDatum("Cats have tails", SentimentClass.NEUTRAL);
    }
    final String text = cols[2];
    final int sentiment = Integer.parseInt(cols[3]);
    return new SentimentDatum(text, SentimentClass.fromInt(sentiment));
  });
}
@Override public Annotation read(String path) throws IOException { Annotation doc = new Annotation(""); logger.info("Reading file: " + path); // Each iteration through this loop processes a single sentence along with any relations in it for (Iterator<String> lineIterator = IOUtils.readLines(path).iterator(); lineIterator.hasNext(); ) { Annotation sentence = readSentence(path, lineIterator); AnnotationUtils.addSentence(doc, sentence); } return doc; }
/**
 * Loads n-gram counts from gzipped vocabulary files into existing SQL tables.
 *
 * For every {@code n} in {@code typesOfPhrases}, the file
 * {@code dir/<n>gms/vocab_cs.gz} is read line by line; each line is split on a
 * tab into a phrase and a count, and one row is inserted into the table
 * {@code tablenamePrefix + n}.
 *
 * The JDBC statement is closed when the method exits, whether normally or
 * through an exception.
 *
 * @param dir Directory containing the {@code <n>gms} sub-directories
 * @param typesOfPhrases The n-gram orders to load
 * @throws SQLException If creating the statement or executing an insert fails
 * @throws RuntimeException If a target table does not exist
 */
public static void populateTablesInSQL(String dir, Collection<Integer> typesOfPhrases) throws SQLException {
  connect();
  // try-with-resources: the statement was previously never closed.
  try (Statement stmt = connection.createStatement()) {
    for (Integer n : typesOfPhrases) {
      String table = tablenamePrefix + n;
      if (!existsTable(table)) {
        throw new RuntimeException("Table " + table + " does not exist in the database! Run the following commands in the psql prompt:"
            + "create table GoogleNgrams_<NGRAM> (phrase text primary key not null, count bigint not null); create index phrase_<NGRAM> on GoogleNgrams_<NGRAM>(phrase);");
      }
      for (String line : IOUtils.readLines(new File(dir + "/" + n + "gms/vocab_cs.gz"), GZIPInputStream.class)) {
        String[] tok = line.split("\t");
        // NOTE(review): the SQL text is built by concatenation; quoting of the phrase relies
        // entirely on escapeString and the count is inserted verbatim — confirm the input is trusted.
        String q = "INSERT INTO " + table + " (phrase, count) VALUES (" + escapeString(tok[0]) + " , " + tok[1] + ");";
        stmt.execute(q);
      }
    }
  }
}
/**
 * Reads a phrase-to-label map from a file.
 *
 * Each line is split with {@code separator} (used as a regular expression by
 * {@link String#split(String)}); the first piece is the key and the last
 * piece is the label. When {@code remapLabels} is given, it is parsed into a
 * map and any label found in it is replaced by its mapped value.
 *
 * @param labelsFile Path of the labels file
 * @param separator Regular expression separating the pieces of a line
 * @param remapLabels Optional label remapping specification; may be null
 * @return A new map from the first piece of every line to its (possibly remapped) label
 */
public static Map<String, String> readLabelMap(String labelsFile, String separator, String remapLabels) {
  logger.info("Reading labels from " + labelsFile);

  final Map<String, String> relabel;
  if (remapLabels == null) {
    relabel = Collections.emptyMap();
  } else {
    relabel = StringUtils.mapStringToMap(remapLabels);
    logger.info("Remapping labels using " + relabel);
  }

  final Map<String, String> labelsByPhrase = new HashMap<>();
  for (String entry : IOUtils.readLines(labelsFile)) {
    final String[] parts = entry.split(separator);
    final String rawLabel = parts[parts.length - 1];
    // Falls back to the label itself when no remapping is registered for it.
    final String label = relabel.getOrDefault(rawLabel, rawLabel);
    labelsByPhrase.put(parts[0], label);
  }
  return labelsByPhrase;
}
/**
 * Loads raw domain n-gram frequencies from {@code domainNGramsFile} into
 * {@code domainNGramRawFreq}, unless that counter already holds entries.
 *
 * Each line of the file is split on a tab into an n-gram and its count.
 *
 * NOTE(review): the guard also lets a null {@code domainNGramRawFreq} through,
 * but the loop then calls {@code setCount} on it — confirm the field is always
 * initialised elsewhere.
 */
public static void loadDomainNGrams() {
  assert(domainNGramsFile != null);
  final boolean alreadyLoaded = domainNGramRawFreq != null && domainNGramRawFreq.size() != 0;
  if (alreadyLoaded) {
    return;
  }
  for (String entry : IOUtils.readLines(domainNGramsFile)) {
    final String[] cols = entry.split("\t");
    final String ngram = cols[0];
    final double count = Double.parseDouble(cols[1]);
    domainNGramRawFreq.setCount(ngram, count);
  }
  Redwood.log(ConstantsAndVariables.minimaldebug, "Data", "loading freq from domain ngram file " + domainNGramsFile);
}
}
/**
 * Binds class names listed in a file into pattern environments.
 *
 * Every line is split on {@code ###}:
 * <ul>
 *   <li>three pieces ({@code label###name###class}) bind {@code name} to the
 *       class in the environment stored under {@code label}, creating that
 *       environment first when it is missing;</li>
 *   <li>two pieces ({@code name###class}) bind {@code name} to the class in
 *       {@code globalEnv};</li>
 *   <li>any other number of pieces is rejected.</li>
 * </ul>
 *
 * @param s Path of the env file to read
 * @param env Per-label environments; new entries may be added
 * @param globalEnv Environment receiving the label-less bindings
 * @throws ClassNotFoundException If a class name cannot be resolved
 * @throws RuntimeException If a line has neither two nor three pieces
 */
private static void readClassesInEnv(String s, Map<String, Env> env, Env globalEnv) throws ClassNotFoundException {
  for (String entry : IOUtils.readLines(s)) {
    final String[] parts = entry.split("###");
    switch (parts.length) {
      case 3: {
        final String label = parts[0];
        final String name = parts[1];
        Class c = Class.forName(parts[2]);
        if (!env.containsKey(label)) {
          env.put(label, TokenSequencePattern.getNewEnv());
        }
        env.get(label).bind(name, c);
        break;
      }
      case 2: {
        final String name = parts[0];
        Class c = Class.forName(parts[1]);
        assert c != null : " Why is name for " + parts[1] + " null";
        globalEnv.bind(name, c);
        break;
      }
      default:
        throw new RuntimeException("Ill formed env file!");
    }
  }
}
/**
 * Reads learned words, grouped by iteration, from a file.
 *
 * A line starting with {@code ###} begins a new iteration (numbered from 0).
 * Every other line is split on a tab into a phrase and a score, and is added
 * to the counter of the iteration currently being read.
 *
 * NOTE(review): a data line that appears before the first {@code ###} line is
 * applied to a null counter — confirm the file always begins with a header.
 *
 * @param file The file to read
 * @return A sorted map from iteration number to the phrases learned in that iteration
 */
private static TreeMap<Integer, Counter<CandidatePhrase>> readLearnedWordsFromFile(File file) {
  final TreeMap<Integer, Counter<CandidatePhrase>> byIteration = new TreeMap<>();
  Counter<CandidatePhrase> current = null;
  int iteration = -1;

  for (String row : IOUtils.readLines(file)) {
    if (!row.startsWith("###")) {
      final String[] cols = row.split("\t");
      current.setCount(CandidatePhrase.createOrGet(cols[0]), Double.parseDouble(cols[1]));
      continue;
    }
    // Header line: store the finished iteration (if any) and start the next one.
    if (current != null) {
      byIteration.put(iteration, current);
    }
    iteration++;
    current = new ClassicCounter<>();
  }

  // Store the last iteration, which has no following header to trigger the store.
  if (current != null) {
    byIteration.put(iteration, current);
  }
  return byIteration;
}
/** * Read SVM-light formatted data into this dataset. * * A strict SVM-light format is expected, where labels and features are both * encoded as integers. These integers are converted into the dataset label * and feature types using the indexes stored in this dataset. * * @param file The file from which the data should be read. */ public void readSVMLightFormat(File file) { for (String line : IOUtils.readLines(file)) { line = line.replaceAll("#.*", ""); // remove any trailing comments String[] items = line.split("\\s+"); Integer label = Integer.parseInt(items[0]); Counter<F> features = new ClassicCounter<>(); for (int i = 1; i < items.length; i++) { String[] featureItems = items[i].split(":"); int feature = Integer.parseInt(featureItems[0]); double value = Double.parseDouble(featureItems[1]); features.incrementCount(this.featureIndex.get(feature), value); } this.add(new RVFDatum<>(features, this.labelIndex.get(label))); } }