/**
 * Allows you to read in trees from the source you want.  It is the
 * responsibility of treeReaderFactory() to deal properly with character-set
 * encoding of the input.  It is also the responsibility of tr (presumably
 * the tree reader produced by that factory) to properly normalize trees.
 *
 * @return a new MemoryTreebank constructed from treeReaderFactory()
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank result = new MemoryTreebank(treeReaderFactory());
  return result;
}
/**
 * Creates a new, empty in-memory treebank.
 *
 * @return a new MemoryTreebank constructed from treeReaderFactory() and
 *         the inputEncoding value
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank result = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return result;
}
/**
 * Builds a fresh MemoryTreebank on each call.
 *
 * @return a new MemoryTreebank that is handed treeReaderFactory() and
 *         inputEncoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank fresh = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return fresh;
}
/**
 * Creates a new in-memory treebank using only the tree reader factory
 * (no encoding argument is passed here).
 *
 * @return a new MemoryTreebank constructed from treeReaderFactory()
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank result = new MemoryTreebank(treeReaderFactory());
  return result;
}
/**
 * Supplies a new MemoryTreebank for callers that want trees held in memory.
 *
 * @return a new MemoryTreebank constructed with treeReaderFactory() and
 *         inputEncoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank created = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return created;
}
/**
 * Returns a newly constructed MemoryTreebank; nothing is cached between calls.
 *
 * @return a new MemoryTreebank given treeReaderFactory() and inputEncoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank instance = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return instance;
}
/**
 * Constructs the MemoryTreebank used with this configuration.
 *
 * @return a new MemoryTreebank built from treeReaderFactory() and
 *         inputEncoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  MemoryTreebank built = new MemoryTreebank(treeReaderFactory(), inputEncoding);
  return built;
}
/**
 * Uses a MemoryTreebank with a CHTBTokenizer and a
 * BobChrisTreeNormalizer.
 * <p>
 * The treebank is created with {@code inputEncoding} when that charset is
 * supported by the running JVM.  Otherwise a warning is printed to standard
 * output and UTF-8 is used instead.  A {@code null} or syntactically illegal
 * charset name is treated the same way as an unsupported one, rather than
 * letting {@code Charset.isSupported} throw.
 *
 * @return a new MemoryTreebank built from treeReaderFactory() and the
 *         chosen encoding
 */
@Override
public MemoryTreebank memoryTreebank() {
  String encoding = inputEncoding;

  boolean supported;
  try {
    // isSupported(null) throws IllegalArgumentException, so rule null out first.
    supported = encoding != null && java.nio.charset.Charset.isSupported(encoding);
  } catch (java.nio.charset.IllegalCharsetNameException e) {
    // A malformed name can never be supported; fall back like any other bad encoding.
    supported = false;
  }

  if (!supported) {
    System.out.println("Warning: desired encoding " + encoding + " not accepted. ");
    System.out.println("Using UTF-8 to construct MemoryTreebank");
    encoding = "UTF-8";
  }
  return new MemoryTreebank(treeReaderFactory(), encoding);
}
public FTBDataset() { super(); //Need to use a MemoryTreebank so that we can compute gross corpus //stats for MWE pre-processing // The treebank may be reset if setOptions changes CC_TAGSET treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CC_TAGSET), FrenchTreebankLanguagePack.FTB_ENCODING); treeFileExtension = "xml"; }
/**
 * Reads the trees found at the given path and returns them as a List.
 * Each tree is passed to attachLabels together with annotationClass before
 * it is added to the result.
 *
 * @param path location handed to MemoryTreebank.loadPath
 * @param annotationClass annotation class forwarded to attachLabels for every tree
 * @return the loaded trees, in the treebank's iteration order
 */
public static List<Tree> readTreesWithLabels(String path, Class<? extends CoreAnnotation<Integer>> annotationClass) {
  MemoryTreebank loaded = new MemoryTreebank("utf-8");
  loaded.loadPath(path, null);

  List<Tree> labeled = Generics.newArrayList();
  for (Tree current : loaded) {
    attachLabels(current, annotationClass);
    labeled.add(current);
  }
  return labeled;
}
/**
 * Loads treebank grammar from first argument and prints it.
 * Just a demonstration of functionality.
 * <p>
 * {@code usage: java MemoryTreebank treebankFilesPath}
 * <p>
 * If no path argument is supplied, the usage line is printed to standard
 * error and the method returns without loading anything.
 *
 * @param args array of command-line arguments; {@code args[0]} is the path
 *             passed to {@code loadPath}
 */
public static void main(String[] args) {
  // Guard against a missing argument instead of failing with an
  // ArrayIndexOutOfBoundsException on args[0].
  if (args.length < 1) {
    System.err.println("usage: java MemoryTreebank treebankFilesPath");
    return;
  }

  Timing.startTime();
  Treebank treebank = new MemoryTreebank(PennTreeReader::new);
  treebank.loadPath(args[0]);
  Timing.endTime();
  System.out.println(treebank);
}
/**
 * Prints selected subtrees from the trees stored in a file.
 * <p>
 * Every entry of codeStrings must fully match codePattern; its first and
 * second capture groups are parsed as integers.  For each such pair, the
 * first number minus one is used as the index of a tree in the loaded
 * treebank, the second number is passed to getNodeNumber on that tree, and
 * the resulting node is printed with pennPrint.
 *
 * @param codeStrings node codes to look up
 * @param treeFile path of the tree file to load
 * @throws RuntimeException if a code string does not match codePattern
 */
private static void extractSubtrees(List<String> codeStrings, String treeFile) {
  List<Pair<Integer,Integer>> nodeCodes = new ArrayList<>();
  for (String codeString : codeStrings) {
    Matcher matcher = codePattern.matcher(codeString);
    if (!matcher.matches()) {
      throw new RuntimeException("Error: illegal node code " + codeString);
    }
    int treeNumber = Integer.parseInt(matcher.group(1));
    int nodeNumber = Integer.parseInt(matcher.group(2));
    nodeCodes.add(new Pair<>(treeNumber, nodeNumber));
  }

  TreeReaderFactory readerFactory = new TRegexTreeReaderFactory();
  MemoryTreebank loadedTrees = new MemoryTreebank(readerFactory);
  loadedTrees.loadPath(treeFile, null, true);

  for (Pair<Integer,Integer> nodeCode : nodeCodes) {
    int treeIndex = nodeCode.first() - 1;
    Tree tree = loadedTrees.get(treeIndex);
    Tree node = tree.getNodeNumber(nodeCode.second());
    node.pennPrint();
  }
}
/**
 * Command-line demonstration of QPTreeTransformer.
 * <p>
 * The arguments are converted to Properties with
 * StringUtils.argsToProperties.  If a {@code treeFile} property is present,
 * trees are read from that file with a PennTreeReader into a MemoryTreebank.
 * Each tree is then printed, transformed with the transformer, and printed
 * again.  If no {@code treeFile} is given, the treebank stays empty and
 * nothing is printed.
 *
 * @param args command-line arguments
 * @throws RuntimeException if reading the tree file fails; the underlying
 *         IOException is attached as the cause
 */
public static void main(String[] args) {
  QPTreeTransformer transformer = new QPTreeTransformer();
  Treebank tb = new MemoryTreebank();
  Properties props = StringUtils.argsToProperties(args);
  String treeFileName = props.getProperty("treeFile");

  if (treeFileName != null) {
    // try-with-resources closes the reader (and the file stream under it)
    // even when readTree throws; previously the file handle was leaked.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName)))) {
      TreeReader tr = new PennTreeReader(in, new LabeledScoredTreeFactory());
      Tree t;
      while ((t = tr.readTree()) != null) {
        tb.add(t);
      }
    } catch (IOException e) {
      // Keep the original message text but also preserve the cause.
      throw new RuntimeException("File problem: " + e, e);
    }
  }

  for (Tree t : tb) {
    System.out.println("Original tree");
    t.pennPrint();
    System.out.println();
    System.out.println("Tree transformed");
    Tree tree = transformer.transformTree(t);
    tree.pennPrint();
    System.out.println();
    System.out.println("----------------------------");
  }
}
/**
 * Command-line demonstration of CoordinationTransformer.
 * <p>
 * The arguments are converted to Properties with
 * StringUtils.argsToProperties.  If a {@code treeFile} property is present,
 * trees are read from that file with a PennTreeReader into a MemoryTreebank.
 * Each tree is then printed, transformed with the transformer, and printed
 * again.  If no {@code treeFile} is given, the treebank stays empty and
 * nothing is printed.
 *
 * @param args command-line arguments
 * @throws RuntimeException if reading the tree file fails; the underlying
 *         IOException is attached as the cause
 */
public static void main(String[] args) {
  CoordinationTransformer transformer = new CoordinationTransformer(null);
  Treebank tb = new MemoryTreebank();
  Properties props = StringUtils.argsToProperties(args);
  String treeFileName = props.getProperty("treeFile");

  if (treeFileName != null) {
    // try-with-resources closes the reader (and the file stream under it)
    // even when readTree throws; previously the file handle was leaked.
    try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(treeFileName)))) {
      TreeReader tr = new PennTreeReader(in, new LabeledScoredTreeFactory());
      for (Tree t; (t = tr.readTree()) != null; ) {
        tb.add(t);
      }
    } catch (IOException e) {
      // Keep the original message text but also preserve the cause.
      throw new RuntimeException("File problem: " + e, e);
    }
  }

  for (Tree t : tb) {
    System.out.println("Original tree");
    t.pennPrint();
    System.out.println();
    System.out.println("Tree transformed");
    Tree tree = transformer.transformTree(t);
    tree.pennPrint();
    System.out.println();
    System.out.println("----------------------------");
  }
}
/**
 * Loads the trees found at the given path into a MemoryTreebank and returns
 * an iterator over them.
 *
 * @param path location handed to Treebank.loadPath
 * @return an iterator over the loaded trees
 */
private static Iterator<Tree> treebankIterator(String path) {
  // Normalizer settings: remove empty nodes and strip indices from
  // internal nodes, but keep functional tags.
  NPTmpRetainingTreeNormalizer normalizer = new NPTmpRetainingTreeNormalizer(0, false, 1, false);

  Treebank loadedTrees = new MemoryTreebank(normalizer);
  loadedTrees.loadPath(path);
  return loadedTrees.iterator();
}
/**
 * Fix all the English Penn Treebank errors, or at least some of them (!).
 * <p>
 * Every tree of the input treebank is run through
 * Tsurgeon.processPatternsOnTree with ops, and the results are collected in
 * a new MemoryTreebank created with the input's tree reader factory and
 * encoding.
 *
 * @param tb the treebank whose trees are to be processed
 * @return a new MemoryTreebank holding the processed trees
 */
@Override
public MemoryTreebank transformTrees(Treebank tb) {
  MemoryTreebank corrected = new MemoryTreebank(tb.treeReaderFactory(), tb.encoding());
  for (Tree original : tb) {
    Tree fixed = Tsurgeon.processPatternsOnTree(ops, original);
    corrected.add(fixed);
  }
  return corrected;
}
/**
 * Returns a MemoryTreebank in which each Tree of the current treebank has
 * been transformed using the given TreeTransformer.  This Treebank is left
 * unchanged (assuming that the TreeTransformer correctly doesn't change
 * its input Trees).
 *
 * @param treeTrans The TreeTransformer to use
 * @return a new treebank, created with size() and treeReaderFactory(),
 *         holding the transformed trees
 */
@Override
public Treebank transform(TreeTransformer treeTrans) {
  Treebank transformed = new MemoryTreebank(size(), treeReaderFactory());
  for (Tree source : this) {
    Tree result = treeTrans.transformTree(source);
    transformed.add(result);
  }
  return transformed;
}
/**
 * Applies the given options to this dataset.
 * <p>
 * After delegating to the superclass, this method:
 * <ul>
 *   <li>rebuilds splitSet from the file named by ConfigParser.paramSplit,
 *       if that key is present;</li>
 *   <li>reads CC_TAGSET from ConfigParser.paramCCTagset (default false) and
 *       replaces the treebank with a new MemoryTreebank using it;</li>
 *   <li>creates and sets up a default lexMapper if none is set;</li>
 *   <li>if pathsToMappings is non-empty, creates posMapper if needed and
 *       sets it up with every path.</li>
 * </ul>
 *
 * @param opts the options to apply
 * @return the value returned by the superclass's setOptions
 */
@Override
public boolean setOptions(Properties opts) {
  final boolean superResult = super.setOptions(opts);

  if (opts.containsKey(ConfigParser.paramSplit)) {
    splitSet = makeSplitSet(opts.getProperty(ConfigParser.paramSplit));
  }

  CC_TAGSET = PropertiesUtils.getBool(opts, ConfigParser.paramCCTagset, false);
  FrenchXMLTreeReaderFactory readerFactory = new FrenchXMLTreeReaderFactory(CC_TAGSET);
  treebank = new MemoryTreebank(readerFactory, FrenchTreebankLanguagePack.FTB_ENCODING);

  if (lexMapper == null) {
    lexMapper = new DefaultMapper();
    lexMapper.setup(null, lexMapOptions.split(","));
  }

  if (pathsToMappings.size() != 0) {
    if (posMapper == null) {
      posMapper = new DefaultMapper();
    }
    for (File mappingFile : pathsToMappings) {
      posMapper.setup(mappingFile);
    }
  }

  return superResult;
}
/**
 * Returns a MemoryTreebank appropriate to the testing treebank source.
 * Its readers are PennTreeReaders created with a LabeledScoredTreeFactory
 * and a BobChrisTreeNormalizer built from tlp.
 *
 * @return a new MemoryTreebank for test data
 */
@Override
public MemoryTreebank testMemoryTreebank() {
  return new MemoryTreebank(in -> {
    LabeledScoredTreeFactory treeFactory = new LabeledScoredTreeFactory();
    BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer(tlp);
    return new PennTreeReader(in, treeFactory, normalizer);
  });
}
trees = SentimentUtils.filterUnknownRoots(trees); } else { MemoryTreebank treebank = new MemoryTreebank("utf-8"); treebank.loadPath(filename, null); trees = new ArrayList<>(treebank);