/**
 * Reads tab-separated token lines from {@code in}, segments them, decodes every
 * segment and prints the decoded nodes to {@code out}.
 *
 * <p>Reading stops at end of stream or at the first line that is empty after
 * trimming. The first two tab-separated fields of each line are passed to the
 * {@code ItToken} constructor, so every line must contain at least two fields.
 * Each decoded segment is printed followed by a blank line.
 *
 * <p>Both streams are closed when this method returns, whether it completes
 * normally or throws.
 *
 * @param in  source of the token lines; closed by this method
 * @param out destination of the decoded output; closed by this method
 * @throws Exception if reading, segmenting or decoding fails
 */
public void convert(InputStream in, OutputStream out) throws Exception {
    // try-with-resources guarantees both streams are released even when a
    // later step (splitting, segmenting, decoding, checking) throws.
    try (BufferedReader reader = IOUtils.createBufferedReader(in);
         PrintStream fout = IOUtils.createBufferedPrintStream(out)) {
        List<ItToken> list = new ArrayList<>();
        String line;

        // The trimmed line is reassigned inside the condition so that a
        // whitespace-only line also terminates the input.
        while ((line = reader.readLine()) != null && !(line = line.trim()).isEmpty()) {
            String[] t = Splitter.splitTabs(line);
            list.add(new ItToken(t[0], t[1]));
        }

        for (List<ItToken> tokens : tokenizer.segmentize(list)) {
            NLPNode[] nodes = decoder.toNodeArray(tokens, token -> create(token));
            decoder.decode(nodes);
            check(nodes);
            // println plus the explicit "\n" leaves a blank line between segments.
            fout.println(decoder.toString(nodes)+"\n");
        }
    }
}
/**
 * Reads tab-separated token lines from {@code in}, segments them, decodes every
 * segment and prints the decoded nodes to {@code out}.
 *
 * <p>Reading stops at end of stream or at the first line that is empty after
 * trimming. The first two tab-separated fields of each line are passed to the
 * {@code ItToken} constructor, so every line must contain at least two fields.
 * Each decoded segment is printed followed by a blank line.
 *
 * <p>Both streams are closed when this method returns, whether it completes
 * normally or throws.
 *
 * @param in  source of the token lines; closed by this method
 * @param out destination of the decoded output; closed by this method
 * @throws Exception if reading, segmenting or decoding fails
 */
public void convert(InputStream in, OutputStream out) throws Exception {
    // try-with-resources guarantees both streams are released even when a
    // later step (splitting, segmenting, decoding, checking) throws.
    try (BufferedReader reader = IOUtils.createBufferedReader(in);
         PrintStream fout = IOUtils.createBufferedPrintStream(out)) {
        List<ItToken> list = new ArrayList<>();
        String line;

        // The trimmed line is reassigned inside the condition so that a
        // whitespace-only line also terminates the input.
        while ((line = reader.readLine()) != null && !(line = line.trim()).isEmpty()) {
            String[] t = Splitter.splitTabs(line);
            list.add(new ItToken(t[0], t[1]));
        }

        for (List<ItToken> tokens : tokenizer.segmentize(list)) {
            NLPNode[] nodes = decoder.toNodeArray(tokens, token -> create(token));
            decoder.decode(nodes);
            check(nodes);
            // println plus the explicit "\n" leaves a blank line between segments.
            fout.println(decoder.toString(nodes)+"\n");
        }
    }
}
/**
 * Parses {@code inputFile} as CSV and writes one {@code .nlp} file per record,
 * skipping the record at index 0.
 *
 * <p>For every remaining record, column 6 is decoded into a document, column 0
 * is stored as the {@code "sent"} feature on node 1 of the document's first
 * node array, and every node array is printed followed by a blank line.
 *
 * <p>Output files are named {@code <dir>/<base>_<i>.nlp}, where {@code <dir>} is
 * {@code inputFile} without its last four characters, {@code <base>} is
 * {@code FileUtils.getBaseName(<dir>)} and {@code <i>} is the record index.
 * NOTE(review): the skipped record is presumably a header row, and the four
 * dropped characters presumably a ".csv" suffix; confirm with callers. Whether
 * the output directory is created on demand depends on {@code IOUtils}.
 *
 * @param inputFile path of the CSV file to split; must be at least four
 *                  characters long when it contains a record past index 0
 * @throws Exception if parsing, decoding or writing fails
 */
public void categorize(String inputFile) throws Exception {
    // try-with-resources closes the parser even if a record fails to decode.
    try (CSVParser parser = new CSVParser(IOUtils.createBufferedReader(inputFile), CSVFormat.DEFAULT)) {
        List<CSVRecord> records = parser.getRecords();
        System.out.println(inputFile);

        // Start at 1: the record at index 0 is never written out.
        for (int i=1; i<records.size(); i++) {
            CSVRecord record = records.get(i);
            List<NLPNode[]> document = decode.decodeDocument(record.get(6));
            document.get(0)[1].putFeat("sent", record.get(0));

            String outputDir = inputFile.substring(0, inputFile.length()-4);
            String outputFile = outputDir+"/"+FileUtils.getBaseName(outputDir)+"_"+i+".nlp";

            // Scoped per record so the file handle is released even when
            // serializing a node array throws.
            try (PrintStream fout = IOUtils.createBufferedPrintStream(outputFile)) {
                for (NLPNode[] nodes : document) {
                    fout.println(decode.toString(nodes)+"\n");
                }
            }
        }
    }
}
/**
 * Prints the node array to standard output when it contains an inconsistency
 * between the lemma and the {@code "it"} feature.
 *
 * <p>Nodes are scanned from index 1. A node is inconsistent when exactly one of
 * the following holds: its lemma is {@code "it"} or {@code "its"}, or its
 * {@code "it"} feature is non-null. At the first inconsistent node the whole
 * array is printed once, followed by a blank line, and scanning stops.
 *
 * @param nodes the node array to inspect; index 0 is not examined
 */
public void check(NLPNode[] nodes) {
    for (int idx = 1; idx < nodes.length; idx++) {
        NLPNode current = nodes[idx];
        boolean hasItLemma = current.isLemma("it") || current.isLemma("its");
        boolean hasItFeat = current.getFeat("it") != null;

        // Consistent node: lemma and feature agree, keep scanning.
        if (hasItLemma == hasItFeat) {
            continue;
        }

        // One report per array is enough; stop at the first mismatch.
        System.out.println(decoder.toString(nodes)+"\n");
        return;
    }
}
/**
 * Prints the node array to standard output when it contains an inconsistency
 * between the lemma and the {@code "it"} feature.
 *
 * <p>Nodes are scanned from index 1. A node is inconsistent when exactly one of
 * the following holds: its lemma is {@code "it"} or {@code "its"}, or its
 * {@code "it"} feature is non-null. At the first inconsistent node the whole
 * array is printed once, followed by a blank line, and scanning stops.
 *
 * @param nodes the node array to inspect; index 0 is not examined
 */
public void check(NLPNode[] nodes) {
    for (int idx = 1; idx < nodes.length; idx++) {
        NLPNode current = nodes[idx];
        boolean hasItLemma = current.isLemma("it") || current.isLemma("its");
        boolean hasItFeat = current.getFeat("it") != null;

        // Consistent node: lemma and feature agree, keep scanning.
        if (hasItLemma == hasItFeat) {
            continue;
        }

        // One report per array is enough; stop at the first mismatch.
        System.out.println(decoder.toString(nodes)+"\n");
        return;
    }
}