// Demo entry point: decodes one hard-coded sentence with NLP4J, then stream-decodes a sample file.
static public void main(String[] args) throws Exception {
    final String configFile = "src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml";
    final String inputFile = "src/test/resources/dat/nlp4j.txt";
    NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile));
    NLPNode[] nodes;
    String sentence = "John bought a car for Mary.";
    // Decode a single raw sentence; join from index 1 (index 0 is presumably an artificial root — confirm).
    nodes = nlp4j.decode(sentence);
    System.out.println(Joiner.join(nodes, "\n", 1)+"\n");
    // Decode the whole input file straight to stdout in raw format.
    nlp4j.decode(IOUtils.createFileInputStream(inputFile), System.out, AbstractNLPDecoder.FORMAT_RAW);
}
}
/** Builds an NLP node from the given token and carries the token's feature map over onto it. */
public NLPNode create(ItToken token) {
    final NLPNode result = decoder.create(token);
    result.setFeatMap(token.feat);
    return result;
}
@Override public void run() {
    // Handles one client request of the form "<format>:<payload><END>":
    // reads until the END marker, decodes the payload, writes the result back, closes both streams.
    StringBuilder build = new StringBuilder();
    byte[] buffer = new byte[2048];
    String s, format;
    int i, idx;
    try {
        // Accumulate chunks until the end-of-message marker arrives.
        // NOTE(review): bytes are decoded with the platform-default charset, and a multi-byte
        // character can be split across reads — confirm the wire protocol is single-byte safe.
        while ((i = in.read(buffer, 0, buffer.length)) >= 0) {
            build.append(new String(buffer, 0, i));
            if (build.toString().endsWith(END)) {
                idx = build.indexOf(":");  // separator between format name and payload
                format = build.substring(0, idx);
                s = build.substring(idx+1, build.length()-END.length());  // strip the END marker
                out.write(decoder.decodeByteArray(s, format));
                out.close();
                in.close();
                break;
            }
        }
    } catch (IOException e) {e.printStackTrace();}
}
}
/**
 * Loads the Vietnamese dependency-parsing model from {@code models/dep/vi-dep.xz}
 * under the current working directory and wires it into a fresh decoder.
 *
 * @throws IOException if the model file does not exist
 */
public DependencyParser() throws IOException {
    LOGGER.info("Loading Dependency Parsing model");
    nlpDecoder = new NLPDecoder();
    // Diamond operator instead of the raw ArrayList the original used (unchecked-conversion warning).
    List<NLPComponent<NLPNode>> components = new ArrayList<>();
    String modelPath = System.getProperty("user.dir") + "/models/dep/vi-dep.xz";
    if (!new File(modelPath).exists())
        throw new IOException("DependencyParser: " + modelPath + " is not found!");
    components.add(NLPUtils.getComponent(modelPath));
    nlpDecoder.setComponents(components);
}
/**
 * Converts tab-separated "form&lt;TAB&gt;feat" input into decoded NLP output.
 * Reads rows until a blank line or EOF, segments them into sentences,
 * decodes each sentence, runs {@link #check}, and prints the result.
 *
 * @param in  source of tab-separated token rows
 * @param out sink for the decoded, serialized sentences
 * @throws Exception if reading, decoding, or writing fails
 */
public void convert(InputStream in, OutputStream out) throws Exception {
    // try-with-resources: the original closed the streams only on success and
    // leaked both when decoding threw.
    try (BufferedReader reader = IOUtils.createBufferedReader(in);
         PrintStream fout = IOUtils.createBufferedPrintStream(out)) {
        List<ItToken> list = new ArrayList<>();
        String line;
        while ((line = reader.readLine()) != null && !(line = line.trim()).isEmpty()) {
            String[] t = Splitter.splitTabs(line);
            list.add(new ItToken(t[0], t[1]));
        }
        for (List<ItToken> tokens : tokenizer.segmentize(list)) {
            NLPNode[] nodes = decoder.toNodeArray(tokens, this::create);
            decoder.decode(nodes);
            check(nodes);
            fout.println(decoder.toString(nodes)+"\n");
        }
    }
}
/** Creates the sentiment pipeline by loading the decoder from the given XML configuration file. */
public CSVSentiment(String configurationFile) { decode = new NLPDecoder(IOUtils.createFileInputStream(configurationFile)); }
/**
 * Decodes every record of a CSV file (skipping the header row) and writes one
 * ".nlp" file per record into a directory derived from the input file name.
 *
 * Column 6 is assumed to hold the document text and column 0 the label, which
 * is stored as a "sent" feature on the first sentence's first real node —
 * TODO confirm the column layout against the data source.
 *
 * @param inputFile path to the CSV file; assumed to end in a 4-character extension
 * @throws Exception if parsing, decoding, or writing fails
 */
public void categorize(String inputFile) throws Exception {
    System.out.println(inputFile);
    // Loop-invariant in the original; hoisted out of the record loop.
    String outputDir = inputFile.substring(0, inputFile.length()-4);
    // try-with-resources: the original left the parser open if decoding threw.
    try (CSVParser parser = new CSVParser(IOUtils.createBufferedReader(inputFile), CSVFormat.DEFAULT)) {
        List<CSVRecord> records = parser.getRecords();
        // Start at 1: record 0 is the header (the original skipped it with "continue").
        for (int i=1; i<records.size(); i++) {
            CSVRecord record = records.get(i);
            List<NLPNode[]> document = decode.decodeDocument(record.get(6));
            document.get(0)[1].putFeat("sent", record.get(0));
            PrintStream fout = IOUtils.createBufferedPrintStream(outputDir+"/"+FileUtils.getBaseName(outputDir)+"_"+i+".nlp");
            try {
                for (NLPNode[] nodes : document) fout.println(decode.toString(nodes)+"\n");
            } finally {
                fout.close();  // the original leaked this stream if a write threw
            }
        }
    }
}
/**
 * Tags each word with its NER label from the decoder, rewriting U-/L- prefixes
 * to the B-/I- scheme. Node indices are offset by 1 relative to the word list
 * (index 0 is presumably an artificial root node).
 */
public void tagSentence(List<Word> sentenceWords) {
    NLPNode[] tagged = nlpDecoder.decode(toNodeArray(sentenceWords));
    int idx = 0;
    for (Word current : sentenceWords) {
        String label = tagged[idx + 1].getNamedEntityTag().replace("U-", "B-").replace("L-", "I-");
        current.setNerLabel(label);
        idx++;
    }
}
public void check(NLPNode[] nodes) {
    // Sanity check: a node should carry the "it" feat if and only if its lemma is "it"/"its".
    // On the first mismatch (in either direction) the whole sentence is printed and the scan stops.
    for (int i=1; i<nodes.length; i++) {  // start at 1: index 0 is presumably the artificial root — confirm
        NLPNode node = nodes[i];
        if (((node.isLemma("it") || node.isLemma("its")) && node.getFeat("it") == null)
            || (node.getFeat("it") != null && !node.isLemma("it") && !node.isLemma("its"))) {
            System.out.println(decoder.toString(nodes)+"\n");
            break;
        }
    }
}
/**
 * Reads tab-separated "form&lt;TAB&gt;feat" rows (until a blank line or EOF),
 * segments them into sentences, decodes each one, validates it with
 * {@link #check}, and prints the serialized result.
 *
 * @param in  source of tab-separated token rows
 * @param out sink for the decoded, serialized sentences
 * @throws Exception if reading, decoding, or writing fails
 */
public void convert(InputStream in, OutputStream out) throws Exception {
    // Fix: the original only closed reader/fout on the success path, leaking
    // both streams whenever decoding threw; try-with-resources closes them always.
    try (BufferedReader reader = IOUtils.createBufferedReader(in);
         PrintStream fout = IOUtils.createBufferedPrintStream(out)) {
        List<ItToken> rows = new ArrayList<>();
        String line;
        while ((line = reader.readLine()) != null && !(line = line.trim()).isEmpty()) {
            String[] fields = Splitter.splitTabs(line);
            rows.add(new ItToken(fields[0], fields[1]));
        }
        for (List<ItToken> sentence : tokenizer.segmentize(rows)) {
            NLPNode[] nodes = decoder.toNodeArray(sentence, this::create);
            decoder.decode(nodes);
            check(nodes);
            fout.println(decoder.toString(nodes)+"\n");
        }
    }
}
/** Builds the pipeline's decoder from the given XML configuration file. */
public CSVSentiment(String configurationFile) { decode = new NLPDecoder(IOUtils.createFileInputStream(configurationFile)); }
/**
 * Loads the Vietnamese NER model from {@code models/ner/vi-ner.xz} under the
 * current working directory, optionally prepends global lexica, and wires
 * everything into a fresh decoder.
 *
 * @throws IOException if the model file does not exist
 */
public NerRecognizer() throws IOException{
    LOGGER.info("Loading NER model");
    nlpDecoder = new NLPDecoder();
    // Diamond operator instead of the raw ArrayList the original used (unchecked-conversion warning).
    List<NLPComponent<NLPNode>> components = new ArrayList<>();
    String modelPath = System.getProperty("user.dir") + "/models/ner/vi-ner.xz";
    if (!new File(modelPath).exists())
        throw new IOException("NerRecognizer: " + modelPath + " is not found!");
    // Lexica are optional: only added when initialization yields a component.
    GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
    if(lexica != null) {
        components.add(lexica);
    }
    components.add(NLPUtils.getComponent(modelPath));
    nlpDecoder.setComponents(components);
}
/**
 * Processes a CSV file record by record (the first record is treated as a
 * header and skipped), decoding column 6 as the document text and attaching
 * column 0 as a "sent" feature on the first sentence's first real node —
 * TODO confirm the column layout. Each decoded document is written to its
 * own ".nlp" file in a directory named after the input file.
 *
 * @param inputFile path to the CSV file; assumed to end in a 4-character extension
 * @throws Exception if parsing, decoding, or writing fails
 */
public void categorize(String inputFile) throws Exception {
    System.out.println(inputFile);
    // Hoisted: this value never changes inside the loop (the original recomputed it per record).
    String outputDir = inputFile.substring(0, inputFile.length()-4);
    // Fix: close the parser even when decoding throws (the original only closed on success).
    try (CSVParser parser = new CSVParser(IOUtils.createBufferedReader(inputFile), CSVFormat.DEFAULT)) {
        List<CSVRecord> records = parser.getRecords();
        for (int i=1; i<records.size(); i++) {  // i=1 skips the header row
            CSVRecord record = records.get(i);
            List<NLPNode[]> document = decode.decodeDocument(record.get(6));
            document.get(0)[1].putFeat("sent", record.get(0));
            PrintStream fout = IOUtils.createBufferedPrintStream(outputDir+"/"+FileUtils.getBaseName(outputDir)+"_"+i+".nlp");
            try {
                for (NLPNode[] nodes : document) fout.println(decode.toString(nodes)+"\n");
            } finally {
                fout.close();  // fix: the original leaked the stream if a write threw
            }
        }
    }
}
/**
 * Copies the dependency head ID and label from the decoded nodes back onto
 * each word. Words POS-tagged "CH" always get the "punct" label. Node indices
 * are offset by 1 relative to the word list (index 0 is presumably an
 * artificial root node).
 */
public void tagSentence(List<Word> sentenceWords) {
    NLPNode[] parsed = nlpDecoder.decode(toNodeArray(sentenceWords));
    for (int idx = 0; idx < sentenceWords.size(); idx++) {
        Word current = sentenceWords.get(idx);
        NLPNode node = parsed[idx + 1];
        current.setHead(node.getDependencyHead().getID());
        current.setDepLabel(node.getDependencyLabel());
        // Null-safe equivalent of the original's null check + equals.
        if ("CH".equals(current.getPosTag())) {
            current.setDepLabel("punct");
        }
    }
}
/**
 * Scans the sentence (skipping index 0) and prints it once if any node's
 * "it" feat disagrees with its lemma: the feat should be present exactly
 * when the lemma is "it" or "its". Stops at the first mismatch.
 */
public void check(NLPNode[] nodes) {
    for (int k = 1; k < nodes.length; k++) {
        NLPNode n = nodes[k];
        boolean itLemma = n.isLemma("it") || n.isLemma("its");
        boolean hasItFeat = n.getFeat("it") != null;
        // Mismatch in either direction: lemma without feat, or feat without lemma.
        if (itLemma != hasItFeat) {
            System.out.println(decoder.toString(nodes)+"\n");
            break;
        }
    }
}
/** Demo driver: runs the English decode pipeline on one sentence, then on a sample file. */
static public void main(String[] args) throws Exception {
    final String configFile = "src/main/resources/edu/emory/mathcs/nlp/configuration/config-decode-en.xml";
    final String inputFile = "src/test/resources/dat/nlp4j.txt";
    NLPDecoder nlp4j = new NLPDecoder(IOUtils.createFileInputStream(configFile));
    // Single-sentence decode, joined from index 1 onward.
    NLPNode[] nodes = nlp4j.decode("John bought a car for Mary.");
    System.out.println(Joiner.join(nodes, "\n", 1)+"\n");
    // Whole-file decode written straight to stdout in raw format.
    nlp4j.decode(IOUtils.createFileInputStream(inputFile), System.out, AbstractNLPDecoder.FORMAT_RAW);
}
}
/**
 * Boots a socket server that hands every accepted connection to a fixed-size
 * worker pool. The accept loop runs forever, so the executor and server socket
 * are never released — hence the suppressed resource warning.
 */
@SuppressWarnings("resource")
public NLPSocketServer(InputStream configuration, int port, int threads) throws Exception {
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    ServerSocket server = new ServerSocket(port);
    decoder = new NLPDecoder(configuration);
    System.out.println("Listening...");
    for (;;) {
        Socket client = server.accept();
        executor.submit(new NLPTask(client));
    }
    // executor.shutdown();
    // server.close();
}
/**
 * Wraps a raw token in an NLP node, carrying the token's feature map over onto the node.
 */
public NLPNode create(ItToken token) { NLPNode node = decoder.create(token); node.setFeatMap(token.feat); return node; }
/**
 * Handles one client request of the form {@code <format>:<payload><END>}:
 * reads from the socket until the END marker arrives, decodes the payload
 * with the given format, writes the result back, and closes both streams.
 */
@Override public void run() {
    StringBuilder build = new StringBuilder();
    byte[] buffer = new byte[2048];
    try {
        int n;
        while ((n = in.read(buffer, 0, buffer.length)) >= 0) {
            // Fix: decode with an explicit charset. The original used the platform
            // default, which silently varies between client and server machines.
            // NOTE(review): a multi-byte character split across reads is still decoded
            // incorrectly — confirm the protocol is ASCII-only, or buffer raw bytes instead.
            build.append(new String(buffer, 0, n, java.nio.charset.StandardCharsets.UTF_8));
            // Equivalent to endsWith(END) without re-materializing the whole buffer
            // via toString() on every read.
            if (build.length() >= END.length()
                    && build.lastIndexOf(END) == build.length() - END.length()) {
                int idx = build.indexOf(":");  // separates the format name from the payload
                String format = build.substring(0, idx);
                String payload = build.substring(idx + 1, build.length() - END.length());
                out.write(decoder.decodeByteArray(payload, format));
                out.close();
                in.close();
                break;
            }
        }
    } catch (IOException e) {e.printStackTrace();}
}
}
/**
 * CLI entry: parses command-line arguments into this object's fields, collects
 * the matching input files in sorted (deterministic) order, loads the decoder
 * from the configured file, and batch-decodes everything.
 */
public NLPDecode(String[] args) {
    BinUtils.initArgs(args, this);
    List<String> inputFiles = FileUtils.getFileList(input_path, input_ext, false);
    inputFiles.sort(null);  // natural String order, same as Collections.sort
    decoder = new NLPDecoder(IOUtils.createFileInputStream(configuration_file));
    decoder.decode(inputFiles, output_ext, format, threads);
}