/**
 * Returns the shared {@code StanfordTokenizer}, creating it on first use.
 *
 * <p>If construction throws, the stack trace is printed and the exception is
 * not propagated; in that case the value of {@code instance} is returned
 * unchanged, which may be {@code null}. No synchronization is performed here.
 *
 * @return the cached tokenizer, or {@code null} if it could not be created
 */
public static StanfordTokenizer getInstance() {
    // Fast path: already created by an earlier call.
    if (instance != null) {
        return instance;
    }
    try {
        instance = new StanfordTokenizer();
    } catch (Exception e) {
        // Failure is only reported on stderr; callers receive whatever is cached.
        e.printStackTrace();
    }
    return instance;
}
/**
 * Wraps every label of {@code data} in a {@code CoreLabelMapMatrix} and adds
 * the wrappers to {@code list}, visiting the labels in iteration order.
 *
 * @param data the labels to wrap; one entry is added per label
 */
public CoreLabelListMatrix(List<CoreLabel> data) {
    for (CoreLabel label : data) {
        CoreLabelMapMatrix wrapped = new CoreLabelMapMatrix(label);
        list.add(wrapped);
    }
}
/**
 * Returns element {@code i} of {@code data} wrapped in a
 * {@code CoreLabelListMatrix}. A new wrapper object is created on each call.
 *
 * @param i index forwarded to {@code data.get}
 * @return a new wrapper around the element at that index
 */
@Override
public ListMatrix<MapMatrix<String, String>> get(int i) {
    return new CoreLabelListMatrix(
            data.get(i));
}
String text = ""; ListMatrix<ListMatrix<String>> originalText = StanfordTokenizer.getInstance().tokenize(text); ListMatrix<ListMatrix<MapMatrix<String, String>>> trainingText = new DefaultListMatrix<ListMatrix<MapMatrix<String, String>>>(); tokenSequence = tagger.tagCRF(tokenSequence); tagger = new StanfordTagger(); tagger.train(trainingText); CoreLabelListMatrix tmpTokenSequence = tagger.tagCRF(tokenSequence); for (int i = 0; i < tmpTokenSequence.size(); i++) { MapMatrix<String, String> token = tmpTokenSequence.get(i); String word = token.getAsString("Token"); String c = token.getAsString("Class");
/**
 * Creates the tokenizer and tagger stored in the static fixture fields of
 * this test class.
 *
 * <p>Construction failures propagate to the test runner instead of being
 * printed and swallowed, so a broken fixture is reported as a setup failure
 * rather than leaving the fields possibly unassigned for the tests that
 * follow.
 *
 * @throws Exception if the tokenizer or the tagger cannot be constructed
 */
@BeforeClass
public static void setUp() throws Exception {
    tokenizer = new StanfordTokenizer();
    tagger = new StanfordTagger();
}
/**
 * Classifies raw text with {@code crf} and wraps the outcome.
 *
 * @param text the text handed to {@code crf.classify}
 * @return a new {@code CoreLabelListListMatrix} around the classifier output
 * @throws Exception declared by the signature; nothing is caught here
 */
public CoreLabelListListMatrix tagCRF(String text) throws Exception {
    // Typed local kept so the wrapper receives a List<List<CoreLabel>>.
    final List<List<CoreLabel>> classified = crf.classify(text);
    return new CoreLabelListListMatrix(classified);
}
/**
 * Tokenizes a two-sample data set and checks the dimensions of the matrix
 * stored under {@code Tokenizer.TOKENIZED} for each sample: one column each,
 * 11 rows for the first sample and 5 rows for the second.
 *
 * @throws Exception declared by the signature; nothing is caught here
 */
@Test
public void testTokenizer() throws Exception {
    ListDataSet dataSet = new DefaultListDataSet();

    // First sample carries the s1 fixture text as its input.
    Sample first = new DefaultSample();
    first.put(Sample.INPUT, s1);
    first.setId("sample1");

    // Second sample carries the s2 fixture text as its input.
    Sample second = new DefaultSample();
    second.put(Sample.INPUT, s2);
    second.setId("sample2");

    dataSet.add(first);
    dataSet.add(second);

    Tokenizer tokenizerUnderTest = new StanfordTokenizer();
    tokenizerUnderTest.tokenize(Sample.INPUT, dataSet);

    // Read the samples back from the data set before inspecting them.
    Sample tokenizedFirst = dataSet.get(0);
    Sample tokenizedSecond = dataSet.get(1);
    Matrix firstTokens = tokenizedFirst.getAsMatrix(Tokenizer.TOKENIZED);
    Matrix secondTokens = tokenizedSecond.getAsMatrix(Tokenizer.TOKENIZED);

    assertEquals(1, firstTokens.getColumnCount());
    assertEquals(11, firstTokens.getRowCount());
    assertEquals(1, secondTokens.getColumnCount());
    assertEquals(5, secondTokens.getRowCount());
}
}
/**
 * Classifies an already tokenized sequence with {@code crf}.
 *
 * <p>For every entry of {@code text} a new {@code CoreLabel} is created whose
 * {@code TextAnnotation} is the entry's {@code "Token"} value; the resulting
 * labels are classified and the classifier output is wrapped.
 *
 * @param text the token entries; each is read via {@code getAsString("Token")}
 * @return a new {@code CoreLabelListMatrix} around the classifier output
 * @throws Exception declared by the signature; nothing is caught here
 */
public CoreLabelListMatrix tagCRF(ListMatrix<MapMatrix<String, String>> text) throws Exception {
    List<CoreLabel> labels = new ArrayList<CoreLabel>();
    for (MapMatrix<String, String> tokenEntry : text) {
        CoreLabel label = new CoreLabel();
        String word = tokenEntry.getAsString("Token");
        label.set(CoreAnnotations.TextAnnotation.class, word);
        labels.add(label);
    }

    List<CoreLabel> tagged = crf.classify(labels);
    return new CoreLabelListMatrix(tagged);
}