/**
 * Builds the per-word annotation list for the current sentence.
 * <p>
 * Every word of {@code sentence} is wrapped in a {@link NamedEntityWord} whose
 * named entity is {@code null}, and the phrase map is reset to an empty map.
 *
 * @throws NamedEntityRecognizerException if {@code sentence} is {@code null}
 */
public void recognize() throws NamedEntityRecognizerException {
    if (sentence == null) {
        throw new NamedEntityRecognizerException("null sentence or sentence not set.");
    }

    // Reset both result containers before filling the word list.
    this.listOfEntities = new ArrayList<NamedEntityWord>(sentence.size());
    this.mapOfEntities = new HashMap<Integer, NamedEntityPhrase>();

    for (String token : sentence) {
        NamedEntityWord untagged = new NamedEntityWord(token, null);
        listOfEntities.add(untagged);
    }
}
// Print each token as "word/" followed by its named-entity tag name when one is set.
// The loop body is braced so the tag check runs per token; without braces the `if`
// falls outside the loop, where `neWord` is no longer in scope.
for (NamedEntityWord neWord : ner.getAnnotatedSentence()) {
    System.out.print(neWord.getWord() + "/");
    if (neWord.getNamedEntity() != null) {
        System.out.print(neWord.getNamedEntity().name());
    }
}
Token tokenAnno = tokenIter.next(); if (taggedToken.getNamedEntity() != null) { String tagString = taggedToken.getNamedEntity().toString();
/**
 * Tells whether the word stored in the node matches the word of {@code neWord},
 * ignoring case.
 *
 * @return {@code true} if both words are equal ignoring case; {@code false} if
 *         they differ or if anything goes wrong while reading them
 */
public boolean areMatch() {
    try {
        String nodeWord = node.getInfo().getNodeInfo().getWord();
        return nodeWord.equalsIgnoreCase(neWord.getWord());
    } catch (Exception ignored) {
        // Best effort: any failure while reading either word counts as "no match".
        return false;
    }
}
/**
 * Replaces the node's info with a copy that carries the named entity of {@code neWord}.
 * <p>
 * Word, lemma, serial and syntactic info are copied from the node's current
 * {@code NodeInfo}; when that cannot be obtained they default to
 * {@code null}, {@code null}, {@code 0} and {@code null}. The id and edge info
 * are taken from the node's current info.
 */
public void makeOperation() {
    NodeInfo currentNodeInfo;
    try {
        currentNodeInfo = node.getInfo().getNodeInfo();
    } catch (Exception ignored) {
        // Best effort: fall back to default field values below.
        currentNodeInfo = null;
    }

    final boolean hasNodeInfo = (currentNodeInfo != null);
    String word = hasNodeInfo ? currentNodeInfo.getWord() : null;
    String lemma = hasNodeInfo ? currentNodeInfo.getWordLemma() : null;
    int serial = hasNodeInfo ? currentNodeInfo.getSerial() : 0;
    SyntacticInfo syntacticInfo = hasNodeInfo ? currentNodeInfo.getSyntacticInfo() : null;

    NamedEntity namedEntity = neWord.getNamedEntity();

    Info previousInfo = node.getInfo();
    node.setInfo(
            new DefaultInfo(
                    previousInfo.getId(),
                    new DefaultNodeInfo(word, lemma, serial, namedEntity, syntacticInfo),
                    previousInfo.getEdgeInfo()));
}
// Print the word followed by its named entity in square brackets, e.g. "word [TAG]".
System.out.println(String.format("%s [%s]", neWord.getWord(), neWord.getNamedEntity()));
// Convert the raw answer string into a named-entity type via
// StanfordAnswerToNamedEntityMapper.convert, then append the word paired with that
// type to listOfEntities.
// NOTE(review): strNamedEntity is presumably the classifier's answer label for strWord — confirm against the caller.
newNamedEntityType = StanfordAnswerToNamedEntityMapper.convert(strNamedEntity); listOfEntities.add(new NamedEntityWord(strWord, newNamedEntityType));
/** * A demo program for Stanford NER. * @param args a single argument which is the path of a classifier from the Stanford NER package, e.g. * ${env_var:JARS}/stanford-ner-2009-01-16/classifiers/ner-eng-ie.crf-3-all2008-distsim.ser.gz * @throws Exception */ public static void main(String[] args) throws Exception { if (args.length==0) throw new IllegalArgumentException("First argument must be the path to a Stanford NER classifier (with .ser.gz extension)"); String pathToNER = args[0]; StanfordNamedEntityRecognizer ner = new StanfordNamedEntityRecognizer( new File(pathToNER)); ner.init(); String testString = "The Israeli PM Binyamin Netanyahu said that Israel will accept the Eitan W. Shishinsky recommendations"; //String testString = "In December 2004 the state sold 18.4% of its equity in Air France-KLM. The state's shareholding in Air France-KLM subsequently fell to just under 20%."; LinkedList<String> testStringList = new LinkedList<String>(); for (String word : testString.split(" ")) testStringList.add(word); ner.setSentence(testStringList); ner.recognize(); System.out.println("\nNEs detected:"); System.out.println(ner.getAnnotatedEntities()); System.out.println("\nNE tag for each word: "); List<NamedEntityWord> list = ner.getAnnotatedSentence(); for (NamedEntityWord neWord: list) System.out.println(neWord.getWord()+" ["+neWord.getNamedEntity()+"]"); ner.cleanUp(); }
/**
 * Runs the named-entity recognizer on the tokenized sentence and copies each
 * token's named entity onto the matching {@link TokenInfo}.
 *
 * @param tokenizedSent the sentence tokens handed to the recognizer
 * @param processedSent the token infos to annotate, expected to line up one-to-one
 *                      with the recognizer's output
 * @throws InstrumentCombinationException if the recognizer fails, if it returns a
 *         different number of tokens than {@code processedSent} holds, or if a
 *         recognized word differs from the original string of its token
 */
private void addNerInfo(List<String> tokenizedSent, List<TokenInfo> processedSent) throws InstrumentCombinationException {
    List<NamedEntityWord> taggedWords;
    try {
        m_ner.setSentence(tokenizedSent);
        m_ner.recognize();
        taggedWords = m_ner.getAnnotatedSentence();
    } catch (NamedEntityRecognizerException e) {
        throw new InstrumentCombinationException("see nested exception from NamedEntityRecognizer:", e);
    }

    // Both sequences are walked in lockstep below, so their lengths must agree.
    if (taggedWords.size() != processedSent.size()) {
        throw new InstrumentCombinationException("the number of ner-tagged tokens is not equal"
                + " to the number of sentence-tokens in input sentence " + tokenizedSent);
    }

    Iterator<NamedEntityWord> taggedIter = taggedWords.iterator();
    for (TokenInfo token : processedSent) {
        NamedEntityWord tagged = taggedIter.next();
        if (!token.getOrigStr().equals(tagged.getWord())) {
            throw new InstrumentCombinationException("a mismatch was found between a ner-tagged word"
                    + " and the sentence word " + token.getOrigStr() + " of input sentence " + tokenizedSent);
        }
        token.setNamedEntity(tagged.getNamedEntity());
    }
}