/**
 * Creates the part-of-speech tagging annotator.
 *
 * @param properties configuration passed unchanged to the {@code POSTaggerAnnotator} constructor
 * @return a new {@code POSTaggerAnnotator} constructed with the annotator name {@code "pos"}
 */
public Annotator posTagger(Properties properties) {
  return new POSTaggerAnnotator("pos", properties);
}
/**
 * Builds a fresh pipeline for numeric/temporal annotation.
 *
 * <p>Annotators are appended in this order: {@code TokenizerAnnotator} (constructed with
 * {@code false} and {@code "en"}), {@code WordsToSentencesAnnotator}, {@code POSTaggerAnnotator}
 * and {@code TimeAnnotator}.
 *
 * @return the newly assembled pipeline
 */
private static AnnotationPipeline makeNumericPipeline() {
  AnnotationPipeline numericPipeline = new AnnotationPipeline();

  numericPipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
  numericPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
  numericPipeline.addAnnotator(new POSTaggerAnnotator(false));
  numericPipeline.addAnnotator(new TimeAnnotator(true));

  return numericPipeline;
}
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new TokenizerAnnotator(false, "en")); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); switch (timeAnnotator) { case "gutime": useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator("gutime", props)); break; case "heideltime": requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); break; case "sutime": pipeline.addAnnotator(new TimeAnnotator("sutime", props)); break; default: throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
/**
 * Builds the annotator responsible for part-of-speech tagging.
 *
 * @param properties settings forwarded as-is to the {@code POSTaggerAnnotator} constructor
 * @return a newly constructed {@code POSTaggerAnnotator} created under the name {@code "pos"}
 */
public Annotator posTagger(Properties properties) {
  final Annotator tagger = new POSTaggerAnnotator("pos", properties);
  return tagger;
}
/**
 * Creates the part-of-speech annotator from {@code properties}.
 *
 * @return a new {@code POSTaggerAnnotator} constructed with the name {@code "pos"}
 * @throws RuntimeException wrapping any exception raised while constructing the annotator
 */
@Override
public Annotator create() {
  final Annotator tagger;
  try {
    tagger = new POSTaggerAnnotator("pos", properties);
  } catch (Exception e) {
    // Keep the original failure as the cause.
    throw new RuntimeException(e);
  }
  return tagger;
}
/**
 * Creates the part-of-speech annotator from the {@code "pos.model"} and {@code "pos.maxlen"}
 * properties.
 *
 * <p>{@code "pos.model"} falls back to {@code DefaultPaths.DEFAULT_POS_MODEL}; {@code "pos.maxlen"}
 * falls back to {@link Integer#MAX_VALUE} when it is not set.
 *
 * @return a new {@code POSTaggerAnnotator}
 * @throws RuntimeException wrapping any exception raised here, including the
 *     {@link NumberFormatException} from a non-numeric {@code "pos.maxlen"}
 */
public Annotator create() {
  try {
    final String configuredMaxLen = props.getProperty("pos.maxlen");
    final int maxLen =
        (configuredMaxLen == null) ? Integer.MAX_VALUE : Integer.parseInt(configuredMaxLen);
    final String modelPath = props.getProperty("pos.model", DefaultPaths.DEFAULT_POS_MODEL);
    return new POSTaggerAnnotator(modelPath, true, maxLen);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
// Closes the enclosing construct that begins outside this excerpt.
});
// Both annotators are constructed from the same stanfordProps object, under the names "pos" and
// "parse" respectively.
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// maxParseSentenceLength is parsed as a decimal int; Integer.parseInt throws
// NumberFormatException when the text is not a valid integer.
int maxLength = Integer.parseInt(maxParseSentenceLength);
// Build the POS tagger ("pos") and the parser ("parse") from one shared stanfordProps object.
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// Convert the textual sentence-length setting to an int; a non-numeric value makes
// Integer.parseInt throw NumberFormatException.
int maxLength = Integer.parseInt(maxParseSentenceLength);
/** * Initializes the tokenizer to detect date columns. */ public void initialize() { Properties props = new Properties(); pipeline.addAnnotator(new TokenizerAnnotator(false) { @Override public Tokenizer<CoreLabel> getTokenizer(Reader r) { // TODO Auto-generated method stub return new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), ""); } }); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); pipeline.addAnnotator(new POSTaggerAnnotator(false)); pipeline.addAnnotator(new TimeAnnotator("sutime", props)); }
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new PTBTokenizerAnnotator(false)); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); if ("gutime".equals(timeAnnotator)) { useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator()); } else if ("heideltime".equals(timeAnnotator)) { requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); } else if ("sutime".equals(timeAnnotator)){ pipeline.addAnnotator(new TimeAnnotator("sutime", props)); } else { throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
/**
 * Initializes the external NLP tools held in this object's fields: the POS annotator, the
 * gazetteers, WordNet, the chunker, the Stanford dependency handler and the mention annotator.
 *
 * <p>Initialization is best-effort: any exception is printed to standard error and otherwise
 * ignored, so fields assigned after the failing statement keep their previous values.
 */
public void initExternalTools() {
  try {
    _posAnnotator = new POSAnnotator();

    // Locate the gazetteer directory through the datastore and load the gazetteers from its
    // "gazetteers" sub-directory.
    Datastore ds = new Datastore(new ResourceConfigurator().getDefaultConfig());
    File gazetteersResource = ds.getDirectory("org.cogcomp.gazetteers", "gazetteers", 1.3, false);
    _gazetteers = (FlatGazetteers) GazetteersFactory.get(
        5, gazetteersResource.getPath() + File.separator + "gazetteers", true, Language.English);

    WordNetManager.loadConfigAsClasspathResource(true);
    _wordnet = WordNetManager.getInstance();

    __chunker = new ChunkerAnnotator(true);
    __chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

    Properties stanfordProps = new Properties();
    stanfordProps.setProperty("annotators", "pos, parse");
    // Must be stored as a String: Properties.getProperty returns null for non-String values, so
    // the former put(..., true) with a Boolean was invisible to readers that use getProperty.
    stanfordProps.setProperty("parse.originalDependencies", "true");
    // NOTE(review): the same caveat applies to the two entries below if the
    // Stanford331Configurator constants are not Strings. Confirm their type.
    stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
    stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

    // Both Stanford annotators share one configuration and are handed to the dependency handler.
    POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
    ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
    __stanfordDep = new StanfordDepHandler(posAnnotator, parseAnnotator);

    __mentionAnnotator = new MentionAnnotator("ACE_TYPE");
  } catch (Exception e) {
    // Existing best-effort behaviour is kept: report the failure and carry on.
    e.printStackTrace();
  }
}
/**
 * Creates the pipeline used for numeric/temporal annotation.
 *
 * <p>The pipeline holds, in order, a {@code TokenizerAnnotator} (constructed with {@code false}
 * and {@code "en"}), a {@code WordsToSentencesAnnotator}, a {@code POSTaggerAnnotator} and a
 * {@code TimeAnnotator}.
 *
 * @return a new pipeline containing those four annotators
 */
private static AnnotationPipeline makeNumericPipeline() {
  final AnnotationPipeline assembled = new AnnotationPipeline();

  // Tokenization and sentence splitting come first.
  assembled.addAnnotator(new TokenizerAnnotator(false, "en"));
  assembled.addAnnotator(new WordsToSentencesAnnotator(false));

  // Then tagging and time annotation.
  assembled.addAnnotator(new POSTaggerAnnotator(false));
  assembled.addAnnotator(new TimeAnnotator(true));

  return assembled;
}
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new TokenizerAnnotator(false, "en")); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); switch (timeAnnotator) { case "gutime": useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator("gutime", props)); break; case "heideltime": requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); break; case "sutime": pipeline.addAnnotator(new TimeAnnotator("sutime", props)); break; default: throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
// Construct the "pos" and "parse" annotators; each receives the same stanfordProps object.
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// Parse the sentence-length setting from its string form; Integer.parseInt throws
// NumberFormatException if maxParseSentenceLength is not a valid integer.
int maxLength = Integer.parseInt(maxParseSentenceLength);
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
// Copy two parser settings from Stanford331Configurator constants into stanfordProps.
// NOTE(review): Properties.getProperty only returns String values; if these constants are not
// Strings, code reading the keys through getProperty will not see them. Confirm their type.
stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
// Both annotators are constructed from the same stanfordProps, under the names "pos" and "parse".
POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
// The dependency handler receives both annotators.
StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);