private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
/**
 * Creates a new pipeline made of four annotators, added in this order:
 * tokenizer ("en"), sentence splitter, POS tagger, and time annotator.
 *
 * @return a newly constructed pipeline; nothing is cached
 */
private static AnnotationPipeline makeNumericPipeline() {
  final AnnotationPipeline numericPipeline = new AnnotationPipeline();

  // Token and sentence boundaries come first.
  numericPipeline.addAnnotator(new TokenizerAnnotator(false, "en"));
  numericPipeline.addAnnotator(new WordsToSentencesAnnotator(false));

  // Tagging is added ahead of the time annotator.
  numericPipeline.addAnnotator(new POSTaggerAnnotator(false));
  numericPipeline.addAnnotator(new TimeAnnotator(true));

  return numericPipeline;
}
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new TokenizerAnnotator(false, "en")); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); switch (timeAnnotator) { case "gutime": useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator("gutime", props)); break; case "heideltime": requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); break; case "sutime": pipeline.addAnnotator(new TimeAnnotator("sutime", props)); break; default: throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
@SuppressWarnings("RedundantThrows") public static void main(String[] args) throws IOException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); final boolean verbose = false; ap.addAnnotator(new TokenizerAnnotator(verbose, "en"));
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(Annotator.POS_REQUIREMENT)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
private Annotator getParser() { if(parserProcessor == null){ Annotator parser = StanfordCoreNLP.getExistingAnnotator("parse"); if (parser == null) { Properties emptyProperties = new Properties(); parser = new ParserAnnotator("coref.parse.md", emptyProperties); } if (parser == null) { // TODO: these assertions rule out the possibility of alternately named parse/pos annotators throw new AssertionError("Failed to get parser - this should not be possible"); } if (parser.requires().contains(CoreAnnotations.PartOfSpeechAnnotation.class)) { Annotator tagger = StanfordCoreNLP.getExistingAnnotator("pos"); if (tagger == null) { throw new AssertionError("Parser required tagger, but failed to find the pos annotator"); } List<Annotator> annotators = Generics.newArrayList(); annotators.add(tagger); annotators.add(parser); parserProcessor = new AnnotationPipeline(annotators); } else { parserProcessor = parser; } } return parserProcessor; }
public TypeClassifier() { pipeline = new AnnotationPipeline(); classifier = new Classifier(); featureSet = new FeatureSet(new MaxentTagger( "de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier\\english-left3words-distsim.tagger")); initialize(); }
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new PTBTokenizerAnnotator(false)); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); if ("gutime".equals(timeAnnotator)) { useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator()); } else if ("heideltime".equals(timeAnnotator)) { requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); } else if ("sutime".equals(timeAnnotator)){ pipeline.addAnnotator(new TimeAnnotator("sutime", props)); } else { throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
/**
 * Builds a fresh pipeline containing, in order, a tokenizer for "en", a sentence
 * splitter, a POS tagger and a time annotator.
 *
 * @return the newly assembled pipeline
 */
private static AnnotationPipeline makeNumericPipeline() {
  final AnnotationPipeline assembled = new AnnotationPipeline();
  // Each annotator is constructed immediately before it is added, in run order.
  assembled.addAnnotator(
      new TokenizerAnnotator(false, "en"));
  assembled.addAnnotator(
      new WordsToSentencesAnnotator(false));
  assembled.addAnnotator(
      new POSTaggerAnnotator(false));
  assembled.addAnnotator(
      new TimeAnnotator(true));
  return assembled;
}
public static void main(String[] args) throws IOException, ClassNotFoundException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); boolean verbose = false; ap.addAnnotator(new PTBTokenizerAnnotator(verbose));
public static void main(String[] args) throws IOException, ClassNotFoundException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); boolean verbose = false; ap.addAnnotator(new PTBTokenizerAnnotator(verbose));
public static AnnotationPipeline getPipeline(Properties props, boolean tokenize) throws Exception { // useGUTime = Boolean.parseBoolean(props.getProperty("gutime", "false")); AnnotationPipeline pipeline = new AnnotationPipeline(); if (tokenize) { pipeline.addAnnotator(new TokenizerAnnotator(false, "en")); pipeline.addAnnotator(new WordsToSentencesAnnotator(false)); } pipeline.addAnnotator(new POSTaggerAnnotator(false)); // pipeline.addAnnotator(new NumberAnnotator(false)); // pipeline.addAnnotator(new QuantifiableEntityNormalizingAnnotator(false, false)); String timeAnnotator = props.getProperty("timeAnnotator", "sutime"); switch (timeAnnotator) { case "gutime": useGUTime = true; pipeline.addAnnotator(new GUTimeAnnotator("gutime", props)); break; case "heideltime": requiredDocDateFormat = "yyyy-MM-dd"; pipeline.addAnnotator(new HeidelTimeAnnotator("heideltime", props)); break; case "sutime": pipeline.addAnnotator(new TimeAnnotator("sutime", props)); break; default: throw new IllegalArgumentException("Unknown timeAnnotator: " + timeAnnotator); } return pipeline; }
@SuppressWarnings("RedundantThrows") public static void main(String[] args) throws IOException { Timing tim = new Timing(); AnnotationPipeline ap = new AnnotationPipeline(); final boolean verbose = false; ap.addAnnotator(new TokenizerAnnotator(verbose, "en"));