/**
 * Builds a geolocation annotator from the properties prefixed with {@code annotatorName}.
 *
 * <p>Properties read (each key is {@code annotatorName} followed by the suffix):
 * <ul>
 *   <li>{@code .allowed_entity_type} - comma-separated entity types, each trimmed;
 *       defaults to {@code DEFAULT_ENTITY_TYPES}</li>
 *   <li>{@code .geocoder_url} - passed through as-is (may be {@code null})</li>
 *   <li>{@code .use_local_geocoder} - defaults to {@code false}</li>
 *   <li>{@code .timeout} - only consulted when the local geocoder is enabled;
 *       otherwise the built-in value 1050 is used</li>
 *   <li>{@code .set_raw} - defaults to the current value of {@code setRaw}</li>
 * </ul>
 *
 * @param annotatorName prefix used to look up this annotator's properties
 * @param prop          configuration properties
 */
public GeolocAnnotator(String annotatorName, Properties prop) {
    String[] rawTypes = prop.getProperty(annotatorName + ".allowed_entity_type", DEFAULT_ENTITY_TYPES).split(",");
    List<String> allowedEntities = new ArrayList<>();
    for (int i = 0; i < rawTypes.length; i++) {
        allowedEntities.add(rawTypes[i].trim());
    }

    String geocoderUrl = prop.getProperty(annotatorName + ".geocoder_url");
    String localGeocoderFlag = prop.getProperty(annotatorName + ".use_local_geocoder");
    Boolean useLocalGeocoder = PropertiesUtils.getBoolean(localGeocoderFlag, false);

    // The configured timeout is honoured only for the local geocoder.
    Integer timeout = 1050;
    if (useLocalGeocoder) {
        String configuredTimeout = prop.getProperty(annotatorName + ".timeout");
        timeout = PropertiesUtils.getInteger(configuredTimeout, timeout);
    }

    this.geoloc_conf = GeolocModel.getInstance(allowedEntities, geocoderUrl, useLocalGeocoder, timeout);

    String rawFlag = prop.getProperty(annotatorName + ".set_raw");
    setRaw = PropertiesUtils.getBoolean(rawFlag, setRaw);
}
/**
 * Configures the annotator and builds the URL of the UDPipe endpoint.
 *
 * <p>Settings are read from the result of
 * {@code PropertiesUtils.dotConvertedProperties(properties, annotatorName)}:
 * {@code server}, {@code port}, {@code protocol} and {@code address} (each with its
 * {@code DEFAULT_*} fallback), {@code alreadyTokenized} (default {@code true}) and
 * {@code keepOriginal} (default {@code false}).
 *
 * @param annotatorName name used to derive this annotator's properties
 * @param properties    configuration properties
 * @throws MalformedURLException if the URL cannot be built from the configured parts
 * @throws NumberFormatException if the configured port is not a parsable integer
 */
public UDPipeAnnotator(String annotatorName, Properties properties) throws MalformedURLException {
    Properties scoped = PropertiesUtils.dotConvertedProperties(properties, annotatorName);

    alreadyTokenized = PropertiesUtils.getBoolean(scoped.getProperty("alreadyTokenized"), true);
    keepOriginal = PropertiesUtils.getBoolean(scoped.getProperty("keepOriginal"), false);

    String protocol = scoped.getProperty("protocol", DEFAULT_PROTOCOL);
    String server = scoped.getProperty("server", DEFAULT_SERVER);
    int portNumber = Integer.parseInt(scoped.getProperty("port", DEFAULT_PORT));
    String address = scoped.getProperty("address", DEFAULT_ADDRESS);

    url = new URL(protocol, server, portNumber, address);
}
public MachineLinking(Properties properties) { super(properties, properties.getProperty("address")); minWeight = PropertiesUtils.getDouble(properties.getProperty("min_confidence"), ML_CONFIDENCE); lang = properties.getProperty("lang", null); }
public ReadabilityAnnotator(String annotatorName, Properties props) { globalProperties = props; localProperties = PropertiesUtils.dotConvertedProperties(props, annotatorName); language = globalProperties.getProperty(annotatorName + ".language"); className = globalProperties.getProperty(annotatorName + ".className"); maxSentenceLength = PropertiesUtils .getInteger(localProperties.getProperty("maxSentenceLength"), DEFAULT_MAX_SENTENCE_LENGTH); }
public DigiLemmaAnnotator(String annotatorName, Properties prop) { useGuesser = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".use_guesser"), DEFAULT_USE_GUESSER); extractFeatures = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".extract_features"), DEFAULT_FEATURES); //todo: the model is unique if (useGuesser || extractFeatures) { guesser = GuessModelInstance.getInstance().getModel(); } }
/**
 * Instantiates the configured {@code Linking} implementation.
 *
 * <p>The {@code annotator} property (default {@code DBPS_ANNOTATOR}) is first looked up
 * in the {@code annotators} registry; when it is not registered it is treated as a
 * class name and loaded reflectively. The implementation must expose a public
 * constructor taking a single {@link Properties} argument, which receives the
 * annotator-local properties.
 *
 * @param annotatorName name used to derive this annotator's properties
 * @param props         global configuration properties
 * @throws ClassNotFoundException if the configured class name cannot be loaded
 * @throws ClassCastException     if the loaded class is not a {@code Linking} subtype
 * @throws Exception              for any other reflective lookup or instantiation failure
 */
public LinkingAnnotator(String annotatorName, Properties props) throws Exception {
    Properties newProps = PropertiesUtils.dotConvertedProperties(props, annotatorName);
    String annotator = newProps.getProperty("annotator", DBPS_ANNOTATOR);

    Class<? extends Linking> linkingClass = annotators.get(annotator);
    if (linkingClass == null) {
        // asSubclass performs a real runtime check, so a misconfigured class name is
        // rejected here instead of being instantiated first and failing afterwards.
        linkingClass = Class.forName(annotator).asSubclass(Linking.class);
    }

    tagger = linkingClass.getConstructor(Properties.class).newInstance(newProps);
}
/**
 * Captures the document and precomputes its length with and without whitespace.
 *
 * <p>The text is taken from the annotation's {@code CoreAnnotations.TextAnnotation};
 * {@code ttrLimit} is read from {@code localProperties} and falls back to
 * {@code DEFAULT_TTR_LIMIT}.
 *
 * @param language        language of the document
 * @param annotation      annotation carrying the document text
 * @param localProperties annotator-local configuration
 */
public Readability(String language, Annotation annotation, Properties localProperties) {
    this.language = language;
    this.annotation = annotation;

    final String text = annotation.get(CoreAnnotations.TextAnnotation.class);
    final String textWithoutWhitespace = text.replaceAll("\\s+", "");
    docLenWithSpaces = text.length();
    docLenWithoutSpaces = textWithoutWhitespace.length();

    String configuredLimit = localProperties.getProperty("ttrLimit");
    ttrLimit = PropertiesUtils.getInteger(configuredLimit, DEFAULT_TTR_LIMIT);
}
public DigiLemmaAnnotator(String annotatorName, Properties prop) { useGuesser = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".use_guesser"), DEFAULT_USE_GUESSER); extractFeatures = PropertiesUtils.getBoolean(prop.getProperty(annotatorName + ".extract_features"), DEFAULT_FEATURES); //todo: the model is unique if (useGuesser || extractFeatures) { guesser = GuessModelInstance.getInstance().getModel(); } }
public ReadabilityAnnotator(String annotatorName, Properties props) { globalProperties = props; localProperties = PropertiesUtils.dotConvertedProperties(props, annotatorName); language = globalProperties.getProperty(annotatorName + ".language"); className = globalProperties.getProperty(annotatorName + ".className"); maxSentenceLength = PropertiesUtils .getInteger(localProperties.getProperty("maxSentenceLength"), DEFAULT_MAX_SENTENCE_LENGTH); }
public HeidelTimeAnnotator(String annotatorName, Properties props) { // Todo: load an instance for each type // Todo: add document creation datetime String configFile = props.getProperty(annotatorName + ".config", null); String dtString = props.getProperty(annotatorName + ".type", "news"); DocumentType documentType; try { documentType = DocumentType.valueOf(dtString.toUpperCase()); } catch (Exception e) { documentType = DocumentType.NEWS; } if (configFile == null) { Properties convertedProperties = PropertiesUtils.dotConvertedProperties(props, annotatorName); tagger = HeidelTimeModel.getInstance(convertedProperties, documentType).getTagger(); } else { tagger = HeidelTimeModel.getInstance(configFile, documentType).getTagger(); } }
/**
 * Captures the document and precomputes its length with and without whitespace.
 *
 * <p>The text is taken from the annotation's {@code CoreAnnotations.TextAnnotation};
 * {@code ttrLimit} is read from {@code localProperties} and falls back to
 * {@code DEFAULT_TTR_LIMIT}.
 *
 * @param language        language of the document
 * @param annotation      annotation carrying the document text
 * @param localProperties annotator-local configuration
 */
public Readability(String language, Annotation annotation, Properties localProperties) {
    this.language = language;
    this.annotation = annotation;

    final String text = annotation.get(CoreAnnotations.TextAnnotation.class);
    final String textWithoutWhitespace = text.replaceAll("\\s+", "");
    docLenWithSpaces = text.length();
    docLenWithoutSpaces = textWithoutWhitespace.length();

    String configuredLimit = localProperties.getProperty("ttrLimit");
    ttrLimit = PropertiesUtils.getInteger(configuredLimit, DEFAULT_TTR_LIMIT);
}
public SemaforAnnotator(String annotatorName, Properties props) { String semaforModelDir = props.getProperty(annotatorName + ".model_dir", SEMAFOR_MODEL_DIR); useConll = PropertiesUtils.getBoolean(props.getProperty(annotatorName + ".use_conll"), USE_CONLL); maxLen = PropertiesUtils.getInteger(props.getProperty(annotatorName + ".max_len"), MAXLEN); parser = SemaforModel.getInstance(semaforModelDir).getParser(); }
public ItalianTokenizerAnnotator(String annotatorName, Properties props) { String modelFile = props.getProperty(annotatorName + ".model", null); newlineIsSentenceBreak = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".newlineIsSentenceBreak"), true); tokenizeOnlyOnSpace = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".tokenizeOnlyOnSpace"), false); ssplitOnlyOnNewLine = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".ssplitOnlyOnNewLine"), false); if (ssplitOnlyOnNewLine) { newlineIsSentenceBreak = true; } File model = null; if (modelFile != null) { model = new File(modelFile); } tokenizer = ItalianTokenizerModel.getInstance(model).getTokenizer(); }
public static ItalianReadabilityModel getInstance(Properties globalProperties, Properties localProperties) { if (ourInstance == null) { boolean useGlossario = PropertiesUtils.getBoolean(localProperties.getProperty("glossario.use"), false); String easyWordsFileName = localProperties.getProperty("easyWords"); .dotConvertedProperties(localProperties, "glossario.stanford"); for (String key : globalProperties.stringPropertyNames()) { if (stanfordProperties.getProperty(key) == null) { .getBoolean(localProperties.getProperty("glossario.parse", "true"), true);
public HeidelTimeAnnotator(String annotatorName, Properties props) { // Todo: load an instance for each type // Todo: add document creation datetime String configFile = props.getProperty(annotatorName + ".config", null); String dtString = props.getProperty(annotatorName + ".type", "news"); DocumentType documentType; try { documentType = DocumentType.valueOf(dtString.toUpperCase()); } catch (Exception e) { documentType = DocumentType.NEWS; } if (configFile == null) { Properties convertedProperties = PropertiesUtils.dotConvertedProperties(props, annotatorName); tagger = HeidelTimeModel.getInstance(convertedProperties, documentType).getTagger(); } else { tagger = HeidelTimeModel.getInstance(configFile, documentType).getTagger(); } }
Element element = (Element) item; String regExp = element.getAttribute("find"); boolean merge = PropertiesUtils.getBoolean(element.getAttribute("merge"), true); Integer group = PropertiesUtils.getInteger(element.getAttribute("get"), 1); if (merge) { if (!first) {
public ItalianTokenizerAnnotator(String annotatorName, Properties props) { String modelFile = props.getProperty(annotatorName + ".model", null); newlineIsSentenceBreak = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".newlineIsSentenceBreak"), true); tokenizeOnlyOnSpace = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".tokenizeOnlyOnSpace"), false); ssplitOnlyOnNewLine = PropertiesUtils .getBoolean(props.getProperty(annotatorName + ".ssplitOnlyOnNewLine"), false); if (ssplitOnlyOnNewLine) { newlineIsSentenceBreak = true; } File model = null; if (modelFile != null) { model = new File(modelFile); } tokenizer = ItalianTokenizerModel.getInstance(model).getTokenizer(); }
public static ItalianReadabilityModel getInstance(Properties globalProperties, Properties localProperties) { if (ourInstance == null) { boolean useGlossario = PropertiesUtils.getBoolean(localProperties.getProperty("glossario.use"), false); String easyWordsFileName = localProperties.getProperty("easyWords"); .dotConvertedProperties(localProperties, "glossario.stanford"); for (String key : globalProperties.stringPropertyNames()) { if (stanfordProperties.getProperty(key) == null) { .getBoolean(localProperties.getProperty("glossario.parse", "true"), true);
Element element = (Element) item; String regExp = element.getAttribute("find"); boolean merge = PropertiesUtils.getBoolean(element.getAttribute("merge"), true); Integer group = PropertiesUtils.getInteger(element.getAttribute("get"), 1); if (merge) { if (!first) {
/**
 * Reads the prefix switches and tag lists, obtains the verb model and fills the
 * four tag collections.
 *
 * <p>Properties read under the {@code annotatorName} prefix: {@code .use_prefix},
 * {@code .aux_is_prefix}, {@code .modal_is_prefix}, {@code .skip_tags},
 * {@code .verb_tags}, {@code .aux_tags} and {@code .modal_tags}, each falling back to
 * its {@code DEFAULT_*} constant. Each tag text is handed to {@code splitParts}
 * together with the list it should populate.
 *
 * @param annotatorName prefix identifying this annotator's properties
 * @param prop          configuration properties
 */
public VerbAnnotator(String annotatorName, Properties prop) {
    String usePrefixFlag = prop.getProperty(annotatorName + ".use_prefix");
    String auxPrefixFlag = prop.getProperty(annotatorName + ".aux_is_prefix");
    String modalPrefixFlag = prop.getProperty(annotatorName + ".modal_is_prefix");

    usePrefix = PropertiesUtils.getBoolean(usePrefixFlag, DEFAULT_USE_PREFIX);
    auxUsePrefix = PropertiesUtils.getBoolean(auxPrefixFlag, DEFAULT_AUX_IS_PREFIX);
    modalUsePrefix = PropertiesUtils.getBoolean(modalPrefixFlag, DEFAULT_MODAL_IS_PREFIX);

    // todo: add custom filename
    model = VerbModel.getInstance();

    skipTags = new ArrayList<>();
    verbTags = new ArrayList<>();
    modalTags = new ArrayList<>();
    auxTags = new ArrayList<>();

    splitParts(prop.getProperty(annotatorName + ".skip_tags", DEFAULT_SKIP_TAGS), skipTags);
    splitParts(prop.getProperty(annotatorName + ".verb_tags", DEFAULT_VERB_TAGS), verbTags);
    splitParts(prop.getProperty(annotatorName + ".modal_tags", DEFAULT_MODAL_TAGS), modalTags);
    splitParts(prop.getProperty(annotatorName + ".aux_tags", DEFAULT_AUX_TAGS), auxTags);
}