Refine search
/**
 * Builds a locator from the given configuration section.
 *
 * <p>Reads the optional string settings {@code collection}, {@code zkHost},
 * {@code solrHomeDir} and {@code solrUrl} (each {@code null} when absent) and the
 * integer {@code batchSize}, which keeps its current field value when absent.
 *
 * @param config  configuration section to read the settings from
 * @param context morphline context, forwarded to the single-argument constructor
 */
public SolrLocator(Config config, MorphlineContext context) {
  this(context);
  this.config = config;

  Configs settings = new Configs();
  this.collectionName = settings.getString(config, "collection", null);
  this.zkHost = settings.getString(config, "zkHost", null);
  this.solrHomeDir = settings.getString(config, "solrHomeDir", null);
  this.solrUrl = settings.getString(config, "solrUrl", null);
  this.batchSize = settings.getInt(config, "batchSize", this.batchSize);

  // Trace first, validate second: the trace line is emitted even if validation fails.
  LOG.trace("Constructed solrLocator: {}", this);
  // NOTE(review): presumably rejects parameters that were never read above - confirm.
  settings.validateArguments(config);
}
/**
 * Creates the command, reading the {@code CONF_REMOVE} flag (default
 * {@code DEFAULT_REMOVE}) and the entries of the {@code CONF_FIELDS} sub-config.
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
protected Rename(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.remove = getConfigs().getBoolean(config, CONF_REMOVE, DEFAULT_REMOVE);
  // The sub-config is only needed for its entry set, so it is not kept in a local.
  this.entries = getConfigs().getConfig(config, CONF_FIELDS).entrySet();
}
/**
 * Creates the command from the entries of {@code config}.
 *
 * <p>Every entry whose value is not a {@link Collection} has its value replaced by a
 * {@code FieldExpression} built from the value's string form. The rendered root config
 * is kept in {@code renderedConfig}.
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
public Equals(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.entrySet = new Configs().getEntrySet(config);
  for (Map.Entry<String, Object> candidate : this.entrySet) {
    Object raw = candidate.getValue();
    if (raw instanceof Collection) {
      continue; // collections are kept as they are
    }
    candidate.setValue(new FieldExpression(raw.toString(), getConfig()));
  }
  this.renderedConfig = config.root().render();
}
protected ContainsAnyOf(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.entrySet = new Configs().getEntrySet(config); }
/**
 * Parses the {@code BOUNDED_EXPONENTIAL_BACKOFF_RETRY_NAME} section of the given retry
 * policy configuration into a retry policy factory.
 *
 * <p>Settings read from that section, with the defaults applied when absent:
 * <ul>
 *   <li>{@code baseSleepTime} - 125 milliseconds</li>
 *   <li>{@code maxSleepTime} - 10 seconds</li>
 *   <li>{@code maxRetries} - 100</li>
 *   <li>{@code maxElapsedTime} - 3 * 60 seconds</li>
 * </ul>
 *
 * @param retryPolicyConfig configuration containing the bounded exponential backoff section
 * @return a factory wrapping a {@code FlexibleBoundedExponentialBackoffRetry}, or
 *     {@code null} when {@code maxRetries} or {@code maxElapsedTime} is not positive
 */
public RetryPolicyFactory parse(Config retryPolicyConfig) {
  // One helper is enough for the whole parse; the original allocated a new one per lookup.
  final Configs configs = new Configs();
  final Config backoffConfig =
      configs.getConfig(retryPolicyConfig, BOUNDED_EXPONENTIAL_BACKOFF_RETRY_NAME);

  final long baseSleepTime =
      configs.getNanoseconds(backoffConfig, "baseSleepTime", TimeUnit.MILLISECONDS.toNanos(125));
  final long maxSleepTime =
      configs.getNanoseconds(backoffConfig, "maxSleepTime", TimeUnit.SECONDS.toNanos(10));
  final int maxRetries = configs.getInt(backoffConfig, "maxRetries", 100);
  final long maxElapsedTime =
      configs.getNanoseconds(backoffConfig, "maxElapsedTime", TimeUnit.SECONDS.toNanos(3 * 60));

  LOG.debug("Parsed retry policy BoundedExponentialBackoffRetry with "
      + "baseSleepTime:{}, maxSleepTime:{}, maxRetries:{}, maxElapsedTime:{}",
      new Object[]{baseSleepTime, maxSleepTime, maxRetries, maxElapsedTime});

  // A non-positive retry count or time budget yields no factory at all.
  if (maxRetries <= 0 || maxElapsedTime <= 0) {
    return null;
  }
  return new DefaultRetryPolicyFactory(
      new FlexibleBoundedExponentialBackoffRetry(
          baseSleepTime, maxSleepTime, maxRetries, maxElapsedTime));
}
Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); SolrLocator locator = new SolrLocator(solrLocatorConfig, context); LOG.debug("solrLocator: {}", locator); String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null); if (uprefix != null) { cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix); for (String capture : getConfigs().getStringList(config, ExtractingParams.CAPTURE_ELEMENTS, Collections.<String>emptyList())) { cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture); Config fmapConfig = getConfigs().getConfig(config, "fmap", null); if (fmapConfig != null) { for (Map.Entry<String, Object> entry : new Configs().getEntrySet(fmapConfig)) { cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString()); String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null); if (captureAttributes != null) { cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes); String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null); if (lowerNames != null) { cellParams.put(ExtractingParams.LOWERNAMES, lowerNames); String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null); if (defaultField != null) { cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField); xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null);
public Grok(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); GrokDictionaries dict = new GrokDictionaries(config, getConfigs()); Config exprConfig = getConfigs().getConfig(config, "expressions", ConfigFactory.empty()); for (Map.Entry<String, Object> entry : new Configs().getEntrySet(exprConfig)) { String expr = entry.getValue().toString(); this.regexes.add(new Regex(entry.getKey(), dict.compileExpression(expr).matcher(""))); } this.firstKey = (regexes.size() == 0 ? null : regexes.iterator().next().getRecordInputField()); String extractStr = getConfigs().getString(config, "extract", "true"); this.extractInPlace = extractStr.equals("inplace"); if (extractInPlace) { this.extract = true; } else { this.extract = getConfigs().getBoolean(config, "extract", true); } this.numRequiredMatches = new Validator<NumRequiredMatches>().validateEnum( config, getConfigs().getString(config, "numRequiredMatches", NumRequiredMatches.atLeastOnce.toString()), NumRequiredMatches.class); this.findSubstrings = getConfigs().getBoolean(config, "findSubstrings", false); this.addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", false); validateArguments(); this.renderedConfig = config.root().render(); }
List<? extends Config> morphlineConfigs = config.getConfigList("morphlines"); if (morphlineConfigs.size() == 0) { throw new MorphlineCompilationException( "Morphline file must contain at least one morphline: " + nameForErrorMsg, null); } else { for (Config candidate : morphlineConfigs) { if (morphlineId.equals(new Configs().getString(candidate, "id", null))) { morphlineConfig = candidate; break; throw new MorphlineCompilationException( "Morphline id '" + morphlineId + "' not found in morphline file: " + nameForErrorMsg, null);
/**
 * Creates the translate command.
 *
 * <p>Reads the {@code field} name, copies every entry of the {@code dictionary}
 * sub-config into the {@code dictionary} map, and reads the optional {@code fallback}
 * value ({@code null} when absent).
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
public Translate(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.fieldName = getConfigs().getString(config, "field");

  Config dictionaryConfig = getConfigs().getConfig(config, "dictionary");
  for (Map.Entry<String, Object> mapping : new Configs().getEntrySet(dictionaryConfig)) {
    String key = mapping.getKey();
    Object translation = mapping.getValue();
    dictionary.put(key, translation);
  }

  this.fallback = getConfigs().getString(config, "fallback", null);
  validateArguments();
}
/**
 * Creates the command and resolves the writer schema.
 *
 * <p>The schema comes from {@code writerSchemaString} when present; otherwise from the
 * file named by {@code writerSchemaFile}; otherwise it is {@code null}. The
 * {@code writerSchemaFile} setting is only looked up when no inline schema is given.
 * Also reads {@code isJson} (default {@code false}).
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 * @throws MorphlineCompilationException if the schema file cannot be read
 */
public ReadAvro(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);

  String inlineSchema = getConfigs().getString(config, "writerSchemaString", null);
  if (inlineSchema == null) {
    String schemaPath = getConfigs().getString(config, "writerSchemaFile", null);
    if (schemaPath == null) {
      this.writerSchema = null;
    } else {
      try {
        this.writerSchema = new Parser().parse(new File(schemaPath));
      } catch (IOException e) {
        throw new MorphlineCompilationException(
            "Cannot parse external Avro writer schema file: " + schemaPath, config, e);
      }
    }
  } else {
    this.writerSchema = new Parser().parse(inlineSchema);
  }

  this.isJson = getConfigs().getBoolean(config, "isJson", false);
  validateArguments();
}
public ReadJsonTestTweets(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.isLengthDelimited = getConfigs().getBoolean(config, "isLengthDelimited", true); this.idPrefix = getConfigs().getString(config, "idPrefix", null); if ("random".equals(idPrefix)) { idPrefix = String.valueOf(new Random().nextInt()); } else if (idPrefix == null) { idPrefix = ""; } validateArguments(); }
public ReadCSV(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); String separator = getConfigs().getString(config, "separator", ","); if (separator.length() != 1) { throw new MorphlineCompilationException("CSV separator must be one character only: " + separator, config); this.columnNames = getConfigs().getStringList(config, "columns"); this.charset = getConfigs().getCharset(config, "charset", null); this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false); this.trim = getConfigs().getBoolean(config, "trim", true); this.addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", true); this.quoteChar = getConfigs().getString(config, "quoteChar", ""); if (quoteChar.length() > 1) { throw new MorphlineCompilationException( "Quote character must not have a length of more than one character: " + quoteChar, config); this.commentPrefix = getConfigs().getString(config, "commentPrefix", ""); if (commentPrefix.length() > 1) { throw new MorphlineCompilationException( "Comment prefix must not have a length of more than one character: " + commentPrefix, config); this.maxCharactersPerRecord = getConfigs().getInt(config, "maxCharactersPerRecord", 1000 * 1000); this.ignoreTooLongRecords = new Validator<OnMaxCharactersPerRecord>().validateEnum( config, getConfigs().getString(config, "onMaxCharactersPerRecord", OnMaxCharactersPerRecord.throwException.toString()), OnMaxCharactersPerRecord.class) == OnMaxCharactersPerRecord.ignoreRecord; this.tokenizer = quoteChar.length() == 0 ?
/**
 * Creates the command.
 *
 * <p>Reads {@code charset} ({@code null} when absent), {@code ignoreFirstLine} (default
 * {@code false}) and {@code commentPrefix}. The comment prefix may be at most one
 * character long; an empty prefix is stored as {@code null}.
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 * @throws MorphlineCompilationException if {@code commentPrefix} is longer than one character
 */
public ReadLine(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);

  String prefix = getConfigs().getString(config, "commentPrefix", "");
  if (prefix.length() > 1) {
    throw new MorphlineCompilationException(
        "commentPrefix must be at most one character long: " + prefix, config);
  }
  if (prefix.length() > 0) {
    this.commentPrefix = prefix;
  } else {
    this.commentPrefix = null;
  }

  validateArguments();
}
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.inputFieldName = getConfigs().getString(config, "inputField"); this.outputFieldName = getConfigs().getString(config, "outputField"); String solrFieldType = getConfigs().getString(config, "solrFieldType"); Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); SolrLocator locator = new SolrLocator(solrLocatorConfig, context); LOG.debug("solrLocator: {}", locator); IndexSchema schema = locator.getIndexSchema(); FieldType fieldType = schema.getFieldTypeByName(solrFieldType); if (fieldType == null) { throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config); } this.analyzer = Objects.requireNonNull(fieldType.getIndexAnalyzer()); // register CharTermAttribute for later (implicit) reuse this.token = Objects.requireNonNull(analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class)); validateArguments(); }
public SaxonCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.isTracing = getConfigs().getBoolean(config, "isTracing", false); boolean isLicensedSaxonEdition = getConfigs().getBoolean(config, "isLicensedSaxonEdition", false); this.processor = new Processor(isLicensedSaxonEdition); this.documentBuilder = processor.newDocumentBuilder(); Config features = getConfigs().getConfig(config, "features", ConfigFactory.empty()); for (Map.Entry<String, Object> entry : new Configs().getEntrySet(features)) { processor.setConfigurationProperty(entry.getKey(), entry.getValue()); for (String clazz : getConfigs().getStringList(config, "extensionFunctions", Collections.<String>emptyList())) { Object function; try { function = Class.forName(clazz).newInstance(); } catch (Exception e) { throw new MorphlineCompilationException("Cannot instantiate extension function: " + clazz, config); throw new MorphlineCompilationException("Extension function has wrong class: " + clazz, config);
public GeoIP(CommandBuilder builder, Config config, Command parent, Command child, final MorphlineContext context) { super(builder, config, parent, child, context); this.inputFieldName = getConfigs().getString(config, "inputField"); this.databaseFile = new File(getConfigs().getString(config, "database", "GeoLite2-City.mmdb")); try { this.databaseReader = new Reader(databaseFile); } catch (IOException e) { throw new MorphlineCompilationException("Cannot read Maxmind database: " + databaseFile, config, e); } validateArguments(); }
public ReadAvroContainer(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); String schemaString = getConfigs().getString(config, "readerSchemaString", null); if (schemaString != null) { this.readerSchema = new Parser().parse(schemaString); } else { String schemaFile = getConfigs().getString(config, "readerSchemaFile", null); if (schemaFile != null) { try { this.readerSchema = new Parser().parse(new File(schemaFile)); } catch (IOException e) { throw new MorphlineCompilationException("Cannot parse external Avro reader schema file: " + schemaFile, config, e); } } else { this.readerSchema = null; } } if (getClass() == ReadAvroContainer.class) { resolverCache = new BoundedLRUHashMap<ByteArrayKey, ResolvingDecoder>( getConfigs().getInt(config, "schemaCacheCapacity", 100)); validateArguments(); } else { resolverCache = null; } }
/**
 * Creates the command and prepares its Hadoop {@code Configuration}.
 *
 * <p>If the optional {@code fs} setting is present it is installed as the default file
 * system URI. Every value of the optional {@code conf} string list is added to the
 * configuration as a resource path.
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
public OpenHdfsFile(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.conf = new Configuration();

  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    // see Hadoop's GenericOptionsParser
    FileSystem.setDefaultUri(conf, fsUri);
  }

  for (String resource :
      getConfigs().getStringList(config, "conf", Collections.<String>emptyList())) {
    // see Hadoop's GenericOptionsParser
    conf.addResource(new Path(resource));
  }

  validateArguments();
}
/**
 * Creates the split command.
 *
 * <p>Reads {@code field}, {@code separator} and the {@code newFields} string list, plus
 * the flags {@code dropUndeclaredField} and {@code trimSegments} (both default
 * {@code true}).
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
public Split(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);

  // Input description.
  this.fieldName = getConfigs().getString(config, "field");
  this.separator = getConfigs().getString(config, "separator");
  this.newFields = getConfigs().getStringList(config, "newFields");

  // Behaviour flags.
  this.dropUndeclaredField = getConfigs().getBoolean(config, "dropUndeclaredField", true);
  this.trimSegments = getConfigs().getBoolean(config, "trimSegments", true);

  LOG.debug("fieldName: {}", this.fieldName);
  validateArguments();
}
/**
 * Creates the command.
 *
 * <p>Reads {@code parameter}, {@code inputField} and {@code outputField}, the flag
 * {@code failOnInvalidURI} (default {@code false}), {@code maxParameters} (default
 * {@link Integer#MAX_VALUE}) and {@code charset} (default {@code UTF-8}). The charset
 * name is resolved once during construction so that an unsupported name fails here.
 *
 * @param builder the builder that created this command
 * @param config  configuration of this command
 * @param parent  parent command
 * @param child   child command
 * @param context morphline context
 */
public ExtractURIQueryParameters(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);

  // Field wiring.
  this.parameterName = getConfigs().getString(config, "parameter");
  this.inputFieldName = getConfigs().getString(config, "inputField");
  this.outputFieldName = getConfigs().getString(config, "outputField");

  // Parsing limits and error policy.
  this.failOnInvalidURI = getConfigs().getBoolean(config, "failOnInvalidURI", false);
  this.maxParameters = getConfigs().getInt(config, "maxParameters", Integer.MAX_VALUE);

  this.charset = getConfigs().getString(config, "charset", "UTF-8");
  // fail fast if charset is unsupported
  Charset.forName(this.charset);

  validateArguments();
}