@Named("Mock analyzer") @Concurrent(true) public class MockAnalyzer implements Analyzer<ListResult<InputRow>> { @Configured InputColumn<?>[] cols; private BlockingQueue<InputRow> rows = new LinkedBlockingQueue<>(); @Override public void run(final InputRow row, final int distinctCount) { rows.add(row); } @Override public ListResult<InputRow> getResult() { final List<InputRow> rowsList = new ArrayList<>(rows.size()); rows.drainTo(rowsList); return new ListResult<>(rowsList); } public InputColumn<?>[] getCols() { return cols; } public void setCols(final InputColumn<?>[] cols) { this.cols = cols; } }
private boolean determineConcurrent() { final Concurrent concurrent = _filterJob.getDescriptor().getAnnotation(Concurrent.class); if (concurrent == null) { // filter are by default concurrent return true; } return concurrent.value(); }
@Named("MockAnalyzer") @Description("For testing purposes. Sleeps for 5 seconds.") @Concurrent(true) public class MockFutureAnalyzer implements Analyzer<AnalyzerResultFuture<AnalyzerResult>> {
private boolean determineConcurrent() { final Concurrent concurrent = _analyzerJob.getDescriptor().getAnnotation(Concurrent.class); if (concurrent == null) { // analyzers are by default not concurrent return false; } return concurrent.value(); }
@Named("Value matcher") @Description("Matches actual values against a set of expected values.\nUse this analyzer as a way to narrow down " + "unexpected values, spelling mistakes, missing values and errors.") @Concurrent(true) public class ValueMatchAnalyzer implements Analyzer<ValueMatchAnalyzerResult>, HasLabelAdvice {
/**
 * Tells whether this component may run concurrently.
 *
 * <p>An explicit {@link Concurrent} annotation on the component descriptor
 * takes precedence. Without it, the result is {@code false} when
 * {@code isAnalyzer()} reports true and {@code true} otherwise.
 *
 * @return {@code true} if the component may run concurrently
 */
public boolean isConcurrent() {
    final Concurrent concurrent = _componentDescriptor.getAnnotation(Concurrent.class);
    if (concurrent == null) {
        // No explicit setting: only analyzers default to non-concurrent.
        return !isAnalyzer();
    }
    return concurrent.value();
}
@Named("Mark rows") @Concurrent(true) @Description("Allows the user to mark records with a given description, label, tag or annotation. " + "Each record will be collected and counted, but not written to any external store.\n"
private boolean determineConcurrent() { final Concurrent concurrent = _transformerJob.getDescriptor().getAnnotation(Concurrent.class); if (concurrent == null) { // transformers are by default concurrent return true; } return concurrent.value(); }
@Named("Mark rows") @Concurrent(true) @Description("Allows the user to mark records with a given description, label, tag or annotation. " + "Each record will be collected and counted, but not written to any external store.\n"
@Named("Weekday distribution") @Description("Finds the distribution of weekdays from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = WeekdayDistributionResultReducer.class)
@Named("Year distribution") @Description("Finds the distribution of years from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = DatePartDistributionResultReducer.class)
@Named("Month distribution") @Description("Finds the distribution of months from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = MonthDistributionResultReducer.class)
@ExternalDocumentation({ @DocumentationLink(title = "Internationalization in DataCleaner", url = "https://www.youtube.com/watch?v=ApA-nhtLbhI", type = DocumentationType.VIDEO, version = "3.0") }) @Concurrent(true) public class CharacterSetDistributionAnalyzer implements Analyzer<CharacterSetDistributionResult> {
@Concurrent(true) public class NumberAnalyzer implements Analyzer<NumberAnalyzerResult> {
@Description("The String analyzer is used to collect a variety of typical metrics on string values.\n" + "Metrics include statistics on character case, words, diacritics, white-spaces and more...") @Concurrent(true) public class StringAnalyzer implements Analyzer<StringAnalyzerResult> {
@Description("Supply your own piece of JavaScript to do a custom transformation") @Categorized(ScriptingCategory.class) @Concurrent(false) public class JavaScriptAdvancedTransformer implements Transformer {
@Named("Date/time analyzer") @Description("Records a variety of interesting measures for date or time based data. Which are the highest/lowest " + "values? How is the year distribution of dates? Are there null values?") @Concurrent(true) @Categorized(DateAndTimeCategory.class) public class DateAndTimeAnalyzer implements Analyzer<DateAndTimeAnalyzerResult> {
@Categorized(TextCategory.class) @Description("Put your description of your transformer here") @Concurrent(true) public class HelloWorldTransformer implements Transformer {
@Named("Unique key check") @Description("Check your keys (or other fields) for uniqueness") @Concurrent(true) public class UniqueKeyCheckAnalyzer implements Analyzer<UniqueKeyCheckAnalyzerResult> {
{ @DocumentationLink(title = "Analyzer rundown", url = "https://www.youtube.com/watch?v=hZWxB_eu_A0", type = DocumentationType.VIDEO, version = "4.0") }) @Concurrent(true) public class ValueDistributionAnalyzer implements Analyzer<ValueDistributionAnalyzerResult> {