@Named("Mock analyzer") @Concurrent(true) public class MockAnalyzer implements Analyzer<ListResult<InputRow>> { @Configured InputColumn<?>[] cols; private BlockingQueue<InputRow> rows = new LinkedBlockingQueue<>(); @Override public void run(final InputRow row, final int distinctCount) { rows.add(row); } @Override public ListResult<InputRow> getResult() { final List<InputRow> rowsList = new ArrayList<>(rows.size()); rows.drainTo(rowsList); return new ListResult<>(rowsList); } public InputColumn<?>[] getCols() { return cols; } public void setCols(final InputColumn<?>[] cols) { this.cols = cols; } }
@Named("MockAnalyzer") @Description("For testing purposes. Sleeps for 5 seconds.") @Concurrent(true) public class MockFutureAnalyzer implements Analyzer<AnalyzerResultFuture<AnalyzerResult>> {
@Named("Value matcher") @Description("Matches actual values against a set of expected values.\nUse this analyzer as a way to narrow down " + "unexpected values, spelling mistakes, missing values and errors.") @Concurrent(true) public class ValueMatchAnalyzer implements Analyzer<ValueMatchAnalyzerResult>, HasLabelAdvice {
@Named("Mark rows") @Concurrent(true) @Description("Allows the user to mark records with a given description, label, tag or annotation. " + "Each record will be collected and counted, but not written to any external store.\n"
@Named("Mark rows") @Concurrent(true) @Description("Allows the user to mark records with a given description, label, tag or annotation. " + "Each record will be collected and counted, but not written to any external store.\n"
@Named("Year distribution") @Description("Finds the distribution of years from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = DatePartDistributionResultReducer.class)
@Named("Weekday distribution") @Description("Finds the distribution of weekdays from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = WeekdayDistributionResultReducer.class)
@Named("Month distribution") @Description("Finds the distribution of months from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = MonthDistributionResultReducer.class)
@ExternalDocumentation({ @DocumentationLink(title = "Internationalization in DataCleaner", url = "https://www.youtube.com/watch?v=ApA-nhtLbhI", type = DocumentationType.VIDEO, version = "3.0") }) @Concurrent(true) public class CharacterSetDistributionAnalyzer implements Analyzer<CharacterSetDistributionResult> {
@Concurrent(true) public class NumberAnalyzer implements Analyzer<NumberAnalyzerResult> {
@Description("The String analyzer is used to collect a variety of typical metrics on string values.\n" + "Metrics include statistics on character case, words, diacritics, white-spaces and more...") @Concurrent(true) public class StringAnalyzer implements Analyzer<StringAnalyzerResult> {
@Description("Supply your own piece of JavaScript to do a custom transformation") @Categorized(ScriptingCategory.class) @Concurrent(false) public class JavaScriptAdvancedTransformer implements Transformer {
@Named("Date/time analyzer") @Description("Records a variety of interesting measures for date or time based data. Which are the highest/lowest " + "values? How is the year distribution of dates? Are there null values?") @Concurrent(true) @Categorized(DateAndTimeCategory.class) public class DateAndTimeAnalyzer implements Analyzer<DateAndTimeAnalyzerResult> {
@Categorized(TextCategory.class) @Description("Put your description of your transformer here") @Concurrent(true) public class HelloWorldTransformer implements Transformer {
@Named("Unique key check") @Description("Check your keys (or other fields) for uniqueness") @Concurrent(true) public class UniqueKeyCheckAnalyzer implements Analyzer<UniqueKeyCheckAnalyzerResult> {
{ @DocumentationLink(title = "Analyzer rundown", url = "https://www.youtube.com/watch?v=hZWxB_eu_A0", type = DocumentationType.VIDEO, version = "4.0") }) @Concurrent(true) public class ValueDistributionAnalyzer implements Analyzer<ValueDistributionAnalyzerResult> {
@Named("Week number distribution") @Description("Finds the distribution of week numbers from Date values.") @Concurrent(true) @Categorized(DateAndTimeCategory.class) @Distributed(reducer = DatePartDistributionResultReducer.class)
@Named("Delete from table") @Description("Delete records in a table. Records matching the specified condition(s) will be deleted in batch.") @Categorized(superCategory = WriteSuperCategory.class) @Concurrent(true) public class DeleteFromTableAnalyzer implements Analyzer<WriteDataResult>, Action<Iterable<Object[]>>, HasLabelAdvice, PrecedingComponentConsumer {
@Alias("Datastore lookup") @Description("Perform a lookup based on a table in any of your registered datastore (like a LEFT join).") @Concurrent(true) @Categorized(superCategory = ImproveSuperCategory.class, value = ReferenceDataCategory.class) public class TableLookupTransformer implements Transformer, HasLabelAdvice, HasAnalyzerResult<CategorizationResult> {
url = "http://kasper.eobjects.org/2010/09/pattern-finder-20-latest-feature-in.html", type = DocumentationType.TECH, version = "2.0") }) @Concurrent(true) public class PatternFinderAnalyzer implements Analyzer<PatternFinderResult> {