/**
 * Forwards the row to {@code rowAnnotationFactory.annotate}, together with
 * {@code distinctCount} and the {@code rowAnnotation} held by the enclosing scope.
 *
 * @param row the row to annotate
 * @param distinctCount passed through unchanged as the second argument of {@code annotate}
 */
@Override public void run(InputRow row, int distinctCount) { rowAnnotationFactory.annotate(row, distinctCount, rowAnnotation); }
/**
 * Forwards the row to {@code rowAnnotationFactory.annotate}, together with
 * {@code distinctCount} and the {@code rowAnnotation} held by the enclosing scope.
 *
 * @param row the row to annotate
 * @param distinctCount passed through unchanged as the second argument of {@code annotate}
 */
@Override public void run(InputRow row, int distinctCount) { rowAnnotationFactory.annotate(row, distinctCount, rowAnnotation); }
/**
 * Adds {@code distinctCount} to {@code _rowCount} and annotates the row with
 * the null, true or false annotation depending on {@code value}.
 *
 * @param value the boolean value observed for the row; may be {@code null}
 * @param row the row the value belongs to
 * @param distinctCount added to {@code _rowCount} and passed on to the annotation factory
 */
public void run(final Boolean value, final InputRow row, final int distinctCount) {
    _rowCount += distinctCount;

    // Null values get their own annotation and never reach the true/false split.
    if (value == null) {
        _annotationFactory.annotate(row, distinctCount, _nullAnnotation);
        return;
    }

    _annotationFactory.annotate(row, distinctCount, value.booleanValue() ? _trueAnnotation : _falseAnnotation);
}
/**
 * Annotates the row with the annotation already registered for {@code pattern}.
 *
 * @param pattern the pattern the row matched
 * @param row the matching row
 * @param value the matched string value (not used by this implementation)
 * @param distinctCount passed on to the annotation factory
 * @throws IllegalStateException if {@code _annotations} holds no annotation for {@code pattern}
 */
@Override
protected void storeMatch(final TokenPattern pattern, final InputRow row, final String value,
        final int distinctCount) {
    final RowAnnotation known = _annotations.get(pattern);
    if (known != null) {
        _annotationFactory.annotate(row, distinctCount, known);
        return;
    }
    throw new IllegalStateException("No annotation available for pattern: " + pattern);
}
/**
 * Creates a fresh annotation for {@code pattern}, stores it in
 * {@code _annotations} and annotates the row with it.
 *
 * @param pattern the newly seen pattern
 * @param row the row that produced the pattern
 * @param value the string value behind the pattern (not used by this implementation)
 * @param distinctCount passed on to the annotation factory
 */
@Override
protected void storeNewPattern(final TokenPattern pattern, final InputRow row, final String value,
        final int distinctCount) {
    final RowAnnotation created = _annotationFactory.createAnnotation();
    // Register before annotating, so the pattern is in the map by the time the row is recorded.
    _annotations.put(pattern, created);
    _annotationFactory.annotate(row, distinctCount, created);
}
/**
 * Annotates {@code row} with the annotation associated with {@code inspectionOutcomes},
 * creating and storing that annotation in {@code _observations} on first use.
 *
 * The lookup is done once without a lock and, on a miss, repeated inside
 * {@code synchronized (this)} before a new annotation is created, so only one
 * annotation is stored per key by threads that go through this method.
 * NOTE(review): the first, unlocked {@code get} is only safe if {@code _observations}
 * tolerates concurrent reads during a {@code put} - its type is not visible here; confirm.
 *
 * @param row the row to annotate
 * @param inspectionOutcomes the key under which the annotation is stored
 */
public void addObservation(InputRow row, List<Object> inspectionOutcomes) { RowAnnotation annotation = _observations.get(inspectionOutcomes); if (annotation == null) { synchronized (this) { annotation = _observations.get(inspectionOutcomes); if (annotation == null) { annotation = _rowAnnotationFactory.createAnnotation(); _observations.put(inspectionOutcomes, annotation); } } } _rowAnnotationFactory.annotate(row, annotation); }
/**
 * Annotates {@code row} with the annotation associated with {@code inspectionOutcomes},
 * creating and storing that annotation in {@code _observations} on first use.
 *
 * The lookup is done once without a lock and, on a miss, repeated inside
 * {@code synchronized (this)} before a new annotation is created, so only one
 * annotation is stored per key by threads that go through this method.
 * NOTE(review): the first, unlocked {@code get} is only safe if {@code _observations}
 * tolerates concurrent reads during a {@code put} - its type is not visible here; confirm.
 *
 * @param row the row to annotate
 * @param inspectionOutcomes the key under which the annotation is stored
 */
public void addObservation(InputRow row, List<Object> inspectionOutcomes) { RowAnnotation annotation = _observations.get(inspectionOutcomes); if (annotation == null) { synchronized (this) { annotation = _observations.get(inspectionOutcomes); if (annotation == null) { annotation = _rowAnnotationFactory.createAnnotation(); _observations.put(inspectionOutcomes, annotation); } } } _rowAnnotationFactory.annotate(row, annotation); }
/**
 * Annotates the row with the annotation that {@code getAnnotation(x, y)}
 * returns for the given pair of numbers.
 *
 * @param x first number, handed to {@code getAnnotation}
 * @param y second number, handed to {@code getAnnotation}
 * @param row the row to annotate
 * @param distinctCount passed on to the annotation factory
 */
public void register(final Number x, final Number y, final InputRow row, final int distinctCount) {
    _rowAnnotationFactory.annotate(row, distinctCount, getAnnotation(x, y));
}
/**
 * Adds {@code distinctCount} to the total and annotates the row according to
 * the value it holds in {@code column}: the null annotation for a null value,
 * the annotation registered for the value's lookup form if there is one, and
 * the non-matching annotation otherwise.
 *
 * @param row the row to inspect
 * @param distinctCount added to {@code _totalCount} and passed on to the annotation factory
 */
@Override
public void run(final InputRow row, final int distinctCount) {
    _totalCount.addAndGet(distinctCount);

    final Object raw = row.getValue(column);
    if (raw == null) {
        _rowAnnotationFactory.annotate(row, distinctCount, _nullAnnotation);
        return;
    }

    // Values are compared through their string form after getLookupValue has been applied.
    final RowAnnotation matched = _valueAnnotations.get(getLookupValue(raw.toString()));
    final RowAnnotation target = (matched == null) ? _nonMatchingValuesAnnotation : matched;
    _rowAnnotationFactory.annotate(row, distinctCount, target);
}
@Override public void run(final InputRow row, final int distinctCount) { final Object value = row.getValue(foreignKey); if (value == null) { if (ignoreNullValues) { // skip processing this record - null is fine return; } else { // no need in looking up "null": This one has no integrity annotationFactory.annotate(row, distinctCount, annotation); return; } } final Object[] result = _tableLookup.transform(row); assert result.length == 1; final Object object = result[0]; if (object == null) { annotationFactory.annotate(row, distinctCount, annotation); } }
/**
 * Records one observation of {@code value} and adds {@code distinctCount} to
 * {@code _totalCount}.
 *
 * Null values are recorded against {@code _nullValueAnnotation}; every other
 * value is recorded against its entry in {@code _annotationMap}, which is
 * created on first sight while holding this object's monitor. When
 * {@code _recordAnnotations} is set the row is handed to the annotation
 * factory; otherwise only the annotation's row count is incremented.
 *
 * @param row the row the value was read from
 * @param value the observed value; may be {@code null}
 * @param distinctCount the count to record for this observation
 */
public void run(final InputRow row, final String value, final int distinctCount) {
    final RowAnnotation target;
    if (value == null) {
        target = _nullValueAnnotation;
    } else {
        synchronized (this) {
            RowAnnotation existing = _annotationMap.get(value);
            if (existing == null) {
                // Without row recording a plain counter-style annotation is enough.
                if (_recordAnnotations) {
                    existing = _annotationFactory.createAnnotation();
                } else {
                    existing = new RowAnnotationImpl();
                }
                _annotationMap.put(value, existing);
            }
            target = existing;
        }
    }

    if (_recordAnnotations) {
        _annotationFactory.annotate(row, distinctCount, target);
    } else {
        ((RowAnnotationImpl) target).incrementRowCount(distinctCount);
    }

    _totalCount.addAndGet(distinctCount);
}
public synchronized void run(final String value, final InputRow row, final int distinctCount) { final List<Entry<String, UnicodeSet>> unicodeSetsRemaining = new ArrayList<>(_unicodeSets.entrySet()); final CharIterator charIterator = new CharIterator(value); while (charIterator.hasNext()) { final Character c = charIterator.next(); if (charIterator.isWhitespace() || charIterator.isDigit()) { logger.debug("Skipping whitespace/digit char: {}", c); } else { final Iterator<Entry<String, UnicodeSet>> it = unicodeSetsRemaining.iterator(); while (it.hasNext()) { final Entry<String, UnicodeSet> unicodeSet = it.next(); if (unicodeSet.getValue().contains(c)) { final String name = unicodeSet.getKey(); final RowAnnotation annotation = _annotations.get(name); _annotationFactory.annotate(row, distinctCount, annotation); // remove this unicode set from the remaining checks on // this value. it.remove(); } } } } }
/**
 * Turns the rows of a lookup data set into output values and annotates the
 * input row as a match or a miss.
 *
 * With at least one row, the input row is annotated with {@code _matches}.
 * Under {@code LEFT_JOIN_MAX_ONE} the first row's values are returned; under
 * any other join semantic every row is pushed to {@code outputRowCollector}
 * and {@code null} is returned. With no rows, the input row is annotated with
 * {@code _misses}, and an empty array sized to {@code outputColumns} is
 * returned for the two left-join semantics, {@code null} otherwise.
 *
 * @param row the input row the lookup was made for
 * @param dataSet the lookup result, positioned before its first row
 * @return the values to emit for {@code row}, or {@code null} when nothing is returned directly
 */
private Object[] handleDataSet(final InputRow row, final DataSet dataSet) {
    if (dataSet.next()) {
        _annotationFactory.annotate(row, 1, _matches);

        do {
            final Object[] values = dataSet.getRow().getValues();
            if (logger.isInfoEnabled()) {
                logger.info("Result of lookup: " + Arrays.toString(values));
            }

            switch (joinSemantic) {
            case LEFT_JOIN_MAX_ONE:
                // Only the first matching row is of interest.
                return values;
            default:
                outputRowCollector.putValues(values);
            }
        } while (dataSet.next());

        return null;
    }

    // No row at all: record the miss and decide what the caller gets back.
    logger.info("Result of lookup: None!");
    _annotationFactory.annotate(row, 1, _misses);

    switch (joinSemantic) {
    case LEFT_JOIN_MAX_ONE:
    case LEFT_JOIN:
        return new Object[outputColumns.length];
    default:
        return null;
    }
}
// Record inputRow under the _cached annotation; the literal 1 is passed as the
// count argument (presumably a single occurrence of the row - confirm against
// the factory's annotate signature).
_annotationFactory.annotate(inputRow, 1, _cached);
@Override public void run(final InputRow row, final int distinctCount) { final Boolean[] values = new Boolean[_columns.length]; for (int i = 0; i < values.length; i++) { final InputColumn<Boolean> col = _columns[i]; final Boolean value = row.getValue(col); final BooleanAnalyzerColumnDelegate delegate = _columnDelegates.get(col); values[i] = value; delegate.run(value, row, distinctCount); } // collect all combinations of booleans if (_columns.length > 1) { final ValueCombination<Boolean> valueCombination = new ValueCombination<>(values); RowAnnotation annotation = _valueCombinations.get(valueCombination); if (annotation == null) { annotation = _annotationFactory.createAnnotation(); _valueCombinations.put(valueCombination, annotation); } _annotationFactory.annotate(row, distinctCount, annotation); } }
/**
 * Merges the rows of one annotated-rows result into {@code annotation}.
 *
 * Nothing happens for a null result or one without annotated rows. When the
 * result's sample rows account for every annotated row, each of them is
 * re-annotated; otherwise the result's annotation is transferred as a whole.
 *
 * @param annotation the annotation receiving the rows
 * @param annotatedRowsResult the partial result to merge; may be {@code null}
 */
private void reduce(final RowAnnotation annotation, final AnnotatedRowsResult annotatedRowsResult) {
    final int annotatedCount = (annotatedRowsResult == null) ? 0 : annotatedRowsResult.getAnnotatedRowCount();
    if (annotatedCount == 0) {
        return;
    }

    final List<InputRow> samples = annotatedRowsResult.getSampleRows();
    if (samples.size() != annotatedCount) {
        // The sample does not cover every annotated row, so move the annotation itself.
        _rowAnnotationFactory.transferAnnotations(annotatedRowsResult.getAnnotation(), annotation);
        return;
    }

    for (final InputRow sample : samples) {
        _rowAnnotationFactory.annotate(sample, annotation);
    }
}
/**
 * Checks the row's value columns against their conditions and routes the row
 * to the invalid or the complete side.
 *
 * In {@code ANY_FIELD} mode the row is invalid as soon as one value fails its
 * condition, and later columns are not evaluated. In {@code ALL_FIELDS} mode
 * it is invalid only when no value passes. Invalid rows are annotated with
 * {@code _invalidRecords} and, if present, sent to the incomplete-row
 * collector; all other rows go to the complete-row collector, if present.
 *
 * @param row the row to check
 * @param distinctCount added to {@code _rowCount} and passed on to the annotation factory
 */
@Override
public void run(final InputRow row, final int distinctCount) {
    _rowCount.addAndGet(distinctCount);

    boolean failedAnyField = false;
    boolean everyFieldInvalid = true;
    for (int index = 0; index < _valueColumns.length; index++) {
        final Object cell = row.getValue(_valueColumns[index]);
        final boolean cellValid = _conditions[index].isValid(cell);
        if (_evaluationMode == EvaluationMode.ANY_FIELD && !cellValid) {
            // One failing field is enough; stop evaluating further columns.
            failedAnyField = true;
            break;
        }
        if (cellValid) {
            everyFieldInvalid = false;
        }
    }

    final boolean invalidRecord =
            failedAnyField || (_evaluationMode == EvaluationMode.ALL_FIELDS && everyFieldInvalid);

    if (invalidRecord) {
        _annotationFactory.annotate(row, distinctCount, _invalidRecords);
        if (_incompleteRowCollector != null) {
            _incompleteRowCollector.putValues(row.getValues(_outputDataStreamColumns).toArray());
        }
        return;
    }

    if (_completeRowCollector != null) {
        _completeRowCollector.putValues(row.getValues(_outputDataStreamColumns).toArray());
    }
}
// Record inputRow under the given annotation; the literal 1 is passed as the
// count argument (presumably a single occurrence of the row - confirm against
// the factory's annotate signature).
_rowAnnotationFactory.annotate(inputRow, 1, annotation);
@Override public CompletenessAnalyzerResult reduce(final Collection<? extends CompletenessAnalyzerResult> results) { final CompletenessAnalyzerResult firstResult = results.iterator().next(); final RowAnnotation annotation = _rowAnnotationFactory.createAnnotation(); final InputColumn<?>[] highlightedColumns = firstResult.getHighlightedColumns(); int totalRowCount = 0; for (final CompletenessAnalyzerResult result : results) { final List<InputRow> sampleRows = result.getSampleRows(); final int invalidRowCount = result.getInvalidRowCount(); if (invalidRowCount == sampleRows.size()) { // if the rows are included for preview/sampling - then // re-annotate them in the master result for (final InputRow sampleRow : sampleRows) { _rowAnnotationFactory.annotate(sampleRow, annotation); } } else { // else we just transfer annotation counts _rowAnnotationFactory.transferAnnotations(result.getAnnotation(), annotation); } totalRowCount += result.getTotalRowCount(); } return new CompletenessAnalyzerResult(totalRowCount, annotation, _rowAnnotationFactory, highlightedColumns); }