/**
 * Runs an NLP entity extraction over the {@code MedlineIndexWriter.ABSTRACT} attribute of
 * every tuple scanned from {@code tableName} and records the outcome in the class-level
 * counters.
 *
 * <p>The elapsed wall-clock time (in seconds, measured from just before {@code open()} to
 * just after {@code close()}) is added to {@code totalMatchingTime}; the total number of
 * spans found in the {@code SchemaConstants.SPAN_LIST} field of the output tuples is added
 * to {@code totalResults}. Neither counter is updated when the run fails.
 *
 * @param tableName name of the table to scan
 * @param tokenType NLP entity type to extract
 * @throws Exception if building or running the operator pipeline fails
 */
public static void matchNLP(String tableName, NlpEntityType tokenType) throws Exception {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);

    ISourceOperator sourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(tokenType, attributeNames, SchemaConstants.SPAN_LIST);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(sourceOperator);

    long startMatchTime = System.currentTimeMillis();
    int counter = 0;

    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
            List<Span> spanList = spanListField.getValue();
            counter += spanList.size();
        }
    } finally {
        // Close the operator even when iteration throws, so it is not left open.
        nlpEntityOperator.close();
    }

    long endMatchTime = System.currentTimeMillis();
    double matchTime = (endMatchTime - startMatchTime) / 1000.0;
    totalMatchingTime += matchTime;
    totalResults += counter;
}
/**
 * Builds a scan source for the table named by {@code predicate}.
 *
 * <p>A data reader is obtained from the {@link RelationManager} using a
 * {@code MatchAllDocsQuery}, and payload generation is switched on for it.
 *
 * @param predicate predicate carrying the name of the table to scan
 * @throws DataflowException wrapping any {@code StorageException} raised while
 *         obtaining the data reader
 */
public ScanBasedSourceOperator(ScanSourcePredicate predicate) throws DataflowException {
    try {
        RelationManager relationManager = RelationManager.getInstance();
        this.dataReader = relationManager.getTableDataReader(
                predicate.getTableName(), new MatchAllDocsQuery());
        // TODO add an option to set if payload is added in the future.
        this.dataReader.setPayloadAdded(true);
    } catch (StorageException storageFailure) {
        throw new DataflowException(storageFailure);
    }
}
public void setDefaultMatcherConfig(ComparableMatcher comparableMatcher) throws TexeraException { // Perform the query ScanBasedSourceOperator sourceOperator = new ScanBasedSourceOperator( new ScanSourcePredicate(PEOPLE_TABLE)); comparableMatcher.setInputOperator(sourceOperator); comparableMatcher.setLimit(Integer.MAX_VALUE); comparableMatcher.setOffset(0); }
/**
 * Passes a {@code ScanSourcePredicate} built for the table name "tableName"
 * to {@code testPredicate}.
 */
@Test
public void testScanSource() throws Exception {
    testPredicate(new ScanSourcePredicate("tableName"));
}
/**
 * Checks whether every tuple of {@code sampleList} is present in {@code SAMPLER_TABLE}.
 *
 * <p>The whole table is scanned into memory and the containment check is delegated to
 * {@code TestUtils.containsAll}.
 *
 * @param sampleList tuples expected to be found in the table
 * @return the result of {@code TestUtils.containsAll(tableTuples, sampleList)}
 * @throws TexeraException if scanning the table fails
 */
public static boolean containedInSamplerTable(List<Tuple> sampleList) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(SAMPLER_TABLE));
    List<Tuple> returnedTuples = new ArrayList<Tuple>();

    scanSource.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = scanSource.getNextTuple()) != null) {
            returnedTuples.add(nextTuple);
        }
    } finally {
        // Close the scan even when reading throws, so the reader is not left open.
        scanSource.close();
    }

    return TestUtils.containsAll(returnedTuples, sampleList);
}
/**
 * Creates a {@link RegexMatcher} that applies {@code query} to the attribute
 * {@code attrName} of every tuple scanned from {@code tableName}; matches are written
 * to {@code SchemaConstants.SPAN_LIST}. The returned matcher is not opened.
 *
 * @param tableName name of the table to scan
 * @param query regular expression to match
 * @param attrName attribute the regex is applied to
 * @return the configured matcher, or {@code null} if a {@code DataflowException} occurred
 *         (its stack trace is printed)
 */
public static RegexMatcher getRegexMatcher(String tableName, String query, String attrName) {
    try {
        ScanBasedSourceOperator source = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
        RegexPredicate predicate = new RegexPredicate(query, Arrays.asList(attrName), SchemaConstants.SPAN_LIST);
        RegexMatcher matcher = new RegexMatcher(predicate);
        matcher.setInputOperator(source);
        return matcher;
    } catch (DataflowException failure) {
        failure.printStackTrace();
        return null;
    }
}
/**
 * Prepares {@code aggOperator} for execution: feeds it a full scan of
 * {@code PEOPLE_TABLE}, opens it, then sets the limit to {@code Integer.MAX_VALUE}
 * and the offset to 0.
 *
 * @param aggOperator aggregator to wire up and open
 */
private void setPreExecConfigs(Aggregator aggOperator) {
    aggOperator.setInputOperator(
            new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE)));
    aggOperator.open();
    // NOTE(review): limit/offset are applied after open() - confirm this ordering is intended.
    aggOperator.setLimit(Integer.MAX_VALUE);
    aggOperator.setOffset(0);
}
/**
 * Checks whether the leading tuples of {@code SAMPLER_TABLE}, in scan order, equal the
 * tuples of {@code sampleList} position by position.
 *
 * <p>Only the first {@code sampleList.size()} tuples of the table are compared; any
 * further tuples in the table are ignored. If the table runs out of tuples first, the
 * comparison against {@code null} fails and {@code false} is returned.
 *
 * @param sampleList tuples expected at the start of the table, in order
 * @return {@code true} if every sampled tuple equals the table tuple at the same position
 * @throws TexeraException if scanning the table fails
 */
public static boolean matchSamplerTable(List<Tuple> sampleList) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(SAMPLER_TABLE));

    scanSource.open();
    try {
        for (Tuple sampledTuple : sampleList) {
            Tuple tableTuple = scanSource.getNextTuple();
            if (!sampledTuple.equals(tableTuple)) {
                return false;
            }
        }
        return true;
    } finally {
        // Single close point for every exit path, including exceptions.
        scanSource.close();
    }
}
/**
 * Runs a {@code Sampler} over a full scan of {@code tableName} and collects its output.
 *
 * @param tableName name of the table to scan
 * @param k sample size handed to the {@code SamplerPredicate}
 * @param sampleType sampling strategy handed to the {@code SamplerPredicate}
 * @return all tuples produced by the sampler, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> computeSampleResults(String tableName, int k,
        SampleType sampleType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    Sampler tupleSampler = new Sampler(new SamplerPredicate(k, sampleType));
    tupleSampler.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    tupleSampler.open();
    try {
        Tuple tuple;
        while ((tuple = tupleSampler.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the sampler even when iteration throws.
        tupleSampler.close();
    }
    return results;
}
/**
 * Runs a {@code RegexSplitOperator} in {@code RegexOutputType.ONE_TO_ONE} mode over a
 * full scan of {@code tableName} and collects its output. The split result is written
 * to the {@code RESULT_ATTR} attribute.
 *
 * @param tableName name of the table to scan
 * @param splitAttrName attribute whose value is split
 * @param splitRegex regular expression used for splitting
 * @param splitType split type handed to the {@code RegexSplitPredicate}
 * @return all tuples produced by the operator, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> computeRegexSplitResultsOnetoOne(String tableName, String splitAttrName,
        String splitRegex, RegexSplitPredicate.SplitType splitType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexSplitOperator regexSplit = new RegexSplitOperator(
            new RegexSplitPredicate(splitRegex, splitAttrName, RegexOutputType.ONE_TO_ONE, splitType, RESULT_ATTR));
    regexSplit.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    regexSplit.open();
    try {
        Tuple tuple;
        while ((tuple = regexSplit.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the operator even when iteration throws.
        regexSplit.close();
    }
    return results;
}
/**
 * Runs a {@code RegexSplitOperator} in {@code RegexOutputType.ONE_TO_MANY} mode over a
 * full scan of {@code tableName} and collects its output. The split result is written
 * to the {@code RESULT_ATTR} attribute.
 *
 * @param tableName name of the table to scan
 * @param splitAttrName attribute whose value is split
 * @param splitRegex regular expression used for splitting
 * @param splitType split type handed to the {@code RegexSplitPredicate}
 * @return all tuples produced by the operator, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> computeRegexSplitResultsOneToMany(String tableName, String splitAttrName,
        String splitRegex, RegexSplitPredicate.SplitType splitType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexSplitOperator regexSplit = new RegexSplitOperator(
            new RegexSplitPredicate(splitRegex, splitAttrName, RegexOutputType.ONE_TO_MANY, splitType, RESULT_ATTR));
    regexSplit.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    regexSplit.open();
    try {
        Tuple tuple;
        while ((tuple = regexSplit.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the operator even when iteration throws.
        regexSplit.close();
    }
    return results;
}
/**
 * Scans {@code PEOPLE_TABLE} with a {@code ScanBasedSourceOperator} and verifies that the
 * returned tuples match {@code TestConstants.getSamplePeopleTuples()} both in count and,
 * according to {@code TestUtils.equals}, in content.
 */
@Test
public void testFlow() throws TexeraException, ParseException {
    List<Tuple> actualTuples = TestConstants.getSamplePeopleTuples();

    ScanBasedSourceOperator scanBasedSourceOperator =
            new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    List<Tuple> returnedTuples = new ArrayList<Tuple>();

    scanBasedSourceOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = scanBasedSourceOperator.getNextTuple()) != null) {
            returnedTuples.add(nextTuple);
        }
    } finally {
        // Close before asserting so a failing assertion cannot leave the operator open.
        scanBasedSourceOperator.close();
    }

    Assert.assertEquals(actualTuples.size(), returnedTuples.size());
    Assert.assertTrue(TestUtils.equals(actualTuples, returnedTuples));
}
/**
 * Runs a {@code RegexMatcher} over a full scan of {@code tableName} and collects its
 * output. Matches are written to the {@code RESULTS} attribute.
 *
 * @param tableName name of the table to scan
 * @param regex regular expression to match
 * @param attributeNames attributes the regex is applied to
 * @param limit limit set on the matcher
 * @param offset offset set on the matcher
 * @return all tuples produced by the matcher, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String regex, List<String> attributeNames,
        int limit, int offset) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    RegexPredicate regexPredicate = new RegexPredicate(regex, attributeNames, RESULTS);
    RegexMatcher regexMatcher = new RegexMatcher(regexPredicate);
    regexMatcher.setLimit(limit);
    regexMatcher.setOffset(offset);
    regexMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    regexMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = regexMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even when iteration throws.
        regexMatcher.close();
    }
    return results;
}
/**
 * Runs an {@code NlpEntityOperator} over a full scan of {@code tableName} and collects
 * its output. Extracted entities are written to the {@code RESULTS} attribute.
 *
 * @param tableName name of the table to scan
 * @param attributeNames attributes the extraction is applied to
 * @param nlpEntityType NLP entity type to extract
 * @param limit limit set on the operator
 * @param offset offset set on the operator
 * @return all tuples produced by the operator, in the order they were emitted
 * @throws Exception if building or running the pipeline fails
 */
public List<Tuple> getQueryResults(String tableName, List<String> attributeNames, NlpEntityType nlpEntityType,
        int limit, int offset) throws Exception {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(nlpEntityType, attributeNames, RESULTS);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(scanSource);
    nlpEntityOperator.setLimit(limit);
    nlpEntityOperator.setOffset(offset);

    List<Tuple> results = new ArrayList<Tuple>();

    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            results.add(nextTuple);
        }
    } finally {
        // Close the operator even when iteration throws.
        nlpEntityOperator.close();
    }
    return results;
}
/**
 * Splits the {@code TestConstantsRegexSplit.DESCRIPTION} attribute of {@code REGEX_TABLE}
 * on the regex "ana" (one-to-many, {@code SplitType.STANDALONE}) and asserts that no
 * output tuple carries the same {@code SchemaConstants._ID} as any tuple of the source
 * table.
 */
@Test
public void test8() throws TexeraException {
    String splitRegex = "ana";
    String splitAttrName = TestConstantsRegexSplit.DESCRIPTION;

    List<Tuple> results = computeRegexSplitResultsOneToMany(REGEX_TABLE, splitAttrName, splitRegex,
            RegexSplitPredicate.SplitType.STANDALONE);

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(REGEX_TABLE));

    scanSource.open();
    try {
        Tuple tupleTable;
        while ((tupleTable = scanSource.getNextTuple()) != null) {
            for (Tuple tuple : results) {
                Assert.assertFalse(tuple.getField(SchemaConstants._ID).equals(tupleTable.getField(SchemaConstants._ID)));
            }
        }
    } finally {
        // A failing assertion must not leave the scan source open.
        scanSource.close();
    }
}
}
/**
 * Runs a {@code FuzzyTokenMatcher} over a full scan of {@code tableName} and collects its
 * output. The predicate uses the analyzer string registered for the table in the
 * {@code RelationManager}; matches are written to the {@code RESULTS} attribute.
 *
 * @param tableName name of the table to scan
 * @param query query string handed to the {@code FuzzyTokenPredicate}
 * @param threshold threshold handed to the {@code FuzzyTokenPredicate}
 * @param attributeNames attributes the matcher is applied to
 * @param limit limit set on the matcher
 * @param offset offset set on the matcher
 * @return all tuples produced by the matcher, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String query, double threshold,
        List<String> attributeNames, int limit, int offset) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    FuzzyTokenPredicate fuzzyTokenPredicate = new FuzzyTokenPredicate(
            query, attributeNames, RelationManager.getInstance().getTableAnalyzerString(tableName), threshold,
            RESULTS);
    FuzzyTokenMatcher fuzzyTokenMatcher = new FuzzyTokenMatcher(fuzzyTokenPredicate);
    fuzzyTokenMatcher.setLimit(limit);
    fuzzyTokenMatcher.setOffset(offset);
    fuzzyTokenMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    fuzzyTokenMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = fuzzyTokenMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even when iteration throws.
        fuzzyTokenMatcher.close();
    }
    return results;
}
/**
 * Runs a {@code KeywordMatcher} over a full scan of {@code tableName} and collects its
 * output. The predicate uses the analyzer string registered for the table in the
 * {@code RelationManager}; matches are written to the {@code RESULTS} attribute.
 *
 * @param tableName name of the table to scan
 * @param keywordQuery keyword query handed to the {@code KeywordPredicate}
 * @param attributeNames attributes the matcher is applied to
 * @param matchingType keyword matching strategy
 * @param limit limit set on the matcher
 * @param offset offset set on the matcher
 * @return all tuples produced by the matcher, in the order they were emitted
 * @throws TexeraException if building or running the pipeline fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery, List<String> attributeNames,
        KeywordMatchingType matchingType, int limit, int offset) throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    KeywordPredicate keywordPredicate = new KeywordPredicate(
            keywordQuery, attributeNames, relationManager.getTableAnalyzerString(tableName), matchingType,
            RESULTS);
    KeywordMatcher keywordMatcher = new KeywordMatcher(keywordPredicate);
    keywordMatcher.setLimit(limit);
    keywordMatcher.setOffset(offset);
    keywordMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();

    keywordMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = keywordMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even when iteration throws.
        keywordMatcher.close();
    }
    return results;
}
new ScanSourcePredicate(PEOPLE_TABLE_2)); comparableMatcher.setInputOperator(sourceOperator); comparableMatcher.open();
new ScanSourcePredicate(PEOPLE_TABLE_2)); comparableMatcher.setInputOperator(sourceOperator); comparableMatcher.open();
@Test public void testThreeOutputsWithItself() throws Exception { IOperator sourceOperator = new ScanBasedSourceOperator( new ScanSourcePredicate(PEOPLE_TABLE));