/**
 * Builds the tuple that describes one table in the table catalog.
 *
 * The tuple is created with TABLE_CATALOG_SCHEMA and carries, in order, the
 * table name, the real path of the table directory, and the Lucene analyzer
 * string.
 *
 * @param tableName name of the table
 * @param tableDirectory directory of the table; it is resolved with
 *        {@code toRealPath()} before being stored
 * @param luceneAnalyzerStr string describing the Lucene analyzer of the table
 * @return the tuple to be inserted into the table catalog
 * @throws TexeraException wrapping the IOException raised when the real path
 *         of the directory cannot be resolved
 */
public static Tuple getTableCatalogTuple(String tableName, Path tableDirectory, String luceneAnalyzerStr) {
    final String realDirectoryPath;
    try {
        realDirectoryPath = tableDirectory.toRealPath().toString();
    } catch (IOException e) {
        throw new TexeraException(e);
    }
    return new Tuple(TABLE_CATALOG_SCHEMA,
            new StringField(tableName),
            new StringField(realDirectoryPath),
            new StringField(luceneAnalyzerStr));
}
/**
 * Parses the HTML of a ProMED page into a tuple of (file name, main text).
 *
 * The main text is the text of the element whose id is "preview".
 *
 * @param fileName name of the file the content came from; stored as a string field
 * @param content HTML content to parse
 * @return a tuple built with PromedSchema.PROMED_SCHEMA, or {@code null} when
 *         the content is null, contains no "preview" element, or any
 *         exception is raised while parsing or building the tuple
 */
public static Tuple parsePromedHTML(String fileName, String content) {
    if (content == null) {
        return null;
    }
    try {
        Document parsedDocument = Jsoup.parse(content);
        // getElementById yields null when no element has the id; check it
        // explicitly instead of relying on a NullPointerException being caught.
        org.jsoup.nodes.Element previewElement = parsedDocument.getElementById("preview");
        if (previewElement == null) {
            return null;
        }
        return new Tuple(PromedSchema.PROMED_SCHEMA,
                new StringField(fileName),
                new TextField(previewElement.text()));
    } catch (Exception e) {
        // Best-effort parsing: callers receive null for any content that cannot be parsed.
        return null;
    }
}
/**
 * Returns four sample staff tuples built with SCHEMA_STAFF.
 *
 * Each tuple consists of four string fields (they look like first name,
 * last name, e-mail address and phone number). Some e-mail values are not
 * well-formed; presumably that is intentional test data.
 *
 * @return a fixed-size list with the four sample tuples
 */
public static List<Tuple> getSampleStaffTuples() {
    return Arrays.asList(
            new Tuple(SCHEMA_STAFF,
                    new StringField("Melody"),
                    new StringField("Bocanegra"),
                    new StringField("m.bocanegra@164.com"),
                    new StringField("(945) 734-5156")),
            new Tuple(SCHEMA_STAFF,
                    new StringField("Kanon"),
                    new StringField("Hwang"),
                    new StringField("hwangk@ske.akb.edu"),
                    new StringField("(494) 352-8098")),
            new Tuple(SCHEMA_STAFF,
                    new StringField("Shirley"),
                    new StringField("Clarkson"),
                    new StringField("clarkson@facebook"),
                    new StringField("(587) 241-7550")),
            new Tuple(SCHEMA_STAFF,
                    new StringField("Lucy"),
                    new StringField("Kimoto"),
                    new StringField("lki?moto@microsoft.com"),
                    new StringField("(499) 824-3625")));
}
}
/**
 * Gets the tuples to be inserted to the schema catalog, one per attribute of
 * the given table schema.
 *
 * Each tuple is built with SCHEMA_CATALOG_SCHEMA and holds, in order: the
 * table name, the attribute name, the attribute type in lower case, and the
 * zero-based position of the attribute within the schema.
 *
 * @param tableName name of the table the attributes belong to
 * @param tableSchema schema whose attributes are described
 * @return the list of schema catalog tuples, in attribute order
 */
public static List<Tuple> getSchemaCatalogTuples(String tableName, Schema tableSchema) {
    int attributeCount = tableSchema.getAttributes().size();
    List<Tuple> schemaCatalogTuples = new ArrayList<>(attributeCount);
    for (int i = 0; i < attributeCount; i++) {
        Attribute attr = tableSchema.getAttributes().get(i);
        // Lower-case with Locale.ROOT so the stored type name does not depend on
        // the JVM default locale (e.g. a Turkish locale maps 'I' to a dotless 'ı').
        String typeName = attr.getType().toString().toLowerCase(java.util.Locale.ROOT);
        Tuple schemaTuple = new Tuple(SCHEMA_CATALOG_SCHEMA,
                new StringField(tableName),
                new StringField(attr.getName()),
                new StringField(typeName),
                new IntegerField(i));
        schemaCatalogTuples.add(schemaTuple);
    }
    return schemaCatalogTuples;
}
private Tuple computeNextMatchingTuple() throws TexeraException { if (sortedWordCountMap == null) { computeWordCount(); } if (wordCountIterator.hasNext()) { Entry<String, Integer> entry = wordCountIterator.next(); List<IField> tupleFieldList = new ArrayList<>(); // Generate the new UUID. tupleFieldList.add(IDField.newRandomID()); tupleFieldList.add(new StringField(entry.getKey())); tupleFieldList.add(new IntegerField(entry.getValue())); cursor++; return new Tuple(SCHEMA_WORD_COUNT, tupleFieldList); } return null; }
@Override protected Tuple computeNextMatchingTuple() throws TexeraException { if (sortedWordCountMap == null) { computeWordCount(); } if (wordCountIterator.hasNext()) { Entry<String, Integer> entry = wordCountIterator.next(); List<IField> tupleFieldList = new ArrayList<>(); // Generate the new UUID. tupleFieldList.add(IDField.newRandomID()); tupleFieldList.add(new StringField(entry.getKey())); tupleFieldList.add(new IntegerField(entry.getValue())); return new Tuple(outputSchema, tupleFieldList); } return null; }
/**
 * Builds two sample people tuples with SCHEMA_PEOPLE.
 *
 * Each tuple has six fields: two strings, an integer, a double, a date-time
 * parsed from an ISO-8601 local date-time string, and a text field.
 *
 * @return a fixed-size list containing the two sample tuples
 */
public static List<Tuple> constructSamplePeopleTuples() {
    Tuple firstPerson = new Tuple(SCHEMA_PEOPLE,
            new StringField("bruce"),
            new StringField("john Lee"),
            new IntegerField(46),
            new DoubleField(5.50),
            new DateTimeField(LocalDateTime.parse("1970-01-01T11:11:11")),
            new TextField("banana"));

    Tuple secondPerson = new Tuple(SCHEMA_PEOPLE,
            new StringField("tom hanks"),
            new StringField("cruise"),
            new IntegerField(45),
            new DoubleField(5.95),
            new DateTimeField(LocalDateTime.parse("1980-01-02T13:14:15")),
            new TextField("mississippi"));

    return Arrays.asList(firstPerson, secondPerson);
}
}
/**
 * Returns four sample corporation tuples built with SCHEMA_CORP.
 *
 * Each tuple consists of three string fields (they look like company name,
 * URL and IP address). Several values are not valid URLs or IP addresses;
 * presumably that is intentional test data.
 *
 * @return a fixed-size list with the four sample tuples
 */
public static List<Tuple> getSampleCorpTuples() {
    return Arrays.asList(
            new Tuple(SCHEMA_CORP,
                    new StringField("Facebook"),
                    new StringField("404 Not Found"),
                    new StringField("66.220.144.0")),
            new Tuple(SCHEMA_CORP,
                    new StringField("Weibo"),
                    new StringField("http://weibo.com"),
                    new StringField("180.149.134.141")),
            new Tuple(SCHEMA_CORP,
                    new StringField("Microsoft"),
                    new StringField("https://www.microsoft.com/en-us/"),
                    new StringField("131.107.0.89")),
            new Tuple(SCHEMA_CORP,
                    new StringField("Google"),
                    new StringField("websit: www.google.com"),
                    new StringField("8.8.8.8.8.8")));
}
}
public List<String> addDictionary(String fileName, String dictionaryContent) throws StorageException { // write metadata info DataWriter dataWriter = relationManager.getTableDataWriter(DictionaryManagerConstants.TABLE_NAME); dataWriter.open(); // clean up the same dictionary metadata if it already exists in dictionary table dataWriter.deleteTuple(new TermQuery(new Term(DictionaryManagerConstants.NAME, fileName))); // insert new tuple dataWriter.insertTuple(new Tuple(DictionaryManagerConstants.SCHEMA, new StringField(fileName))); dataWriter.close(); // write actual dictionary file writeToFile(fileName, dictionaryContent); return null; }
public static List<Tuple> getSamplePeopleTuples() { try { IField[] fields1 = { new StringField("无忌"), new StringField("长孙"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("北京大学电气工程学院") }; IField[] fields2 = { new StringField("孔明"), new StringField("洛克贝尔"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("北京大学计算机学院") }; IField[] fields3 = { new StringField("宋江"), new StringField("建筑"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("伟大的建筑是历史的坐标,具有传承的价值。") }; Tuple tuple1 = new Tuple(SCHEMA_PEOPLE, fields1); Tuple tuple2 = new Tuple(SCHEMA_PEOPLE, fields2); Tuple tuple3 = new Tuple(SCHEMA_PEOPLE, fields3); return Arrays.asList(tuple1, tuple2, tuple3); } catch (ParseException e) { // exception should not happen because we know the data is correct e.printStackTrace(); return Arrays.asList(); } } }
@Test public void testGetters() throws ParseException { // create data tuple first Attribute[] attributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1]; for (int count = 0; count < attributes.length - 1; count++) { attributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count]; } attributes[attributes.length - 1] = SchemaConstants.SPAN_LIST_ATTRIBUTE; List<IField> fields = new ArrayList<IField>( Arrays.asList(new IField[] { new StringField("bruce"), new StringField("lee"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("bruce was born in new york city and was grown up in los angeles") })); IField spanField = createSpanListField(); fields.add(spanField); spanTuple = new Tuple(new Schema(attributes), fields.toArray(new IField[fields.size()])); IField spanFieldRetrieved = spanTuple.getField(SchemaConstants.SPAN_LIST); Assert.assertTrue(spanFieldRetrieved instanceof ListField); Assert.assertSame(spanField, spanFieldRetrieved); }
@Test public void testMultipleWordQueryInTextFieldUsingScan2() throws Exception { ArrayList<String> names = new ArrayList<String>(Arrays.asList("tall fair")); Dictionary dictionary = new Dictionary(names); // create a data tuple first List<Span> list1 = new ArrayList<Span>(); Span span1 = new Span("description", 0, 9, "tall fair","Tall Fair"); list1.add(span1); Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1]; for (int count = 0; count < schemaAttributes.length - 1; count++) { schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count]; } schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE; IField[] fields2 = { new StringField("christian john wayne"), new StringField("rock bale"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("Tall Fair"), new ListField<Span>(list1) }; Tuple tuple2 = new Tuple(new Schema(schemaAttributes), fields2); List<Tuple> expectedResults = new ArrayList<Tuple>(); expectedResults.add(tuple2); List<String> attributeNames = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION); List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.SUBSTRING_SCANBASED); boolean contains = TestUtils.equals(expectedResults, returnedResults); Assert.assertTrue(contains); }
/**
 * Projects the people table onto first name and description and expects
 * six tuples carrying exactly those two fields.
 */
@Test
public void testProjection2() throws Exception {
    Schema projectionSchema = new Schema(TestConstants.FIRST_NAME_ATTR, TestConstants.DESCRIPTION_ATTR);

    List<Tuple> expectedResults = Arrays.asList(
            new Tuple(projectionSchema,
                    new StringField("bruce"), new TextField("Tall Angry")),
            new Tuple(projectionSchema,
                    new StringField("tom hanks"), new TextField("Short Brown")),
            new Tuple(projectionSchema,
                    new StringField("brad lie angelina"), new TextField("White Angry")),
            new Tuple(projectionSchema,
                    new StringField("george lin lin"),
                    new TextField("Lin Clooney is Short and lin clooney is Angry")),
            new Tuple(projectionSchema,
                    new StringField("christian john wayne"), new TextField("Tall Fair")),
            new Tuple(projectionSchema,
                    new StringField("Mary brown"), new TextField("Short angry")));

    List<String> projectionFields = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.DESCRIPTION);
    ScanBasedSourceOperator peopleScan = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    List<Tuple> returnedResults = getProjectionResults(peopleScan, projectionFields);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
/**
 * Aggregates MAX over the date-of-birth attribute and MIN over the first-name
 * attribute in one query, and expects a single result row
 * (01-13-1974, "Mary brown").
 */
@Test
public void testMaxDOBMinNameAggregation() throws Exception {
    String maxDateResultName = AggregatorTestConstants.MAX_DATE_RESULT_ATTR_NAME;
    String minFirstNameResultName = AggregatorTestConstants.MIN_FIRST_NAME_RESULT_ATTR_NAME;

    // Two aggregations evaluated together: MAX(date of birth), MIN(first name).
    List<AggregationAttributeAndResult> aggEntitiesList = new ArrayList<>();
    aggEntitiesList.add(new AggregationAttributeAndResult(
            TestConstants.DATE_OF_BIRTH_ATTR.getName(), AggregationType.MAX, maxDateResultName));
    aggEntitiesList.add(new AggregationAttributeAndResult(
            TestConstants.FIRST_NAME_ATTR.getName(), AggregationType.MIN, minFirstNameResultName));

    // Expected single output row and its schema.
    IField[] expectedRow = {
            new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")),
            new StringField("Mary brown")
    };
    Schema expectedSchema = new Schema(
            new Attribute(maxDateResultName, AttributeType.DATE),
            new Attribute(minFirstNameResultName, AttributeType.STRING));
    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(expectedSchema, expectedRow));

    List<Tuple> returnedResults = getQueryResults(aggEntitiesList);

    Assert.assertEquals(1, returnedResults.size());
    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
/** * Scenario: verifies GetNextTuple of DictionaryMatcher and single word * queries in String Field using SUBSTRING_SCANBASED * Test in Chinese. */ @Test public void testSingleWordQueryInStringFieldUsingScanChinese() throws Exception { ArrayList<String> names = new ArrayList<String>(Arrays.asList("孔明")); Dictionary dictionary = new Dictionary(names); // create a data tuple first List<Span> list = new ArrayList<Span>(); Span span = new Span("firstName", 0, 2, "孔明", "孔明"); list.add(span); Schema resultSchema = new Schema.Builder().add(TestConstantsChinese.SCHEMA_PEOPLE).add(RESULTS_ATTRIBUTE).build(); IField[] fields1 = { new StringField("孔明"), new StringField("洛克贝尔"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("北京大学计算机学院"), new ListField<Span>(list) }; Tuple tuple1 = new Tuple(resultSchema, fields1); List<Tuple> expectedResults = new ArrayList<Tuple>(); expectedResults.add(tuple1); List<String> attributeNames = Arrays.asList(TestConstants.FIRST_NAME, TestConstantsChinese.LAST_NAME, TestConstantsChinese.DESCRIPTION); List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.SUBSTRING_SCANBASED); boolean contains = TestUtils.equals(expectedResults, returnedResults); Assert.assertTrue(contains); }
/**
 * Joins two keyword sources that both search "special" in the same book,
 * using a distance predicate of 20 on the review field. Expects one result
 * tuple whose span list holds the single span [11, 18) with key
 * "special_special" and value "special".
 */
@Test
public void testBothTheSpansAreSame() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator keywordSourceOuter =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);

    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
            keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    // Expected output: the book tuple extended with the joined span list.
    Schema resultSchema = new Schema.Builder()
            .add(JoinTestConstants.BOOK_SCHEMA)
            .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
            .build();

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 11, 18, "special_special", "special"));

    String reviewText = "It takes a special kind "
            + "of writer to make topics ranging from death to our "
            + "gastrointestinal tract interesting (sometimes "
            + "hilariously so), and pop science writer Mary Roach is "
            + "always up to the task.";

    IField[] expectedBookFields = {
            new IntegerField(52),
            new StringField("Mary Roach"),
            new StringField("Grunt: The Curious Science of Humans at War"),
            new IntegerField(288),
            new TextField(reviewText),
            new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, expectedBookFields));

    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
/**
 * Joins the phrase matches "gastrointestinal tract" (outer) and
 * "tract interesting" (inner) of the same book, using a distance predicate
 * of 20 on the review field. The two spans overlap; one result tuple is
 * expected whose span list holds the combined span [75, 109).
 */
@Test
public void testSpansOverlapAndWithinThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator keywordSourceOuter =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "gastrointestinal tract", phrase);
    KeywordMatcherSourceOperator keywordSourceInner =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "tract interesting", phrase);

    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
            keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    // Expected output: the book tuple extended with the joined span list.
    Schema resultSchema = new Schema.Builder()
            .add(JoinTestConstants.BOOK_SCHEMA)
            .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
            .build();

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 75, 109,
            "gastrointestinal tract_" + "tract interesting",
            "gastrointestinal " + "tract interesting"));

    String reviewText = "It takes a special kind "
            + "of writer to make topics ranging from death to our "
            + "gastrointestinal tract interesting (sometimes "
            + "hilariously so), and pop science writer Mary Roach is "
            + "always up to the task.";

    IField[] expectedBookFields = {
            new IntegerField(52),
            new StringField("Mary Roach"),
            new StringField("Grunt: The Curious Science of Humans at War"),
            new IntegerField(288),
            new TextField(reviewText),
            new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, expectedBookFields));

    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
/**
 * Joins the keyword matches "special" (outer) and "writer" (inner) of the
 * same book, using a distance predicate of 20 on the review field. One
 * result tuple is expected whose span list holds the span [11, 33) with key
 * "special_writer" and value "special kind of writer".
 */
@Test
public void testIdsMatchFieldsMatchSpanWithinThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator keywordSourceOuter =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "writer", conjunction);

    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
            keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    // Expected output: the book tuple extended with the joined span list.
    Schema resultSchema = new Schema.Builder()
            .add(JoinTestConstants.BOOK_SCHEMA)
            .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
            .build();

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 11, 33,
            "special_writer", "special kind of " + "writer"));

    String reviewText = "It takes a special kind "
            + "of writer to make topics ranging from death to our "
            + "gastrointestinal tract interesting (sometimes "
            + "hilariously so), and pop science writer Mary Roach is "
            + "always up to the task.";

    IField[] expectedBookFields = {
            new IntegerField(52),
            new StringField("Mary Roach"),
            new StringField("Grunt: The Curious Science of Humans at War"),
            new IntegerField(288),
            new TextField(reviewText),
            new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, expectedBookFields));

    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
/**
 * Joins the keyword match "special" (outer) with the phrase match
 * "takes a special kind of writer" (inner) of the same book, using a
 * distance predicate of 20 on the review field. The phrase span contains the
 * keyword span; one result tuple is expected whose span list holds the
 * span [3, 33).
 */
@Test
public void testOneSpanEncompassesOtherAndDifferenceLessThanThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator keywordSourceOuter =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "takes a special kind of writer", phrase);

    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
            keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    // Expected output: the book tuple extended with the joined span list.
    Schema resultSchema = new Schema.Builder()
            .add(JoinTestConstants.BOOK_SCHEMA)
            .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
            .build();

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 3, 33,
            "special_takes a special " + "kind of writer",
            "takes a special " + "kind of writer"));

    String reviewText = "It takes a special kind "
            + "of writer to make topics ranging from death to our "
            + "gastrointestinal tract interesting (sometimes "
            + "hilariously so), and pop science writer Mary Roach is "
            + "always up to the task.";

    IField[] expectedBookFields = {
            new IntegerField(52),
            new StringField("Mary Roach"),
            new StringField("Grunt: The Curious Science of Humans at War"),
            new IntegerField(288),
            new TextField(reviewText),
            new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, expectedBookFields));

    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
// Build a tuple for tableSchema whose only field is the string "test".
Tuple insertedTuple = new Tuple(tableSchema, new StringField("test"));
// Insert it and keep the IDField returned by the writer (presumably the ID assigned to the new tuple).
IDField idField = dataWriter.insertTuple(insertedTuple);
// Close the writer once the insert is done.
dataWriter.close();