/**
 * Writes the value of a field into a cell.
 *
 * A null field results in an empty string being written. The values of
 * DoubleField and IntegerField are handed to the cell as a double; the value
 * of any other field type (DateField included) is written via toString().
 *
 * @param cell the cell that receives the value
 * @param field the field to write; may be null
 */
private static void writeCell(Cell cell, IField field) {
    if (field == null) {
        cell.setCellValue("");
        return;
    }

    Object value = field.getValue();
    if (field instanceof DoubleField) {
        cell.setCellValue((double) value);
        return;
    }
    if (field instanceof IntegerField) {
        // unbox to int first, then widen to double
        cell.setCellValue((double) (int) value);
        return;
    }
    // all remaining field types are written in their string form
    cell.setCellValue(value.toString());
}
/**
 * Checks whether the string value of an attribute contains every affix in
 * {@code sortedAffixList}.
 *
 * @param tuple the tuple to examine
 * @param attribute name of the attribute whose value is searched
 * @return false as soon as one affix is not contained in the value,
 *         true if no affix is missing
 */
private boolean filterTuple(Tuple tuple, String attribute) {
    for (String affix : sortedAffixList) {
        String fieldText = tuple.getField(attribute).getValue().toString();
        boolean affixPresent = fieldText.contains(affix);
        if (!affixPresent) {
            return false;
        }
    }
    return true;
}
/**
 * Compares the value of one field between the inner and the outer tuple.
 *
 * @param innerTuple the inner tuple
 * @param outerTuple the outer tuple
 * @param attributeName name of the field to compare
 * @return true if both tuples have the field and the two values are equal
 */
private boolean compareField(Tuple innerTuple, Tuple outerTuple, String attributeName) {
    IField fromInner = innerTuple.getField(attributeName);
    IField fromOuter = outerTuple.getField(attributeName);

    boolean bothPresent = fromInner != null && fromOuter != null;
    return bothPresent && fromInner.getValue().equals(fromOuter.getValue());
}
/**
 * Removes the first tuple from {@code tupleBuffer} and returns a new tuple
 * made of its fields plus one extra IntegerField.
 *
 * The extra field holds the Integer that {@code idClassMap} associates with
 * the string form of the tuple's _ID value. When the buffer becomes empty it
 * is reset to null.
 *
 * @return a tuple built on {@code outputSchema}
 */
private Tuple popupOneTuple() {
    Tuple head = tupleBuffer.remove(0);
    if (tupleBuffer.isEmpty()) {
        tupleBuffer = null;
    }

    List<IField> extendedFields = new ArrayList<>(head.getFields());

    String tupleId = head.getField(SchemaConstants._ID).getValue().toString();
    Integer mappedClass = idClassMap.get(tupleId);
    extendedFields.add(new IntegerField(mappedClass));

    return new Tuple(outputSchema, extendedFields);
}
public static List<Span> generatePayloadFromTuple(Tuple tuple, Analyzer luceneAnalyzer) { List<Span> tuplePayload = tuple.getSchema().getAttributes().stream() .filter(attr -> (attr.getType() == AttributeType.TEXT)) // generate payload only for TEXT field .map(attr -> attr.getName()) .map(attributeName -> generatePayload(attributeName, tuple.getField(attributeName).getValue().toString(), luceneAnalyzer)) .flatMap(payload -> payload.stream()) // flatten a list of lists to a list .collect(Collectors.toList()); return tuplePayload; }
/**
 * Converts a tuple into a Lucene document.
 *
 * The i-th field of the tuple is paired with the i-th attribute of the
 * tuple's schema, and one Lucene field is added to the document per pair.
 *
 * @param tuple the tuple to convert
 * @return the document holding one Lucene field per tuple field
 */
private static Document getLuceneDocument(Tuple tuple) {
    List<IField> values = tuple.getFields();
    List<Attribute> schemaAttributes = tuple.getSchema().getAttributes();
    Document luceneDoc = new Document();

    int index = 0;
    while (index < values.size()) {
        IField value = values.get(index);
        Attribute attribute = schemaAttributes.get(index);
        luceneDoc.add(
                StorageUtils.getLuceneField(attribute.getType(), attribute.getName(), value.getValue()));
        index++;
    }
    return luceneDoc;
}
/** * Gets the directory of a table. * * @param tableName, the name of the table, case insensitive * @return * @throws StorageException */ public String getTableDirectory(String tableName) throws StorageException { // get the tuples with tableName from the table catalog Tuple tableCatalogTuple = getTableCatalogTuple(tableName); // if the tuple is not found, then the table name is not found if (tableCatalogTuple == null) { throw new StorageException(String.format("The directory for table %s is not found.", tableName)); } // get the directory field IField directoryField = tableCatalogTuple.getField(CatalogConstants.TABLE_DIRECTORY); return directoryField.getValue().toString(); }
/**
 * Collects the NAME field of every tuple in the dictionary table.
 *
 * @return the string form of each tuple's NAME value, in the order the
 *         reader returns the tuples
 * @throws StorageException if reading the table fails
 */
public List<String> getDictionaries() throws StorageException {
    List<String> dictionaries = new ArrayList<>();

    DataReader dataReader = relationManager.getTableDataReader(DictionaryManagerConstants.TABLE_NAME,
            new MatchAllDocsQuery());
    dataReader.open();
    try {
        Tuple tuple;
        while ((tuple = dataReader.getNextTuple()) != null) {
            dictionaries.add(tuple.getField(DictionaryManagerConstants.NAME).getValue().toString());
        }
    } finally {
        // close the reader even when reading a tuple fails part-way through
        dataReader.close();
    }

    return dictionaries;
}
/**
 * Adds up the string lengths of one field over a list of tuples.
 *
 * @param listTuple the tuples to measure
 * @param textField name of the field whose string form is measured
 * @return the total length, as a decimal string
 */
public String getTextLength(List<Tuple> listTuple, String textField) {
    int totalLength = listTuple.stream()
            .mapToInt(tuple -> tuple.getField(textField).getValue().toString().length())
            .sum();
    return String.valueOf(totalLength);
}

@AfterClass
/**
 * Tells whether at least one query string occurs, ignoring case, in at
 * least one of the given attributes of at least one tuple.
 *
 * @param exactResult the tuples to search
 * @param queryList the query strings to look for
 * @param attributeList names of the attributes to search in
 * @return true if any (tuple, attribute, query) combination matches
 */
public static boolean checkKeywordInAttributes(List<Tuple> exactResult, List<String> queryList,
        List<String> attributeList) {
    boolean anyMatch = false;
    // every combination is visited; there is deliberately no early exit
    for (Tuple tuple : exactResult) {
        for (String attribute : attributeList) {
            for (String query : queryList) {
                String fieldText = tuple.getField(attribute).getValue().toString().toLowerCase();
                if (fieldText.contains(query.toLowerCase())) {
                    anyMatch = true;
                }
            }
        }
    }
    return anyMatch;
}
public static List<Span> computeMatchingResultsWithPattern(Tuple inputTuple, RegexPredicate predicate, Pattern pattern) { List<Span> matchingResults = new ArrayList<>(); for (String attributeName : predicate.getAttributeNames()) { AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType(); String fieldValue = inputTuple.getField(attributeName).getValue().toString(); // types other than TEXT and STRING: throw Exception for now if (attributeType != AttributeType.STRING && attributeType != AttributeType.TEXT) { throw new DataflowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet"); } Matcher javaMatcher = pattern.matcher(fieldValue); while (javaMatcher.find()) { int start = javaMatcher.start(); int end = javaMatcher.end(); matchingResults.add( new Span(attributeName, start, end, predicate.getRegex(), fieldValue.substring(start, end))); } } return matchingResults; }
/**
 * Tells whether every tuple contains, ignoring case, at least one query
 * string in at least one of the given attributes.
 *
 * @param resultList the tuples to check
 * @param queryList the query strings to look for
 * @param attributeList names of the attributes to search in
 * @return false if the list is empty or some tuple has no match, true otherwise
 */
public static boolean containsQuery(List<Tuple> resultList, List<String> queryList,
        List<String> attributeList) {
    if (resultList.isEmpty()) {
        return false;
    }

    for (Tuple tuple : resultList) {
        boolean tupleMatched = false;
        // all attributes and queries of the tuple are visited before deciding
        for (String attribute : attributeList) {
            for (String query : queryList) {
                String fieldText = tuple.getField(attribute).getValue().toString().toLowerCase();
                if (fieldText.contains(query.toLowerCase())) {
                    tupleMatched = true;
                }
            }
        }
        if (!tupleMatched) {
            return false;
        }
    }
    return true;
}
/**
 * Builds the metadata (name and schema) of every table listed in the table
 * catalog, skipping the schema catalog and the table catalog themselves.
 *
 * @return one TableMetadata per remaining catalog entry
 * @throws StorageException if reading the catalog or a table schema fails
 */
public List<TableMetadata> getMetaData() throws StorageException {
    DataReader dataReader = RelationManager.getInstance().getTableDataReader(CatalogConstants.TABLE_CATALOG,
            new MatchAllDocsQuery());
    List<TableMetadata> result = new ArrayList<>();

    // the two catalog names do not change while iterating
    String schemaCatalogName = CatalogConstants.SCHEMA_CATALOG.toLowerCase();
    String tableCatalogName = CatalogConstants.TABLE_CATALOG.toLowerCase();

    dataReader.open();
    try {
        Tuple t;
        while ((t = dataReader.getNextTuple()) != null) {
            String tableName = (String) t.getField(CatalogConstants.TABLE_NAME).getValue();
            if (!tableName.equals(schemaCatalogName) && !tableName.equals(tableCatalogName)) {
                result.add(new TableMetadata(tableName, getTableSchema(tableName)));
            }
        }
    } finally {
        // close the reader even when reading a tuple or a schema lookup fails
        dataReader.close();
    }

    return result;
}
}
/**
 * Counts how often each lower-cased term occurs in one attribute across a
 * list of tuples.
 *
 * The attribute's string value is tokenized with DataflowUtils.tokenizeQuery
 * using the given analyzer.
 *
 * @param tuplesList the tuples to process
 * @param attribute name of the attribute to tokenize
 * @param analyzer the analyzer handed to the tokenizer
 * @return a map from lower-cased term to its number of occurrences
 */
public static HashMap<String, Integer> computeExpectedResult(List<Tuple> tuplesList, String attribute,
        Analyzer analyzer) {
    HashMap<String, Integer> termCounts = new HashMap<>();
    for (Tuple current : tuplesList) {
        String text = current.getField(attribute).getValue().toString();
        for (String term : DataflowUtils.tokenizeQuery(analyzer, text)) {
            // first occurrence stores 1, later ones add 1 to the stored count
            termCounts.merge(term.toLowerCase(), 1, Integer::sum);
        }
    }
    return termCounts;
}
/**
 * Extracts the value of every span stored in one attribute of a tuple.
 *
 * The attribute's value is cast to a list of spans without a runtime check
 * of the element type.
 *
 * @param tuple the tuple holding the span list
 * @param attribute name of the attribute whose value is the span list
 * @return the span values, in list order
 */
@SuppressWarnings("unchecked")
public static List<String> getTupleSpanListString(Tuple tuple, String attribute) {
    List<String> spanValues = new ArrayList<>();
    List<Span> spans = (List<Span>) tuple.getField(attribute).getValue();
    for (Span span : spans) {
        spanValues.add(span.getValue());
    }
    return spanValues;
}
/**
 * Checks that every tuple in a list agrees with an expected tuple on all of
 * the expected tuple's attributes except "_id".
 *
 * Values are compared by their string form.
 *
 * @param outputTuple the tuples to check
 * @param expectedTuple the tuple supplying the attribute names and expected values
 * @return false at the first differing value, true if none differs
 */
public static boolean compareTuple(List<Tuple> outputTuple, Tuple expectedTuple) {
    for (Tuple actual : outputTuple) {
        for (String attribute : expectedTuple.getSchema().getAttributeNames()) {
            if (attribute.equals("_id")) {
                continue;
            }
            String actualText = actual.getField(attribute).getValue().toString();
            String expectedText = expectedTuple.getField(attribute).getValue().toString();
            if (!actualText.equals(expectedText)) {
                return false;
            }
        }
    }
    return true;
}
/**
 * Splits TestConstantsRegexSplit.DESCRIPTION on the regex "b.*a" with split
 * type STANDALONE and checks the resulting strings.
 */
@Test
public void test3() throws TexeraException {
    List<Tuple> splitTuples = computeRegexSplitResultsOneToMany(REGEX_TABLE,
            TestConstantsRegexSplit.DESCRIPTION, "b.*a", RegexSplitPredicate.SplitType.STANDALONE);

    List<String> actual = new ArrayList<>();
    for (Tuple tuple : splitTuples) {
        actual.add(tuple.getField(RESULT_ATTR).getValue().toString());
    }

    List<String> expected = new ArrayList<>();
    expected.add("banana");
    expected.add("mississippi");

    Assert.assertEquals(expected, actual);
}
/**
 * Splits TestConstantsRegexSplit.DESCRIPTION on the regex "a.*n" with split
 * type GROUP_RIGHT and checks the resulting strings.
 */
@Test
public void test5() throws TexeraException {
    List<Tuple> splitTuples = computeRegexSplitResultsOneToMany(REGEX_TABLE,
            TestConstantsRegexSplit.DESCRIPTION, "a.*n", RegexSplitPredicate.SplitType.GROUP_RIGHT);

    List<String> actual = new ArrayList<>();
    for (Tuple tuple : splitTuples) {
        actual.add(tuple.getField(RESULT_ATTR).getValue().toString());
    }

    List<String> expected = new ArrayList<>();
    expected.add("b");
    expected.add("anana");
    expected.add("mississippi");

    Assert.assertEquals(expected, actual);
}
/**
 * Splits TestConstantsRegexSplit.DESCRIPTION on the regex "ana" with split
 * type STANDALONE and checks the resulting strings.
 */
@Test
public void test6() throws TexeraException {
    List<Tuple> splitTuples = computeRegexSplitResultsOneToMany(REGEX_TABLE,
            TestConstantsRegexSplit.DESCRIPTION, "ana", RegexSplitPredicate.SplitType.STANDALONE);

    List<String> actual = new ArrayList<>();
    for (Tuple tuple : splitTuples) {
        actual.add(tuple.getField(RESULT_ATTR).getValue().toString());
    }

    List<String> expected = new ArrayList<>();
    expected.add("b");
    expected.add("ana");
    expected.add("na");
    expected.add("mississippi");

    Assert.assertEquals(expected, actual);
}
/**
 * Splits TestConstantsRegexSplit.DESCRIPTION on the regex "a.*n" with split
 * type GROUP_LEFT and checks the resulting strings.
 */
@Test
public void test4() throws TexeraException {
    List<Tuple> splitTuples = computeRegexSplitResultsOneToMany(REGEX_TABLE,
            TestConstantsRegexSplit.DESCRIPTION, "a.*n", RegexSplitPredicate.SplitType.GROUP_LEFT);

    List<String> actual = new ArrayList<>();
    for (Tuple tuple : splitTuples) {
        actual.add(tuple.getField(RESULT_ATTR).getValue().toString());
    }

    List<String> expected = new ArrayList<>();
    expected.add("banan");
    expected.add("a");
    expected.add("mississippi");

    Assert.assertEquals(expected, actual);
}