/**
 * Deserializes the JSON form of a span-list field back into a {@code ListField<Span>}.
 * Reads the array stored under {@code JsonConstants.FIELD_VALUE} and converts each
 * element to a {@link Span}.
 *
 * @param p    the JSON parser positioned at the serialized field
 * @param ctxt deserialization context (unused)
 * @return a {@code ListField<Span>} holding the deserialized spans
 * @throws IOException if reading the JSON tree fails
 */
@Override
public ListField<Span> deserialize(JsonParser p, DeserializationContext ctxt)
        throws IOException, JsonProcessingException {
    JsonNode node = p.getCodec().readTree(p);
    JsonNode fieldValueNode = node.get(JsonConstants.FIELD_VALUE);
    // Fix: the original constructed a new ObjectMapper for every array element;
    // ObjectMapper creation is expensive, so build one and reuse it.
    ObjectMapper mapper = new ObjectMapper();
    ArrayList<Span> spanList = new ArrayList<>(fieldValueNode.size());
    for (int i = 0; i < fieldValueNode.size(); i++) {
        JsonNode spanValueNode = fieldValueNode.get(i);
        spanList.add(mapper.convertValue(spanValueNode, Span.class));
    }
    return new ListField<Span>(spanList);
}
// NOTE(review): fragment of a larger tuple-merge routine — resultFields,
// mergeSpanList, innerPayloadField, resultPayload, outerTuple, and
// addFieldPrefix are all declared outside this excerpt.
resultFields.add(new ListField<Span>(mergeSpanList));
// Inner payload is read here; presumably folded into resultPayload in the
// surrounding (unseen) code — TODO confirm against the full method.
List<Span> innerPayload = innerPayloadField.getValue();
ListField<Span> outerPayloadField = outerTuple.getField(SchemaConstants.PAYLOAD);
List<Span> outerPayload = outerPayloadField.getValue();
// Prefix outer-tuple payload span attribute names with "outer_" so the merged
// payload can distinguish which side each span came from.
resultPayload.addAll(outerPayload.stream().map(span -> addFieldPrefix(span, "outer_")).collect(Collectors.toList()));
resultFields.add(new ListField<Span>(resultPayload));
/**
 * Builds a lookup from each configured label to the list of spans stored in
 * the input tuple's field of that name.
 *
 * @param inputTuple tuple whose label fields are read
 * @return map of label name to its corresponding span list
 */
private Map<String, List<Span>> fetchLabelSpans(Tuple inputTuple) {
    Map<String, List<Span>> spansByLabel = new HashMap<>();
    for (String label : this.labelList) {
        ListField<Span> labelField = inputTuple.getField(label);
        spansByLabel.put(label, labelField.getValue());
    }
    return spansByLabel;
}
/**
 * Collects, for every configured label, the set of distinct escaped span
 * values stored in the input tuple's field of that name.
 *
 * @param inputTuple tuple to read label fields from
 * @return map of label name to its set of escaped attribute values
 * @throws DataflowException if a configured label is absent from the tuple's schema
 */
private Map<String, Set<String>> fetchLabelValues(Tuple inputTuple) throws DataflowException {
    Map<String, Set<String>> valuesByLabel = new HashMap<>();
    for (String label : this.labelList) {
        // fail fast on a label the schema does not contain
        if (!inputTuple.getSchema().containsAttribute(label)) {
            throw new DataflowException("label " + label + " does not exist");
        }
        ListField<Span> labelField = inputTuple.getField(label);
        Set<String> escapedValues = labelField.getValue().stream()
                .map(Span::getValue)
                .map(value -> escapeString(value))
                .collect(Collectors.toSet());
        valuesByLabel.put(label, escapedValues);
    }
    return valuesByLabel;
}
/**
 * Builds an output tuple from the Lucene document identified by {@code docID},
 * optionally appending a payload field built from the document's term vectors.
 *
 * @param docID Lucene document id to materialize
 * @return tuple conforming to {@code outputSchema}
 * @throws IOException    if the Lucene index cannot be read
 * @throws ParseException if field parsing fails
 */
private Tuple constructTuple(int docID) throws IOException, ParseException {
    Document luceneDocument = luceneIndexSearcher.doc(docID);
    ArrayList<IField> fields = documentToFields(luceneDocument);
    if (payloadAdded) {
        // append the payload spans derived from term vectors as a list field
        ArrayList<Span> payloadSpans = buildPayloadFromTermVector(fields, docID);
        fields.add(new ListField<Span>(payloadSpans));
    }
    return new Tuple(outputSchema, fields.stream().toArray(IField[]::new));
}
/**
 * Drains the input operator, counting occurrences of each (lower-cased) token
 * in the payload spans of the configured attribute, then materializes the
 * counts sorted by descending frequency into {@code sortedWordCountMap} and
 * resets {@code wordCountIterator}.
 *
 * @throws TexeraException if reading from the input operator fails
 */
private void computeWordCount() throws TexeraException {
    Tuple tuple;
    HashMap<String, Integer> wordCountMap = new HashMap<>();
    while ((tuple = this.inputOperator.getNextTuple()) != null) {
        if (addPayload) {
            // input had no payload: generate one from the tuple text
            tuple = new Tuple.Builder(tuple)
                    .add(SchemaConstants.PAYLOAD_ATTRIBUTE,
                            new ListField<Span>(DataflowUtils.generatePayloadFromTuple(
                                    tuple, predicate.getLuceneAnalyzerString())))
                    .build();
        }
        // Fix: use the shared constant instead of the magic string "payload",
        // consistent with the rest of this file.
        ListField<Span> payloadField = tuple.getField(SchemaConstants.PAYLOAD);
        for (Span span : payloadField.getValue()) {
            if (span.getAttributeName().equals(predicate.getAttribute())) {
                String key = span.getValue().toLowerCase();
                // Fix: merge() does one lookup instead of the original
                // get()+get()+put() triple access.
                wordCountMap.merge(key, 1, Integer::sum);
            }
        }
    }
    // sort entries by descending count
    sortedWordCountMap = wordCountMap.entrySet().stream()
            .sorted((e1, e2) -> e2.getValue().compareTo(e1.getValue()))
            .collect(Collectors.toList());
    wordCountIterator = sortedWordCountMap.iterator();
}
/**
 * Benchmarks fuzzy-token matching: runs every query in {@code queryList}
 * against the abstract attribute of {@code tableName}, recording per-query
 * match time (seconds, 4 decimal places) in {@code timeResults} and the span
 * count in {@code totalResultCount}.
 *
 * @param queryList         queries to execute
 * @param threshold         fuzzy-match similarity threshold
 * @param luceneAnalyzerStr analyzer used for tokenization
 * @param tableName         table to search
 * @param bool              unused flag kept for interface compatibility
 * @throws TexeraException on operator failure
 * @throws IOException     on index access failure
 */
public static void match(ArrayList<String> queryList, double threshold, String luceneAnalyzerStr,
        String tableName, boolean bool) throws TexeraException, IOException {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String query : queryList) {
        FuzzyTokenSourcePredicate predicate = new FuzzyTokenSourcePredicate(query, attributeNames,
                luceneAnalyzerStr, threshold, tableName, SchemaConstants.SPAN_LIST);
        FuzzyTokenMatcherSourceOperator source = new FuzzyTokenMatcherSourceOperator(predicate);

        long startMillis = System.currentTimeMillis();
        source.open();
        int matchCount = 0;
        for (Tuple tuple = source.getNextTuple(); tuple != null; tuple = source.getNextTuple()) {
            ListField<Span> spanListField = tuple.getField(SchemaConstants.SPAN_LIST);
            matchCount += spanListField.getValue().size();
        }
        source.close();
        long endMillis = System.currentTimeMillis();

        double elapsedSeconds = (endMillis - startMillis) / 1000.0;
        timeResults.add(Double.parseDouble(String.format("%.4f", elapsedSeconds)));
        totalResultCount += matchCount;
    }
}
/**
 * Runs NLP span extraction over each configured attribute of the input tuple.
 * Returns {@code null} when no attribute produced any span (tuple filtered
 * out); otherwise returns the tuple extended with the result attribute
 * holding all extracted spans.
 *
 * @param inputTuple tuple to process
 * @return extended tuple, or {@code null} if nothing matched
 * @throws TexeraException on extraction failure
 */
@Override
public Tuple processOneInputTuple(Tuple inputTuple) throws TexeraException {
    List<Span> extractedSpans = new ArrayList<>();
    for (String attributeName : predicate.getAttributeNames()) {
        IField attributeField = inputTuple.getField(attributeName);
        extractedSpans.addAll(extractNlpSpans(attributeField, attributeName));
    }
    if (extractedSpans.isEmpty()) {
        return null;
    }
    return new Tuple.Builder(inputTuple)
            .add(predicate.getResultAttribute(), AttributeType.LIST,
                    new ListField<Span>(extractedSpans))
            .build();
}
/**
 * Benchmarks keyword matching: runs every query in {@code queryList} against
 * the abstract attribute of {@code tableName}, recording per-query match time
 * (seconds, 4 decimal places) in {@code timeResults} and the span count in
 * {@code totalResultCount}.
 *
 * @param queryList         queries to execute
 * @param opType            keyword matching mode
 * @param luceneAnalyzerStr analyzer used for tokenization
 * @param tableName         table to search
 * @throws TexeraException on operator failure
 * @throws IOException     on index access failure
 */
public static void match(ArrayList<String> queryList, KeywordMatchingType opType,
        String luceneAnalyzerStr, String tableName) throws TexeraException, IOException {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String query : queryList) {
        KeywordSourcePredicate predicate = new KeywordSourcePredicate(query, attributeNames,
                luceneAnalyzerStr, opType, tableName, SchemaConstants.SPAN_LIST);
        KeywordMatcherSourceOperator source = new KeywordMatcherSourceOperator(predicate);

        long startMillis = System.currentTimeMillis();
        source.open();
        int matchCount = 0;
        for (Tuple tuple = source.getNextTuple(); tuple != null; tuple = source.getNextTuple()) {
            ListField<Span> spanListField = tuple.getField(SchemaConstants.SPAN_LIST);
            matchCount += spanListField.getValue().size();
        }
        source.close();
        long endMillis = System.currentTimeMillis();

        double elapsedSeconds = (endMillis - startMillis) / 1000.0;
        timeResults.add(Double.parseDouble(String.format("%.4f", elapsedSeconds)));
        totalResultCount += matchCount;
    }
}
private IField createSpanListField() { List<Span> list = new ArrayList<Span>(); // The key value will be: // For RegexMatcher : "n.*k" // For NamedEntityMatcher : LOCATION // For DictionaryMatcher: "new york" - For DictionaryMatcher the key and // value are same // For KeyWordMatcher: "new york" - the value can be "new" or "york" Span span1 = new Span("description", 18, 26, "LOCATION", "new york"); Span span2 = new Span("description", 52, 63, "LOCATION", "los angeles"); list.add(span1); list.add(span2); IField spanListField = new ListField<Span>(list); return spanListField; }
// Appends phrase-matching spans for dictionary matching.
// NOTE(review): truncated in this excerpt — the per-attribute matching logic
// inside the for-loop continues beyond the end of this fragment.
public List<Span> appendPhraseMatchingSpans4Dictionary(Tuple inputTuple, List<String> attributeNames, List<List<String>> queryTokenList, List<Set<String>> queryTokenSetList, List<List<String>> queryTokenListWithStopwords, List<String> queryList) throws DataflowException {
    List<Span> matchingResults = new ArrayList<>();
    // read the pre-computed token payload attached to the tuple
    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> payload = payloadField.getValue();
    // keep, per query index, only the payload spans whose tokens are relevant
    Map<Integer, List<Span>> relevantSpansMap = filterRelevantSpans(payload, queryTokenSetList);
    for (String attributeName : attributeNames) {
public static List<Tuple> getOneToOneResultTuple() throws ParseException { // Build the expected result Tuple List<Span> spanList = new ArrayList<Span>(); Span span1 = new Span(TEXT, 0, sentence1.length(), PropertyNameConstants.NLP_SPLIT_KEY, sentence1); spanList.add(span1); Span span2 = new Span(TEXT, sentence1.length()+1, sentence1.length()+sentence2.length()+1, PropertyNameConstants.NLP_SPLIT_KEY, sentence2); spanList.add(span2); Tuple tuple1 = getOneToOneTestTuple().get(0); Tuple returnTuple = new Tuple.Builder(tuple1).add(SchemaConstants.SPAN_LIST_ATTRIBUTE, new ListField<Span>(spanList)).build(); return Arrays.asList(returnTuple); }
// Appends conjunction-matching spans for dictionary matching.
// NOTE(review): truncated in this excerpt — the per-attribute matching logic
// inside the for-loop continues beyond the end of this fragment.
private List<Span> appendConjunctionMatchingSpans4Dictionary(Tuple inputTuple, List<String> attributeNames, List<Set<String>> queryTokenSetList, List<String> queryList) throws DataflowException {
    List<Span> matchingResults = new ArrayList<>();
    // read the pre-computed token payload attached to the tuple
    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> payload = payloadField.getValue();
    // keep, per query index, only the payload spans whose tokens are relevant
    Map<Integer, List<Span>> relevantSpansMap = filterRelevantSpans(payload, queryTokenSetList);
    for (String attributeName : attributeNames) {
// NOTE(review): fragment cut mid-method (unbalanced braces) — the `return
// null;` belongs to a guard on an earlier, unseen line, and the ONE_TO_MANY
// branch is truncated. Code left byte-identical.
return null;
// carry over the current tuple's fields, then append the sentence-list field
outputFields.addAll(currentTuple.getFields());
outputFields.add(new ListField<Span>(computeSentenceList(currentTuple)));
} else if(predicate.getOutputType() == RegexOutputType.ONE_TO_MANY) {
    // one output tuple per sentence: refill the sentence buffer when empty
    if(currentSentenceList.isEmpty()) {
public static void matchRegex(List<String> regexes, String tableName) throws TexeraException, IOException { List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT); for(String regex: regexes){ // analyzer should generate grams all in lower case to build a lower // case index. RegexSourcePredicate predicate = new RegexSourcePredicate(regex, attributeNames, tableName, SchemaConstants.SPAN_LIST); RegexMatcherSourceOperator regexSource = new RegexMatcherSourceOperator(predicate); long startMatchTime = System.currentTimeMillis(); regexSource.open(); int counter = 0; Tuple nextTuple = null; while ((nextTuple = regexSource.getNextTuple()) != null) { ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST); List<Span> spanList = spanListField.getValue(); counter += spanList.size(); } regexSource.close(); long endMatchTime = System.currentTimeMillis(); double matchTime = (endMatchTime - startMatchTime) / 1000.0; totalMatchingTime += matchTime; totalRegexResultCount += counter; } }
// NOTE(review): switch-case fragment — the enclosing switch (presumably on
// the attribute type) is outside this excerpt. Wraps the raw value in a
// single-element string ListField.
field = new ListField<String>(Arrays.asList(fieldValue));
break;
// Appends conjunction-matching spans for a single keyword query.
// NOTE(review): truncated in this excerpt — the per-attribute matching logic
// inside the for-loop continues beyond the end of this fragment.
private List<Span> appendConjunctionMatchingSpans(Tuple inputTuple, List<String> attributeNames, Set<String> queryTokenSet, String queryKeyword) throws DataflowException {
    // read the pre-computed token payload attached to the tuple
    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> payload = payloadField.getValue();
    List<Span> matchingResults = new ArrayList<>();
    for (String attributeName : attributeNames) {
// NOTE(review): fragment of a larger getNextTuple-style routine —
// outputFields and computeSentenceList are defined outside this excerpt.
// Guard: no more input tuples, propagate end-of-stream.
if (currentTuple == null) return null;
// carry over the current tuple's fields, then append the sentence-list field
outputFields.addAll(currentTuple.getFields());
outputFields.add(new ListField<Span>(computeSentenceList(currentTuple)));
// Appends phrase-matching spans for a single keyword query.
// NOTE(review): truncated in this excerpt — the per-attribute matching logic
// inside the for-loop continues beyond the end of this fragment.
private List<Span> appendPhraseMatchingSpans(Tuple inputTuple, List<String> attributeNames, List<String> queryTokenList, List<String> queryTokenListWithStopwords, String queryKeyword) throws DataflowException {
    // read the pre-computed token payload attached to the tuple
    ListField<Span> payloadField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> payload = payloadField.getValue();
    List<Span> matchingResults = new ArrayList<>();
    for (String attributeName : attributeNames) {
@Test public void testGetNextTuplePeopleFirstName() throws Exception { String query = "g[^\\s]*"; List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults( PEOPLE_TABLE, query, Arrays.asList(TestConstants.FIRST_NAME)); List<Tuple> expectedResults = new ArrayList<Tuple>(); // expected to match "brad lie angelina" List<Tuple> data = TestConstants.getSamplePeopleTuples(); Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build(); List<Span> spans = new ArrayList<Span>(); spans.add(new Span(TestConstants.FIRST_NAME, 11, 17, query, "gelina")); IField spanField = new ListField<Span>(new ArrayList<Span>(spans)); List<IField> fields = new ArrayList<IField>(data.get(2).getFields()); fields.add(spanField); expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()]))); // expected to match "george lin lin" spans.clear(); spans.add(new Span(TestConstants.FIRST_NAME, 0, 6, query, "george")); spanField = new ListField<Span>(new ArrayList<Span>(spans)); fields = new ArrayList<IField>(data.get(3).getFields()); fields.add(spanField); expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()]))); Assert.assertTrue(TestUtils.equals(expectedResults, exactResults)); }