/**
 * Parses an HTML string and wraps the text of its {@code "preview"} element in a tuple.
 *
 * <p>The tuple uses {@code PromedSchema.PROMED_SCHEMA} and holds a string field with
 * the file name followed by a text field with the extracted text.
 *
 * @param fileName value stored in the tuple's string field
 * @param content  HTML markup handed to Jsoup
 * @return the constructed tuple, or {@code null} if any exception is raised while
 *         parsing, looking up the element, or building the tuple
 */
public static Tuple parsePromedHTML(String fileName, String content) {
    try {
        String previewText = Jsoup.parse(content).getElementById("preview").text();
        return new Tuple(PromedSchema.PROMED_SCHEMA,
                new StringField(fileName),
                new TextField(previewText));
    } catch (Exception e) {
        // Best-effort parsing: every failure is reported to the caller as null.
        return null;
    }
}
/**
 * Builds three tuples under {@code SCHEMA_TWO_SENTENCE}, each with two text fields.
 *
 * @return a fixed-size list containing the three tuples in the order they are built
 */
public static List<Tuple> getTwoSentenceTestTuple() {
    Tuple namesTuple = new Tuple(SCHEMA_TWO_SENTENCE, new IField[] {
            new TextField("Microsoft, Google and Facebook are organizations."),
            new TextField("Donald Trump and Barack Obama are persons") });

    Tuple datesTuple = new Tuple(SCHEMA_TWO_SENTENCE, new IField[] {
            new TextField("I made an appointment at 8 am."),
            new TextField("Aug 16, 2016 is a really important date.") });

    Tuple artistsTuple = new Tuple(SCHEMA_TWO_SENTENCE, new IField[] {
            new TextField("I really love Kelly Clarkson's Because of You."),
            new TextField("Shirley Temple is a very famous actress.") });

    return Arrays.asList(namesTuple, datesTuple, artistsTuple);
}
/**
 * Builds the single two-sentence tuple used as input for test 4.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_TWO_SENTENCE}
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getTest4Tuple() throws ParseException {
    IField[] sentenceFields = new IField[] {
            new TextField("Microsoft, Google and Facebook are organizations."),
            new TextField("Donald Trump and Barack Obama are persons")
    };
    return Arrays.asList(new Tuple(SCHEMA_TWO_SENTENCE, sentenceFields));
}
/**
 * Builds the single one-sentence tuple used as input for test 1.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_ONE_SENTENCE}
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getTest1Tuple() throws ParseException {
    IField[] sentenceFields = new IField[] {
            new TextField("Microsoft is an organization.")
    };
    return Arrays.asList(new Tuple(SCHEMA_ONE_SENTENCE, sentenceFields));
}
/**
 * Builds the single one-sentence tuple used as input for test 7.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_ONE_SENTENCE}
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getTest7Tuple() throws ParseException {
    TextField sentence = new TextField(
            "Feeling the warm sun rays beaming steadily down, the girl decided there was no need to wear a coat.");
    Tuple inputTuple = new Tuple(SCHEMA_ONE_SENTENCE, new IField[] { sentence });
    return Arrays.asList(inputTuple);
}
/**
 * Builds the single one-sentence tuple used as input for test 2.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_ONE_SENTENCE}
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getTest2Tuple() throws ParseException {
    TextField sentence = new TextField("Microsoft, Google and Facebook are organizations.");
    Tuple inputTuple = new Tuple(SCHEMA_ONE_SENTENCE, new IField[] { sentence });
    return Arrays.asList(inputTuple);
}
/**
 * Builds one tuple under {@code SPLIT_SCHEMA} whose only field is a text field
 * containing {@code sentence1} concatenated with {@code sentence2}.
 *
 * @return a fixed-size list holding that single tuple
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getOneToOneTestTuple() throws ParseException {
    String combinedText = sentence1 + sentence2;
    IField[] textFields = new IField[] { new TextField(combinedText) };
    return Arrays.asList(new Tuple(SPLIT_SCHEMA, textFields));
}
/**
 * Wraps a string in a one-field tuple under {@code SCHEMA_TEXT}.
 *
 * @param content text placed in the tuple's single text field
 * @return the new tuple
 */
private static Tuple getTextTuple(String content) {
    IField textField = new TextField(content);
    return new Tuple(SCHEMA_TEXT, textField);
}
/**
 * Builds one tuple under {@code SPLIT_SCHEMA} whose only field is a text field
 * containing {@code sentence1} concatenated with {@code sentence2}.
 *
 * @return a fixed-size list holding that single tuple
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getOneToManyTestTuple() throws ParseException {
    TextField joinedSentences = new TextField(sentence1 + sentence2);
    Tuple sourceTuple = new Tuple(SPLIT_SCHEMA, new IField[] { joinedSentences });
    return Arrays.asList(sourceTuple);
}
/**
 * Builds the single one-sentence tuple used as input for test 3.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_ONE_SENTENCE}
 * @throws ParseException declared in the signature; nothing in this body throws it
 */
public static List<Tuple> getTest3Tuple() throws ParseException {
    IField[] sentenceFields = new IField[] {
            new TextField(
                    "Microsoft, Google and Facebook are organizations and Donald Trump and Barack Obama are persons.")
    };
    return Arrays.asList(new Tuple(SCHEMA_ONE_SENTENCE, sentenceFields));
}
/**
 * Builds the single one-sentence tuple used as input for test 8.
 *
 * @return a fixed-size list holding one tuple under {@code SCHEMA_ONE_SENTENCE}
 */
public static List<Tuple> getTest8Tuple() {
    TextField sentence = new TextField("This backpack costs me 300 dollars.");
    Tuple inputTuple = new Tuple(SCHEMA_ONE_SENTENCE, new IField[] { sentence });
    return Arrays.asList(inputTuple);
}
/**
 * Passes a {@code TextField} through {@code TestUtils.testJsonSerialization} and
 * asserts that the {@code JsonConstants.FIELD_VALUE} node of the result is textual.
 */
@Test
public void testTextField() {
    JsonNode serialized = TestUtils.testJsonSerialization(new TextField("text field test"));
    JsonNode valueNode = serialized.get(JsonConstants.FIELD_VALUE);
    Assert.assertTrue(valueNode.isTextual());
}
/**
 * Builds two sample tuples under {@code SCHEMA_PEOPLE}.
 *
 * <p>Each tuple carries six fields in this order: two string fields, an integer
 * field, a double field, a date-time field parsed from an ISO-style literal, and a
 * text field.
 *
 * @return a fixed-size list containing the two tuples in the order they are built
 */
// NOTE(review): the final '}' on the next line appears to close an enclosing class
// whose header is not visible here; it is not part of this method.
public static List<Tuple> constructSamplePeopleTuples() { IField[] fields1 = { new StringField("bruce"), new StringField("john Lee"), new IntegerField(46), new DoubleField(5.50), new DateTimeField(LocalDateTime.parse("1970-01-01T11:11:11")), new TextField("banana") }; IField[] fields2 = { new StringField("tom hanks"), new StringField("cruise"), new IntegerField(45), new DoubleField(5.95), new DateTimeField(LocalDateTime.parse("1980-01-02T13:14:15")), new TextField("mississippi") }; Tuple tuple1 = new Tuple(SCHEMA_PEOPLE, fields1); Tuple tuple2 = new Tuple(SCHEMA_PEOPLE, fields2); return Arrays.asList(tuple1, tuple2); } }
/**
 * Builds the result tuple for test 5: the two-sentence tuple extended with a span
 * list stored under {@code REULST_ATTRIBUTE}.
 *
 * <p>The span list holds two PERSON spans on {@code "sentence_two"}:
 * "Donald Trump" at [0, 12) and "Barack Obama" at [17, 29).
 *
 * @return a fixed-size list holding the single result tuple
 */
public static List<Tuple> getTest5ResultTuples() {
    String personType = NlpEntityType.PERSON.toString();

    List<Span> personSpans = new ArrayList<Span>();
    personSpans.add(new Span("sentence_two", 0, 12, personType, "Donald Trump"));
    personSpans.add(new Span("sentence_two", 17, 29, personType, "Barack Obama"));

    IField[] sentenceFields = new IField[] {
            new TextField("Microsoft, Google and Facebook are organizations."),
            new TextField("Donald Trump and Barack Obama are persons")
    };
    Tuple baseTuple = new Tuple(SCHEMA_TWO_SENTENCE, sentenceFields);

    Tuple withSpans = new Tuple.Builder(baseTuple)
            .add(REULST_ATTRIBUTE, new ListField<Span>(personSpans))
            .build();
    return Arrays.asList(withSpans);
}
/**
 * Builds a two-attribute tuple (a random ID plus a text field) and passes it through
 * {@code TestUtils.testJsonSerialization}.
 */
@Test
public void testTuple() {
    Attribute idAttribute = new Attribute("_id", AttributeType._ID_TYPE);
    Attribute textAttribute = new Attribute("text", AttributeType.TEXT);
    Schema schema = new Schema(Arrays.asList(idAttribute, textAttribute));

    Tuple tuple = new Tuple(schema,
            Arrays.asList(IDField.newRandomID(), new TextField("tuple test text")));

    TestUtils.testJsonSerialization(tuple);
}
/**
 * Builds the result tuple for test 1: the one-sentence tuple extended with a span
 * list stored under {@code REULST_ATTRIBUTE}.
 *
 * <p>The span list holds one ORGANIZATION span on {@code "sentence_one"}:
 * "Microsoft" at [0, 9).
 *
 * @return a fixed-size list holding the single result tuple
 */
public static List<Tuple> getTest1ResultTuples() {
    List<Span> organizationSpans = new ArrayList<Span>();
    organizationSpans.add(
            new Span("sentence_one", 0, 9, NlpEntityType.ORGANIZATION.toString(), "Microsoft"));

    IField[] sentenceFields = new IField[] { new TextField("Microsoft is an organization.") };
    Tuple baseTuple = new Tuple(SCHEMA_ONE_SENTENCE, sentenceFields);

    Tuple withSpans = new Tuple.Builder(baseTuple)
            .add(REULST_ATTRIBUTE, new ListField<Span>(organizationSpans))
            .build();
    return Arrays.asList(withSpans);
}
/**
 * Builds the result tuple for test 7: the one-sentence tuple extended with a span
 * list stored under {@code REULST_ATTRIBUTE}.
 *
 * <p>The span list holds one ADJECTIVE span on {@code "sentence_one"}:
 * "warm" at [12, 16).
 *
 * @return a fixed-size list holding the single result tuple
 */
public static List<Tuple> getTest7ResultTuples() {
    List<Span> adjectiveSpans = new ArrayList<Span>();
    adjectiveSpans.add(
            new Span("sentence_one", 12, 16, NlpEntityType.ADJECTIVE.toString(), "warm"));

    IField[] sentenceFields = new IField[] {
            new TextField(
                    "Feeling the warm sun rays beaming steadily down, the girl decided there was no need to wear a coat.")
    };
    Tuple baseTuple = new Tuple(SCHEMA_ONE_SENTENCE, sentenceFields);

    Tuple withSpans = new Tuple.Builder(baseTuple)
            .add(REULST_ATTRIBUTE, new ListField<Span>(adjectiveSpans))
            .build();
    return Arrays.asList(withSpans);
}
/**
 * Builds the result tuple for test 8: the one-sentence tuple extended with a span
 * list stored under {@code REULST_ATTRIBUTE}.
 *
 * <p>The span list holds one MONEY span on {@code "sentence_one"}:
 * "300 dollars" at [23, 34).
 *
 * @return a fixed-size list holding the single result tuple
 */
public static List<Tuple> getTest8ResultTuples() {
    List<Span> moneySpans = new ArrayList<Span>();
    moneySpans.add(
            new Span("sentence_one", 23, 34, NlpEntityType.MONEY.toString(), "300 dollars"));

    IField[] sentenceFields = new IField[] {
            new TextField("This backpack costs me 300 dollars.")
    };
    Tuple baseTuple = new Tuple(SCHEMA_ONE_SENTENCE, sentenceFields);

    Tuple withSpans = new Tuple.Builder(baseTuple)
            .add(REULST_ATTRIBUTE, new ListField<Span>(moneySpans))
            .build();
    return Arrays.asList(withSpans);
}
/**
 * Builds a tuple with a text attribute and a list attribute containing one span,
 * then passes it through {@code TestUtils.testJsonSerialization}.
 */
@Test
public void testTupleWithSpanlist() {
    Span onlySpan = new Span("attr1", 0, 4, "test", "test");
    ListField<Span> spanListField = new ListField<Span>(Arrays.asList(onlySpan));

    Tuple tuple = new Tuple.Builder()
            .add("attr1", AttributeType.TEXT, new TextField("test"))
            .add("spanList", AttributeType.LIST, spanListField)
            .build();

    TestUtils.testJsonSerialization(tuple);
}