/**
 * Builds a field configuration from a field definition and the path for that field.
 *
 * <p>The configuration takes its name from the definition and copies the definition's required
 * flag, repeat flag and regex.
 *
 * @param templateFieldDefinition the field definition to copy settings from
 * @param fieldPath the path to assign to the created field configuration
 * @return the populated field configuration
 */
private TemplateFieldConfiguration makeField(
    TemplateFieldDefinition templateFieldDefinition, String fieldPath) {
  TemplateFieldConfiguration configuration =
      new TemplateFieldConfiguration(templateFieldDefinition.getName(), fieldPath);
  configuration.setRequired(templateFieldDefinition.getRequired());
  configuration.setRepeat(templateFieldDefinition.getRepeat());
  configuration.setRegex(templateFieldDefinition.getRegex());
  return configuration;
}
/**
 * Creates a record definition named "test" with a single field that carries both a regex and a
 * default value of "crawled".
 *
 * <p>NOTE(review): the method name suggests the regex is expected to match the fixture text so the
 * default is never used - confirm against the test document.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinitionWithRegexDefaultNotNeeded() throws IOException {
  TemplateFieldConfiguration regexWithDefault =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  regexWithDefault.setRegex("(?<=ant )(.*)(?= over)");
  regexWithDefault.setDefaultValue("crawled");
  return createRecord("test", regexWithDefault);
}
/**
 * Creates a record definition named "test" with a single required field that has a regex.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinitionWithRegexRequired() throws IOException {
  TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=brown )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);
  return createRecord("test", requiredRegexField);
}
/**
 * Builds the named record "quote2" (order 2), bounded by the first and second {@code Link} paths,
 * with a single repeating "quote" field on the second {@code Quotation}.
 *
 * @return the record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord2() {
  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(2)");
  quoteField.setRepeat(true);

  TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote2");
  quoteRecord.setOrder(2);
  quoteRecord.setPrecedingPath("Document > Link");
  quoteRecord.setFollowingPath("Document > Link:nth-of-type(2)");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(quoteField));
  return quoteRecord;
}
/**
 * Writes a record definition containing one field ("field" at "Paragraph:nth-of-type(2)") to a new
 * ".yml" temp file inside {@code tempDirectory}.
 *
 * <p>The record is built with the two-argument constructor (fields, 0), i.e. without a name or
 * preceding/following paths. NOTE(review): presumably this yields a default-kind record and the 0
 * is its order - confirm against {@code TemplateRecordConfiguration}.
 *
 * @return the path of the written definition file
 * @throws IOException if the temp file cannot be created or written
 */
private Path createDefaultRecordDefinition() throws IOException {
  TemplateFieldConfiguration onlyField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(onlyField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration defaultRecord = new TemplateRecordConfiguration(fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(defaultRecord));
  return yamlFile;
}
/**
 * Creates a record definition named "record" with two fields: "optional" (not marked required) and
 * "required" (marked required).
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createRecordDefinitionWithRequiredField() throws IOException {
  TemplateFieldConfiguration optional =
      new TemplateFieldConfiguration("optional", "Paragraph:nth-of-type(3)");
  TemplateFieldConfiguration mandatory =
      new TemplateFieldConfiguration("required", "Paragraph:nth-of-type(2)");
  mandatory.setRequired(true);
  return createRecord("record", optional, mandatory);
}
/**
 * Creates a record definition named "test" with a single field that has a regex and no other
 * options set.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinitionWithRegex() throws IOException {
  TemplateFieldConfiguration regexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  regexField.setRegex("(?<=brown )(.*)(?= jumped)");
  return createRecord("test", regexField);
}
/**
 * Writes a record definition whose single field ("field" at "Paragraph:nth-of-type(5)") has the
 * default value "default value" to a new ".yml" temp file inside {@code tempDirectory}.
 *
 * <p>The record is built with the two-argument constructor (fields, 0). NOTE(review): the method
 * name suggests the path does not resolve in the fixture document, so the default is expected to
 * be used - confirm against the test document.
 *
 * @return the path of the written definition file
 * @throws IOException if the temp file cannot be created or written
 */
private Path createGoodRecordDefinitionWithDefaultAndMissing() throws IOException {
  TemplateFieldConfiguration fieldWithDefault =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
  fieldWithDefault.setDefaultValue("default value");

  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(fieldWithDefault);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration record = new TemplateRecordConfiguration(fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(record));
  return yamlFile;
}
/**
 * Asserts that the given definitions contain the expected default record.
 *
 * <p>The first record of kind {@code Kind.DEFAULT} must have no name and exactly one field, named
 * "noRecordField" with path "Paragraph:nth-of-type(1)".
 *
 * @param definitions the record definitions to inspect
 * @throws AssertionError if there is no default record or it does not match the expectations
 */
protected void assertDefaultRecord(List<TemplateRecordConfiguration> definitions) {
  TemplateRecordConfiguration defaultRecord =
      definitions
          .stream()
          // constant-first equals: a record without a kind is skipped instead of throwing
          .filter(p -> Kind.DEFAULT.equals(p.getKind()))
          .findFirst()
          // report a missing record as a test failure rather than an index error
          .orElseThrow(() -> new AssertionError("no record of kind DEFAULT in definitions"));

  assertEquals(null, defaultRecord.getName());
  assertEquals(1, defaultRecord.getFields().size());

  TemplateFieldConfiguration field = defaultRecord.getFields().get(0);
  assertEquals("noRecordField", field.getName());
  assertEquals("Paragraph:nth-of-type(1)", field.getPath());
}
/**
 * Creates a record definition named "test" with a single required field that has a regex.
 *
 * <p>NOTE(review): the method name suggests this regex is not expected to match the fixture text,
 * leaving the required field missing - confirm against the test document.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinitionWithRegexRequiredAndMissing() throws IOException {
  TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=white )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);
  return createRecord("test", requiredRegexField);
}
/**
 * Builds the named record "quote1" (order 1) with an empty preceding path, a following path of
 * "Document > Link", and a single repeating "quote" field on the first {@code Quotation}.
 *
 * @return the record configuration
 */
private TemplateRecordConfiguration createRepeatQuoteRecord1() {
  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(1)");
  quoteField.setRepeat(true);

  TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote1");
  quoteRecord.setOrder(1);
  quoteRecord.setPrecedingPath("");
  quoteRecord.setFollowingPath("Document > Link");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(quoteField));
  return quoteRecord;
}
/**
 * Creates a record definition named "test" with a single plain field ("field" at
 * "Paragraph:nth-of-type(2)").
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinition() throws IOException {
  TemplateFieldConfiguration plainField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  return createRecord("test", plainField);
}
/**
 * Writes a record definition whose single field ("field" at "Paragraph:nth-of-type(5)") is marked
 * required to a new ".yml" temp file inside {@code tempDirectory}.
 *
 * <p>The record is built with the two-argument constructor (fields, 0). NOTE(review): the method
 * name suggests the path does not resolve in the fixture document, so the required field is
 * missing - confirm against the test document.
 *
 * @return the path of the written definition file
 * @throws IOException if the temp file cannot be created or written
 */
private Path createGoodRecordDefinitionRequiredMissing() throws IOException {
  TemplateFieldConfiguration requiredField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
  requiredField.setRequired(true);

  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(requiredField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration record = new TemplateRecordConfiguration(fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(record));
  return yamlFile;
}
/**
 * Asserts that the given definitions contain the expected named record "record1" and returns it.
 *
 * <p>The record must be of kind {@code Kind.NAMED}, have exactly two fields ("field1" at
 * "Paragraph:nth-of-type(2)" and "field2" at "Paragraph:nth-of-type(3)"), a preceding path of
 * "Paragraph:nth-of-type(1)" and a following path of "Paragraph:nth-of-type(4)".
 *
 * @param definitions the record definitions to inspect
 * @return the matched record, so callers can make further assertions on it
 * @throws AssertionError if the record is absent or does not match the expectations
 */
protected TemplateRecordConfiguration assertNamedRecord(
    List<TemplateRecordConfiguration> definitions) {
  TemplateRecordConfiguration record =
      definitions
          .stream()
          // constant-first equals: records without a kind or a name are skipped, not an NPE
          .filter(p -> Kind.NAMED.equals(p.getKind()) && "record1".equals(p.getName()))
          .findFirst()
          // report a missing record as a test failure rather than an index error
          .orElseThrow(() -> new AssertionError("no NAMED record called record1 in definitions"));

  assertEquals(Kind.NAMED, record.getKind());
  assertEquals(2, record.getFields().size());

  for (TemplateFieldConfiguration field : record.getFields()) {
    String name = field.getName();
    // null-safe comparisons so an unnamed field reaches the fail() branch below
    if ("field1".equals(name)) {
      assertEquals("Paragraph:nth-of-type(2)", field.getPath());
    } else if ("field2".equals(name)) {
      assertEquals("Paragraph:nth-of-type(3)", field.getPath());
    } else {
      fail("field not expected: " + name);
    }
  }

  assertEquals("Paragraph:nth-of-type(1)", record.getPrecedingPath());
  assertEquals("Paragraph:nth-of-type(4)", record.getFollowingPath());
  return record;
}
/**
 * Builds the named record "missing" (order 3), bounded by the second and third {@code Link}
 * paths, with a single repeating "quote" field on the third {@code Quotation}.
 *
 * @return the record configuration
 */
private TemplateRecordConfiguration createMissingRepeatQuoteRecord() {
  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(3)");
  quoteField.setRepeat(true);

  TemplateRecordConfiguration missingRecord = new TemplateRecordConfiguration();
  missingRecord.setName("missing");
  missingRecord.setOrder(3);
  missingRecord.setPrecedingPath("Document > Link:nth-of-type(2)");
  missingRecord.setFollowingPath("Document > Link:nth-of-type(3)");
  missingRecord.setKind(Kind.NAMED);
  missingRecord.setFieldPaths(ImmutableList.of(quoteField));
  return missingRecord;
}
/**
 * Creates a record definition named "test" with a single field that carries both a regex and a
 * default value of "horse".
 *
 * <p>NOTE(review): the method name suggests this regex is not expected to match the fixture text,
 * so the default is used - confirm against the test document.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createGoodRecordDefinitionWithRegexDefaultNeeded() throws IOException {
  TemplateFieldConfiguration regexWithDefault =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  regexWithDefault.setRegex("(?<=white )(.*)(?= jumped)");
  regexWithDefault.setDefaultValue("horse");
  return createRecord("test", regexWithDefault);
}
/**
 * Writes a record definition named "test" with a preceding path of "Paragraph:nth-of-type(5)", an
 * empty following path and one field ("field" at "Paragraph:nth-of-type(6)") to a new ".yml" temp
 * file inside {@code tempDirectory}.
 *
 * @return the path of the written definition file
 * @throws IOException if the temp file cannot be created or written
 */
private Path createNoFollowingRecordDefinition() throws IOException {
  TemplateFieldConfiguration onlyField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(6)");
  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(onlyField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration record =
      new TemplateRecordConfiguration("test", "Paragraph:nth-of-type(5)", "", fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(record));
  return yamlFile;
}
}
/**
 * Builds the repeating named record "row" (order 5) covering "Document > Table", with a minimal
 * repeat of the table row path and two repeating fields, "cell1" and "cell2", for the paragraphs
 * in the first and second table cells.
 *
 * @return the record configuration
 */
private TemplateRecordConfiguration createRowRecord() {
  TemplateFieldConfiguration firstCell =
      new TemplateFieldConfiguration(
          "cell1",
          "Document > Table > TableBody > TableRow > TableCell:nth-of-type(1) > Paragraph");
  firstCell.setRepeat(true);

  // NOTE(review): unlike cell1, this path has no space before the final '>' - confirm the
  // selector parser treats both forms the same.
  TemplateFieldConfiguration secondCell =
      new TemplateFieldConfiguration(
          "cell2",
          "Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)> Paragraph");
  secondCell.setRepeat(true);

  TemplateRecordConfiguration rowRecord = new TemplateRecordConfiguration();
  rowRecord.setName("row");
  rowRecord.setOrder(5);
  rowRecord.setPrecedingPath("Document > Quotation:nth-of-type(4)");
  rowRecord.setFollowingPath("Document > Section");
  rowRecord.setCoveredPaths(ImmutableList.of("Document > Table"));
  rowRecord.setMinimalRepeat("Document > Table > TableBody > TableRow");
  rowRecord.setRepeat(true);
  rowRecord.setKind(Kind.NAMED);
  rowRecord.setFieldPaths(ImmutableList.of(firstCell, secondCell));
  return rowRecord;
}
/**
 * Creates a record definition named "test" with a single field ("field") whose path is "Table".
 *
 * <p>NOTE(review): the method name suggests this path is not expected to resolve in the fixture
 * document - confirm against the test document.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException declared because {@code createRecord} is called; see that helper
 */
private Path createBadRecordDefinition() throws IOException {
  TemplateFieldConfiguration tableField = new TemplateFieldConfiguration("field", "Table");
  return createRecord("test", tableField);
}
/**
 * Builds the repeating named record "section" (order 6) covering the section heading and
 * paragraph paths, with an empty following path, an empty minimal repeat, and two fields,
 * "heading" and "para".
 *
 * @return the record configuration
 */
private TemplateRecordConfiguration createSectionRecord() {
  TemplateFieldConfiguration headingField =
      new TemplateFieldConfiguration("heading", "Document > Section > Heading");
  TemplateFieldConfiguration paragraphField =
      new TemplateFieldConfiguration("para", "Document > Section > Paragraph");

  TemplateRecordConfiguration sectionRecord = new TemplateRecordConfiguration();
  sectionRecord.setName("section");
  sectionRecord.setOrder(6);
  sectionRecord.setPrecedingPath(
      "Document > Table > TableBody > TableRow > TableCell:nth-of-type(2)");
  sectionRecord.setFollowingPath("");
  sectionRecord.setCoveredPaths(
      ImmutableList.of("Document > Section > Heading", "Document > Section > Paragraph"));
  sectionRecord.setMinimalRepeat("");
  sectionRecord.setRepeat(true);
  sectionRecord.setKind(Kind.NAMED);
  sectionRecord.setFieldPaths(ImmutableList.of(headingField, paragraphField));
  return sectionRecord;
}