/**
 * Writes a YAML record definition file containing one record built with the two-argument
 * {@code (fields, 0)} constructor, i.e. without an explicit name or boundary paths.
 *
 * <p>The record has a single field called "field" selected by {@code Paragraph:nth-of-type(2)}.
 *
 * @return the temporary {@code .yml} file created inside {@code tempDirectory}
 * @throws IOException if creating or writing the temporary file fails
 */
private Path createDefaultRecordDefinition() throws IOException {
  TemplateFieldConfiguration onlyField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(onlyField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration definition = new TemplateRecordConfiguration(fieldList, 0);
  // The definition is wrapped with singleton(...) before being serialised.
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(definition));
  return yamlFile;
}
/**
 * Creates a definition for a record named "record" with two fields: "optional" at
 * {@code Paragraph:nth-of-type(3)} and "required" at {@code Paragraph:nth-of-type(2)}, the latter
 * flagged as required.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createRecordDefinitionWithRequiredField() throws IOException {
  TemplateFieldConfiguration optional =
      new TemplateFieldConfiguration("optional", "Paragraph:nth-of-type(3)");

  TemplateFieldConfiguration mandatory =
      new TemplateFieldConfiguration("required", "Paragraph:nth-of-type(2)");
  mandatory.setRequired(true);

  // The optional field is deliberately passed first, matching the declared order.
  return createRecord("record", optional, mandatory);
}
/**
 * Creates a definition for a record named "test" with one field, "field", selected by
 * {@code Paragraph:nth-of-type(2)}.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinition() throws IOException {
  TemplateFieldConfiguration secondParagraph =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  return createRecord("test", secondParagraph);
}
/**
 * Writes a YAML record definition file for a record named "test" whose preceding path is
 * {@code Paragraph:nth-of-type(5)} and whose following path is the empty string.
 *
 * <p>The record has a single field called "field" selected by {@code Paragraph:nth-of-type(6)}.
 *
 * @return the temporary {@code .yml} file created inside {@code tempDirectory}
 * @throws IOException if creating or writing the temporary file fails
 */
private Path createNoFollowingRecordDefinition() throws IOException {
  TemplateFieldConfiguration sixthParagraph =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(6)");
  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(sixthParagraph);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  String preceding = "Paragraph:nth-of-type(5)";
  String following = ""; // intentionally empty: no following path is configured
  TemplateRecordConfiguration definition =
      new TemplateRecordConfiguration("test", preceding, following, fieldList, 0);

  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(definition));
  return yamlFile;
}
} // closes an enclosing scope that was opened outside the visible source
/**
 * Creates a definition for a record named "test" with one field, "field", whose path is
 * {@code Table}.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createBadRecordDefinition() throws IOException {
  TemplateFieldConfiguration tableField = new TemplateFieldConfiguration("field", "Table");
  return createRecord("test", tableField);
}
/**
 * Writes a YAML record definition file containing one record built with the two-argument
 * {@code (fields, 0)} constructor and a single required field.
 *
 * <p>The field is called "field" and is selected by {@code Paragraph:nth-of-type(5)}.
 *
 * @return the temporary {@code .yml} file created inside {@code tempDirectory}
 * @throws IOException if creating or writing the temporary file fails
 */
private Path createGoodRecordDefinitionRequiredMissing() throws IOException {
  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  TemplateFieldConfiguration requiredField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
  requiredField.setRequired(true);
  fieldList.add(requiredField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration definition = new TemplateRecordConfiguration(fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(definition));
  return yamlFile;
}
/**
 * Writes a YAML record definition file containing one record built with the two-argument
 * {@code (fields, 0)} constructor and a single field that carries a default value.
 *
 * <p>The field is called "field", is selected by {@code Paragraph:nth-of-type(5)} and has the
 * default value "default value".
 *
 * @return the temporary {@code .yml} file created inside {@code tempDirectory}
 * @throws IOException if creating or writing the temporary file fails
 */
private Path createGoodRecordDefinitionWithDefaultAndMissing() throws IOException {
  TemplateFieldConfiguration defaultedField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(5)");
  defaultedField.setDefaultValue("default value");

  List<TemplateFieldConfiguration> fieldList = new ArrayList<>();
  fieldList.add(defaultedField);

  String prefix = AbstractRecordAnnotatorTest.class.getSimpleName();
  Path yamlFile = Files.createTempFile(tempDirectory, prefix, ".yml");

  TemplateRecordConfiguration definition = new TemplateRecordConfiguration(fieldList, 0);
  YAMLMAPPER.writeValue(yamlFile.toFile(), singleton(definition));
  return yamlFile;
}
/**
 * Creates a definition for a record named "test" with one field, "field", selected by
 * {@code Paragraph:nth-of-type(2)} and configured with the regex
 * {@code (?<=brown )(.*)(?= jumped)}.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinitionWithRegex() throws IOException {
  TemplateFieldConfiguration regexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(2)");
  regexField.setRegex("(?<=brown )(.*)(?= jumped)");

  return createRecord("test", regexField);
}
/**
 * Creates a definition for a record named "test" with one required field, "field", selected by
 * {@code Paragraph:nth-of-type(3)} and configured with the regex
 * {@code (?<=brown )(.*)(?= jumped)}.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinitionWithRegexRequired() throws IOException {
  TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=brown )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);

  return createRecord("test", requiredRegexField);
}
/**
 * Creates a definition for a record named "test" with one field, "field", selected by
 * {@code Paragraph:nth-of-type(4)}, configured with the regex {@code (?<=ant )(.*)(?= over)} and
 * the default value "crawled".
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinitionWithRegexDefaultNotNeeded() throws IOException {
  TemplateFieldConfiguration regexFieldWithDefault =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  regexFieldWithDefault.setRegex("(?<=ant )(.*)(?= over)");
  regexFieldWithDefault.setDefaultValue("crawled");

  return createRecord("test", regexFieldWithDefault);
}
/**
 * Creates a definition for a record named "test" with one field, "field", selected by
 * {@code Paragraph:nth-of-type(4)}, configured with the regex
 * {@code (?<=white )(.*)(?= jumped)} and the default value "horse".
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinitionWithRegexDefaultNeeded() throws IOException {
  TemplateFieldConfiguration regexFieldWithDefault =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(4)");
  regexFieldWithDefault.setRegex("(?<=white )(.*)(?= jumped)");
  regexFieldWithDefault.setDefaultValue("horse");

  return createRecord("test", regexFieldWithDefault);
}
/**
 * Creates a definition for a record named "test" with one required field, "field", selected by
 * {@code Paragraph:nth-of-type(3)} and configured with the regex
 * {@code (?<=white )(.*)(?= jumped)}.
 *
 * @return the path returned by {@code createRecord}
 * @throws IOException if {@code createRecord} fails
 */
private Path createGoodRecordDefinitionWithRegexRequiredAndMissing() throws IOException {
  TemplateFieldConfiguration requiredRegexField =
      new TemplateFieldConfiguration("field", "Paragraph:nth-of-type(3)");
  requiredRegexField.setRegex("(?<=white )(.*)(?= jumped)");
  requiredRegexField.setRequired(true);

  return createRecord("test", requiredRegexField);
}
/**
 * Builds a field configuration from a field definition and a path.
 *
 * <p>The name is taken from the definition and combined with the supplied path; the required
 * flag, repeat flag and regex are then copied across from the definition.
 *
 * @param templateFieldDefinition the field definition to copy settings from
 * @param fieldPath the path to use for the new field configuration
 * @return the newly created field configuration
 */
private TemplateFieldConfiguration makeField(
    TemplateFieldDefinition templateFieldDefinition, String fieldPath) {
  TemplateFieldConfiguration configuration =
      new TemplateFieldConfiguration(templateFieldDefinition.getName(), fieldPath);

  configuration.setRequired(templateFieldDefinition.getRequired());
  configuration.setRepeat(templateFieldDefinition.getRepeat());
  configuration.setRegex(templateFieldDefinition.getRegex());

  return configuration;
}
/**
 * Builds the named record "quote2" (order 2), bounded by the preceding path
 * {@code Document > Link} and the following path {@code Document > Link:nth-of-type(2)}.
 *
 * <p>It has a single repeating field "quote" at {@code Document > Quotation:nth-of-type(2)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createRepeatQuoteRecord2() {
  TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(2)");
  repeatingQuote.setRepeat(true);

  TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote2");
  quoteRecord.setOrder(2);
  quoteRecord.setPrecedingPath("Document > Link");
  quoteRecord.setFollowingPath("Document > Link:nth-of-type(2)");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return quoteRecord;
}
/**
 * Builds the named record "quote1" (order 1), with an empty preceding path and the following path
 * {@code Document > Link}.
 *
 * <p>It has a single repeating field "quote" at {@code Document > Quotation:nth-of-type(1)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createRepeatQuoteRecord1() {
  TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(1)");
  repeatingQuote.setRepeat(true);

  TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote1");
  quoteRecord.setOrder(1);
  quoteRecord.setPrecedingPath(""); // empty preceding path, as in the fixture
  quoteRecord.setFollowingPath("Document > Link");
  quoteRecord.setKind(Kind.NAMED);
  quoteRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return quoteRecord;
}
/**
 * Builds the named record "missing" (order 3), bounded by the preceding path
 * {@code Document > Link:nth-of-type(2)} and the following path
 * {@code Document > Link:nth-of-type(3)}.
 *
 * <p>It has a single repeating field "quote" at {@code Document > Quotation:nth-of-type(3)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createMissingRepeatQuoteRecord() {
  TemplateFieldConfiguration repeatingQuote =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(3)");
  repeatingQuote.setRepeat(true);

  TemplateRecordConfiguration missingRecord = new TemplateRecordConfiguration();
  missingRecord.setName("missing");
  missingRecord.setOrder(3);
  missingRecord.setPrecedingPath("Document > Link:nth-of-type(2)");
  missingRecord.setFollowingPath("Document > Link:nth-of-type(3)");
  missingRecord.setKind(Kind.NAMED);
  missingRecord.setFieldPaths(ImmutableList.of(repeatingQuote));
  return missingRecord;
}
/**
 * Builds the named, non-repeating record "single" (order 4), bounded by the preceding path
 * {@code Document > Link:nth-of-type(3)} and the following path {@code Document > Table}.
 *
 * <p>It has a single field "quote" at {@code Document > Quotation:nth-of-type(4)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createSingleQuoteRecord() {
  TemplateRecordConfiguration singleRecord = new TemplateRecordConfiguration();
  singleRecord.setName("single");
  singleRecord.setOrder(4);
  singleRecord.setPrecedingPath("Document > Link:nth-of-type(3)");
  singleRecord.setFollowingPath("Document > Table");
  singleRecord.setRepeat(false);
  singleRecord.setKind(Kind.NAMED);

  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(4)");
  List<TemplateFieldConfiguration> fieldPaths = ImmutableList.of(quoteField);
  singleRecord.setFieldPaths(fieldPaths);
  return singleRecord;
}
/**
 * Builds the named, repeating record "missing" (order 3), bounded by the preceding path
 * {@code Document > Link:nth-of-type(2)} and the following path
 * {@code Document > Link:nth-of-type(3)}.
 *
 * <p>Both the covered paths and the minimal repeat are set to {@code Document > Quotation}. The
 * record has a single field "quote" at {@code Document > Quotation:nth-of-type(3)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createMissingRepeatQuoteRecord() {
  TemplateRecordConfiguration missingRecord = new TemplateRecordConfiguration();
  missingRecord.setName("missing");
  missingRecord.setOrder(3);
  missingRecord.setPrecedingPath("Document > Link:nth-of-type(2)");
  missingRecord.setFollowingPath("Document > Link:nth-of-type(3)");
  missingRecord.setCoveredPaths(ImmutableList.of("Document > Quotation"));
  missingRecord.setMinimalRepeat("Document > Quotation");
  missingRecord.setRepeat(true);
  missingRecord.setKind(Kind.NAMED);

  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(3)");
  List<TemplateFieldConfiguration> fieldPaths = ImmutableList.of(quoteField);
  missingRecord.setFieldPaths(fieldPaths);
  return missingRecord;
}
/**
 * Builds the named, non-repeating record "single" (order 4), bounded by the preceding path
 * {@code Document > Link:nth-of-type(3)} and the following path {@code Document > Table}.
 *
 * <p>It has a single field "quote" at {@code Document > Quotation:nth-of-type(4)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createSingleQuoteRecord() {
  TemplateRecordConfiguration singleRecord = new TemplateRecordConfiguration();
  singleRecord.setName("single");
  singleRecord.setOrder(4);
  singleRecord.setPrecedingPath("Document > Link:nth-of-type(3)");
  singleRecord.setFollowingPath("Document > Table");
  singleRecord.setRepeat(false);
  singleRecord.setKind(Kind.NAMED);

  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(4)");
  List<TemplateFieldConfiguration> fieldPaths = ImmutableList.of(quoteField);
  singleRecord.setFieldPaths(fieldPaths);
  return singleRecord;
}
/**
 * Builds the named, repeating record "quote1" (order 1), with an empty preceding path and the
 * following path {@code Document > Link}.
 *
 * <p>Both the covered paths and the minimal repeat are set to {@code Document > Quotation}. The
 * record has a single field "quote" at {@code Document > Quotation:nth-of-type(1)}.
 *
 * @return the configured record
 */
private TemplateRecordConfiguration createRepeatQuoteRecord1() {
  TemplateRecordConfiguration quoteRecord = new TemplateRecordConfiguration();
  quoteRecord.setName("quote1");
  quoteRecord.setOrder(1);
  quoteRecord.setPrecedingPath(""); // empty preceding path, as in the fixture
  quoteRecord.setFollowingPath("Document > Link");
  quoteRecord.setCoveredPaths(ImmutableList.of("Document > Quotation"));
  quoteRecord.setMinimalRepeat("Document > Quotation");
  quoteRecord.setRepeat(true);
  quoteRecord.setKind(Kind.NAMED);

  TemplateFieldConfiguration quoteField =
      new TemplateFieldConfiguration("quote", "Document > Quotation:nth-of-type(1)");
  List<TemplateFieldConfiguration> fieldPaths = ImmutableList.of(quoteField);
  quoteRecord.setFieldPaths(fieldPaths);
  return quoteRecord;
}