/**
 * Reads a NAF document from a file or, when no path is given, from standard input.
 *
 * <p>After parsing, the {@code publicId} field of the document's public header is overwritten:
 * with the string form of {@code path} when reading from a file, or with the empty string when
 * reading from standard input.
 *
 * @param path the file to read, or {@code null} to read from {@code System.in}
 * @return the parsed document
 * @throws IOException if opening, reading or closing the input fails
 */
public static KAFDocument readDocument(@Nullable final Path path) throws IOException {
    if (path != null) {
        // File input: the reader is owned by this method and closed on exit from the try.
        try (BufferedReader in = Files.newBufferedReader(path)) {
            final KAFDocument parsed = KAFDocument.createFromStream(in);
            parsed.getPublic().publicId = path.toString();
            return parsed;
        }
    }

    // No path supplied: consume standard input, which is deliberately not closed here.
    final KAFDocument parsed = KAFDocument.createFromStream(IO.utf8Reader(IO.buffer(System.in)));
    parsed.getPublic().publicId = "";
    return parsed;
}
/**
 * Runs an {@code Extractor} over the document, optionally restricted to selected sentences.
 *
 * <p>A boolean mask with {@code document.getNumSentences() + 1} slots is built, where slot
 * {@code i} is set when sentence {@code i} has to be processed. The document URI taken from
 * the public header is passed to the extractor as base URI.
 *
 * @param document the document to process
 * @param sentenceIDs the IDs of the sentences to process, or {@code null} to process all of
 *        them; each ID is used directly as an index into the mask
 * @param handler the handler handed over to the extractor
 * @throws RDFHandlerException declared for failures raised while the extractor runs
 */
public void generate(final KAFDocument document,
        @Nullable final Iterable<Integer> sentenceIDs, final RDFHandler handler)
        throws RDFHandlerException {

    final boolean[] selected = new boolean[document.getNumSentences() + 1];
    if (sentenceIDs != null) {
        // Explicit selection: flag only the requested sentences.
        for (final Integer sentenceID : sentenceIDs) {
            selected[sentenceID] = true;
        }
    } else {
        // No selection given: every slot is enabled.
        Arrays.fill(selected, true);
    }

    new Extractor(document.getPublic().uri, handler, document, selected).run();
}
// Read the title from the file descriptor, take the raw text that follows the first
// title.length() + 1 characters (trimmed) as the body text, and read the public id.
// NOTE(review): presumably the raw text starts with the title followed by one separator
// character - confirm; substring() throws if the raw text is shorter than that.
String title = document.getFileDesc().title; String text = document.getRawText().substring(title.length() + 1).trim(); String id = document.getPublic().publicId;
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }
@SuppressWarnings("deprecation") Extraction(final QuadModel model, final KAFDocument document) { // Reconstruct the document text using term offsets to avoid alignment issues final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } // Initialize the object this.model = model; this.document = document; this.vf = model.getValueFactory(); this.documentText = builder.toString(); this.documentURI = this.vf.createURI(Util.cleanIRI(document.getPublic().uri)); this.mentions = Maps.newHashMap(); }
/**
 * Builds a NAF document from the given lines and saves it as {@code <index>.naf} inside
 * {@code folder}.
 *
 * <p>The raw text is the concatenation of the lines, each followed by a newline. The document
 * URI is {@code NAMESPACE} followed by the file name; the public id is the local name of that
 * URI. The file descriptor gets the file name and the placeholder title {@code "-"}.
 *
 * @param list the lines forming the raw text of the document
 * @param folder the folder where the NAF file is written
 * @param index the number used to build the file name
 */
private static void createDocument(ArrayList<String> list, File folder, Integer index) {
    // The buffer never leaves this method, so the unsynchronized StringBuilder is enough.
    final StringBuilder rawText = new StringBuilder();
    for (final String line : list) {
        rawText.append(line).append('\n');
    }

    final String nafFileName = index + ".naf";
    // Parent/child constructor instead of hand-concatenating the separator.
    final File nafFile = new File(folder.getAbsoluteFile(), nafFileName);
    final String documentURI = NAMESPACE + nafFileName;

    // Constructor arguments are presumably language and NAF version - confirm against KAFDocument.
    final KAFDocument document = new KAFDocument("en", "v3");
    document.setRawText(rawText.toString());

    // The public header and file descriptor must be created before their fields can be set.
    document.createPublic();
    document.getPublic().publicId = new URIImpl(documentURI).getLocalName();
    document.getPublic().uri = documentURI;
    document.createFileDesc();
    document.getFileDesc().filename = nafFileName;
    document.getFileDesc().title = "-";

    document.save(nafFile.getAbsolutePath());
}
documentModel.put("title", doc.getPublic().uri); documentModel.put("sentences", sentencesModel); documentModel.put("metadata", (Callable<String>) () -> { return renderProperties(new StringBuilder(), model, // new URIImpl(doc.getPublic().uri), true).toString(); }); documentModel.put("mentions", (Callable<String>) () -> {
// Fill documentModel: the document URI from the public header is used as the "title" entry,
// followed by the sentences model and a Renderable of type METADATA for the "metadata" entry.
// NOTE(review): the -1 argument presumably means "no specific sentence" - confirm in Renderable.
documentModel.put("title", doc.getPublic().uri); documentModel.put("sentences", sentencesModel); documentModel.put("metadata", new Renderable(doc, model, -1, times, Renderable.METADATA));
// Remember the document's public id, store it under the "context" MDC key, and then run the
// filter on the document.
// NOTE(review): MDC is presumably the logging mapped diagnostic context - confirm the import.
docName = document.getPublic().publicId; MDC.put("context", docName); filter.filter(document);
// Document URI as stored in the public header of the document.
String uri = document.getPublic().uri;
} else if (this.type == METADATA) { renderProperties(builder, this.model, new URIImpl(this.document.getPublic().uri), true, KS.HAS_MENTION); } else if (this.type == MENTIONS) { renderMentionsTable(builder, this.model);
// Pass the document's public id to emitMeta as the DCTERMS.IDENTIFIER value for nafURI.
emitMeta(nafURI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId);
// Debug-level marker naming, by URI, the document referred to by the "Filtering" message.
LOGGER.debug("== Filtering {} ==", document.getPublic().uri);
// Add two literal-valued statements about nafURI to the model: the document version under
// KS.VERSION and the public id under DCTERMS.IDENTIFIER.
this.model.add(nafURI, KS.VERSION, this.vf.createLiteral(this.document.getVersion())); this.model.add(nafURI, DCTERMS.IDENTIFIER, this.vf.createLiteral(this.document.getPublic().publicId));