/**
 * Creates a {@link DataBaseConnector} from the given CoStoSys configuration file.
 *
 * @param costosysConfig Path to the CoStoSys XML configuration file.
 * @return A connector built from the given configuration.
 * @throws AnalysisEngineProcessException If the configuration file cannot be found.
 */
private DataBaseConnector getDataBaseConnector(String costosysConfig) throws AnalysisEngineProcessException {
    try {
        return new DataBaseConnector(costosysConfig);
    } catch (FileNotFoundException configFileMissing) {
        // Translate the missing configuration file into the UIMA processing exception.
        throw new AnalysisEngineProcessException(configFileMissing);
    }
}
/**
 * Verifies that the given annotation table conforms to the given table schema.
 *
 * @param annotationTableName The table whose definition should be checked.
 * @param schemaAnnotation    The name of the table schema the table must match.
 * @throws ResourceInitializationException If the table does not match the schema.
 */
private void checkTableDefinition(String annotationTableName, String schemaAnnotation) throws ResourceInitializationException {
    try {
        dbc.checkTableDefinition(annotationTableName, schemaAnnotation);
    } catch (TableSchemaMismatchException schemaMismatch) {
        // Convert the CoStoSys exception into the one expected by UIMA initialization.
        throw new ResourceInitializationException(schemaMismatch);
    }
}
/**
 * Creates the XMI namespace table in the active data Postgres schema if it does not
 * exist yet. The table has two text columns: the namespace prefix (primary key) and
 * the namespace URI.
 *
 * @param dbc The connector used to check for and create the table.
 */
private void createNamespaceTable(DataBaseConnector dbc) {
    String qualifiedNsTable = dbc.getActiveDataPGSchema() + "." + XMI_NS_TABLE;
    if (!dbc.tableExists(qualifiedNsTable)) {
        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
            conn.setAutoCommit(true);
            // try-with-resources: the Statement was previously never closed (resource leak).
            try (Statement stmt = conn.createStatement()) {
                String sql = String.format("CREATE TABLE %s (%s text PRIMARY KEY, %s text)",
                        qualifiedNsTable, PREFIX, NS_URI);
                stmt.execute(sql);
            }
        } catch (SQLException e) {
            e.printStackTrace();
            // Postgres chains detail exceptions; print the next one as well, if present.
            SQLException ne = e.getNextException();
            if (null != ne)
                ne.printStackTrace();
        }
    }
}
private void checkTableExists() throws ResourceInitializationException { // Check whether the table we are supposed to read from actually exists. if (!dbc.withConnectionQueryBoolean(c -> c.tableExists(tableName))) { throw new ResourceInitializationException( new IllegalArgumentException("The configured table \"" + tableName + "\" does not exist.")); } }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // Check whether a subset table name or a data table name was given. if (readDataTable) { log.debug("Reading from data table {}", tableName); dataTableDocumentIds = dbc.query(tableName, Arrays.asList(dbc.getFieldConfiguration(dbc.getActiveTableSchema()).getPrimaryKey())); hasNext = dataTableDocumentIds.hasNext(); } else { log.debug("Reading from subset table {}", tableName); hasNext = dbc.withConnectionQueryBoolean(c -> c.hasUnfetchedRows(tableName)); } }
dbc = new DataBaseConnector(costosysConfig); } catch (FileNotFoundException e) { throw new ResourceInitializationException(e); try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) { List<Map<String, String>> primaryKeyFields = dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()); if (rowBatch.getReadsBaseXmiDocument()) { determineDataInGzipFormat(tableName); FieldConfig xmiDocumentTableSchema = dbc.addXmiTextFieldConfiguration(primaryKeyFields, doGzip); dbc.setActiveTableSchema(xmiDocumentTableSchema.getName()); final String[] tables = rowBatch.getTables().toStringArray(); String[] additionalTables = Arrays.copyOfRange(tables, 1, tables.length); if (additionalTables != null && additionalTables.length > 0) { FieldConfig xmiAnnotationTableSchema = dbc.addXmiAnnotationFieldConfiguration(primaryKeyFields, doGzip); rowBatch.setTableSchemas(1, xmiAnnotationTableSchema.getName()); FieldConfig xmiDocumentFieldConfiguration = dbc.addXmiDocumentFieldConfiguration(primaryKeyFields, doGzip); dbc.setActiveTableSchema(xmiDocumentFieldConfiguration.getName());
dbc = new DataBaseConnector(dbcConfigPath); } catch (FileNotFoundException e1) { throw new ResourceInitializationException(e1); writeBatchSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50); componentDbName = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_COMPONENT_DB_NAME)).orElse(getClass().getSimpleName()); annotationStorageSchema = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ANNO_STORAGE_PG_SCHEMA)).orElse(dbc.getActiveDataPGSchema()); schemaDocument = dbc.addXmiDocumentFieldConfiguration(dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), doGzip).getName(); } else { schemaDocument = dbc.addXmiTextFieldConfiguration(dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), doGzip).getName(); schemaAnnotation = dbc.addXmiAnnotationFieldConfiguration(dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().collect(Collectors.toList()), doGzip).getName(); annotations = (String[]) aContext.getConfigParameterValue(PARAM_ANNOS_TO_STORE); if (null != annotations) dbc.reserveConnection(); try { annotationTableManager = new AnnotationTableManager(dbc, docTableParamValue, annotationsToStore, schemaDocument, for (String annotation : annotationsToStore) { String annotationTableName = annotationTableManager.convertAnnotationTypeToTableName(annotation, storeAll); if (dbc.tableExists(annotationTableName)) checkTableDefinition(annotationTableName, schemaAnnotation); serializedCASes.put(annotationTableName, new ArrayList<>()); if (dbc.tableExists(effectiveDocTableName)) checkTableDefinition(effectiveDocTableName, schemaDocument); annotationInserter = new XmiDataInserter(annotationsToStoreTableNames, effectiveDocTableName, dbc,
additionalTableNames = (String[]) getConfigParameterValue(PARAM_ADDITIONAL_TABLES); additionalTableSchemas = (String[]) context.getConfigParameterValue(PARAM_ADDITIONAL_TABLE_SCHEMAS); additionalTablesPGSchema = Optional.ofNullable((String) getConfigParameterValue(PARAM_ADDITONAL_TABLES_STORAGE_PG_SCHEMA)).orElse(dbc.getActiveDataPGSchema()); checkAdditionalTableParameters(additionalTableNames, additionalTableSchemas); determineDataTable(); + " if the data table is referenced by a subset, for which the name has to be" + " given in the Table parameter."); dbc.checkTableDefinition(tableName); Integer tableRows = dbc.withConnectionQueryInteger(c -> c.countRowsOfDataTable(tableName, whereCondition)); totalDocumentCount = limitParameter != null ? Math.min(tableRows, limitParameter) : tableRows; hasNext = !dbc.withConnectionQueryBoolean(c -> c.isEmpty(tableName)); tables = new String[]{tableName}; schemas = new String[]{dbc.getActiveTableSchema()}; } else { if (batchSize == 0) log.warn("Batch size of retrieved documents is set to 0. Nothing will be returned."); try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) { if (resetTable) dbc.resetSubset(tableName); Integer unprocessedDocs = dbc.countUnprocessed(tableName); totalDocumentCount = limitParameter != null ? Math.min(unprocessedDocs, limitParameter) : unprocessedDocs; dataTable = dbc.getReferencedTable(tableName); hasNext = dbc.hasUnfetchedRows(tableName); log.debug("Checking if the subset table \"{}\" has unfetched rows. Result: {}", tableName, hasNext); dbc.checkTableSchemaCompatibility(dbc.getActiveTableSchema(), additionalTableSchemas); ImmutablePair<Integer, String[]> additionalTableNumAndNames = checkAndAdjustAdditionalTables(dbc, dataTable, additionalTableNames);
effectiveTableName = dbDocumentTableName; try { if (!dbc.tableExists(effectiveTableName)) { log.info("Creating table '{}' with schema '{}' (columns: {}).", effectiveTableName, schema, dbc.getFieldConfiguration(schema).getColumns()); String pgSchema = getTableSchema(effectiveTableName); if (!dbc.schemaExists(pgSchema)) dbc.createSchema(pgSchema); if (storeAll) { dbc.createTable(effectiveTableName, schema, "Created by " + XMIDBWriter.class.getName() + " on " + new Date()); } else { if (!effectiveTableName.equals(dbDocumentTableName)) { dbc.createTable(effectiveTableName, dbDocumentTableName, schema, "Created by " + XMIDBWriter.class.getName() + " on " + new Date() + " to store annotations of type\"" + tableName + "\" for the documents stored in table \"" + dbDocumentTableName + "\"."); } else { dbc.createTable(effectiveTableName, schema, "Created by " + XMIDBWriter.class.getName() + " on " + new Date() + " to store the base documents without" + " linguistic or semantic annotations." + " Those are stored in the tables named" dbc.checkTableDefinition(effectiveTableName, tableSchemaNameToCheck);
/**
 * Determines whether the XMI data in the given table is GZIP compressed by fetching a
 * single row and attempting to read its {@code xmi} column as a GZIP stream. Sets the
 * {@code doGzip} field to the result and resolves the actual data table into the
 * {@code dataTable} field.
 *
 * @param table The table (or subset table) whose underlying data table is probed.
 * @throws ResourceInitializationException If the probe query fails, e.g. because the
 *                                         table has no {@code xmi} column.
 */
private void determineDataInGzipFormat(String table) throws ResourceInitializationException {
    doGzip = true;
    dataTable = dbc.getNextOrThisDataTable(table);
    log.debug("Fetching a single row from data table {} in order to determine whether data is in GZIP format", dataTable);
    // try-with-resources: Statement and ResultSet were previously never closed.
    try (CoStoSysConnection conn = dbc.obtainOrReserveConnection();
         Statement stmt = conn.createStatement();
         ResultSet rs = stmt.executeQuery(String.format("SELECT xmi FROM %s LIMIT 1", dataTable))) {
        while (rs.next()) {
            byte[] xmiData = rs.getBytes("xmi");
            if (xmiData == null) {
                // A SQL NULL value cannot be gzipped data; previously this caused an NPE
                // in the ByteArrayInputStream constructor.
                doGzip = false;
                continue;
            }
            try (GZIPInputStream gzis = new GZIPInputStream(new ByteArrayInputStream(xmiData))) {
                gzis.read();
            } catch (IOException e) {
                log.debug("Attempt to read XMI data in GZIP format failed. Assuming non-gzipped XMI data. Expected exception:", e);
                doGzip = false;
            }
        }
    } catch (SQLException e) {
        // Guard against a null message before matching on it.
        if (e.getMessage() != null && e.getMessage().contains("does not exist"))
            log.error("An exception occurred when trying to read the xmi column of the data table \"{}\". It seems the table does not contain XMI data and this is invalid to use with this reader.", dataTable);
        throw new ResourceInitializationException(e);
    }
}
/**
 * Adds the exact string <tt>tablename</tt> to the list of annotation
 * table names kept in the annotation list table of the active data schema.
 * Attempting to add a name that is already on the list is silently ignored.
 *
 * @param tablename The annotation table name to record.
 */
void addAnnotationTableToList(String tablename) {
    try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
        conn.setAutoCommit(true);
        // try-with-resources: the Statement was previously never closed (resource leak).
        try (Statement stmt = conn.createStatement()) {
            String template = "INSERT INTO %s VALUES('%s')";
            String sql = String.format(template, dbc.getActiveDataPGSchema() + "." + ANNOTATION_LIST_TABLE, tablename);
            stmt.execute(sql);
        }
    } catch (PSQLException e) {
        // A unique-key violation means the name is already listed; that is expected and fine.
        log.debug("Tried to add already existing annotation table to annotation list: \"{}\", ignoring.", tablename);
    } catch (SQLException e) {
        e.printStackTrace();
        // Postgres chains detail exceptions; print the next one as well, if present.
        SQLException ne = e.getNextException();
        if (null != ne)
            ne.printStackTrace();
    }
}
String dataTable = null; try { dataTable = dbc.getNextOrThisDataTable(tableName); dbc.checkTableDefinition(dataTable, xmiDocumentTableSchema.getName()); } catch (CoStoSysSQLRuntimeException e) { throw new ResourceInitializationException(e); try { String error; if (dbc.isDataTable(tableName)) error = String.format("The table \"%s\" specified to read for the %s does not match the " + "XMI text storage data schema. Either the DoGzip parameter does not match the setting that " +
try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) { ids = dbc.retrieveAndMark(tableName, getReaderComponentName(), hostName, pid, limit, selectionOrder); " file {} does not match the columns in the subset table {}: {}", dbc.getActiveTableSchema(), costosysConfig, tableName, e.getMessage()); throw new IllegalArgumentException(e); if (!joinTables) { log.trace("Fetching data from the data table {} without additional tables.", dataTable); documents = dbc.retrieveColumnsByTableSchema(ids, dataTable); } else { log.trace("Fetching data by joining tables {}. The used table schemas are {}.", tables, schemas); documents = dbc.retrieveColumnsByTableSchema(ids, tables, schemas); documents = dbc.queryWithTime(ids, dataTable, dataTimestamp);
try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) { nsAndXmiVersion = getNamespaceMap(); numDataRetrievedDataFields = dbc.getFieldConfiguration().getColumnsToRetrieve().length; log.info("{}: {}", PARAM_XERCES_ATTRIBUTE_BUFFER_SIZE, xercesAttributeBufferSize); log.info("Data columns set for retrieval: {}", Arrays.toString(dbc.getFieldConfiguration().getColumnsToRetrieve()));
try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) { log.trace("Using connection {} to retrieveAndMark", ignored.getConnection()); ids = dbc.retrieveAndMark(tableName, getClass().getSimpleName(), hostName, pid, limit, selectionOrder); } catch (TableSchemaMismatchException e) { log.error("Table schema mismatch: The active table schema {} specified in the CoStoSys configuration" + " file {} does not match the columns in the subset table {}: {}", dbc.getActiveTableSchema(), costosysConfig, tableName, e.getMessage()); throw new IllegalArgumentException(e);
Map<String, String> field = dbc.getFieldConfiguration(tableSchemaName).getFields().get(1); String xmiFieldType = field.get(JulieXMLConstants.TYPE); if (doGzip) {
String uri = null; if (notFound.size() > 0) { try (CoStoSysConnection conn = dbc.reserveConnection()) { conn.setAutoCommit(true); Statement stmt = conn.createStatement(); String sql = String.format("SELECT %s FROM %s", PREFIX, dbc.getActiveDataPGSchema() + "." + XMI_NS_TABLE); ResultSet rs = stmt.executeQuery(String.format(sql)); while (rs.next()) { prefix = nsEntry.getKey(); uri = nsEntry.getValue(); sql = String.format(template, dbc.getActiveDataPGSchema() + "." + XMI_NS_TABLE, prefix, uri); stmt.execute(sql);
try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()){ conn.setAutoCommit(false); for (String tableName : serializedCASes.keySet()) { serializedCASes.get(tableName).size(), tableName); if (storeAll) { dbc.updateFromRowIterator(iterator, tableName, false, schemaDocument); } else { dbc.updateFromRowIterator(iterator, tableName, false, tableName.equals(effectiveDocTableName) ? schemaDocument : schemaAnnotation); serializedCASes.get(tableName).size(), tableName); if (storeAll) { dbc.importFromRowIterator(iterator, tableName, false, schemaDocument); } else { dbc.importFromRowIterator(iterator, tableName, false, tableName.equals(effectiveDocTableName) ? schemaDocument : schemaAnnotation);
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); driver = (String) getConfigParameterValue(PARAM_DB_DRIVER); batchSize = Optional.ofNullable((Integer) getConfigParameterValue(PARAM_BATCH_SIZE)).orElse(Integer.parseInt(DEFAULT_BATCH_SIZE)); tableName = (String) getConfigParameterValue(PARAM_TABLE); selectionOrder = (String) getConfigParameterValue(PARAM_SELECTION_ORDER); whereCondition = (String) getConfigParameterValue(PARAM_WHERE_CONDITION); limitParameter = (Integer) getConfigParameterValue(PARAM_LIMIT); costosysConfig = (String) getConfigParameterValue(PARAM_COSTOSYS_CONFIG_NAME); checkParameters(); try { // It might happen that a subclass has already initialized the DBC if (dbc == null) dbc = new DataBaseConnector(costosysConfig); dbc.setQueryBatchSize(batchSize); checkTableExists(); logConfigurationState(); } catch (FileNotFoundException e) { throw new ResourceInitializationException(e); } }
/** * If <code>documentTableParameter</code> is not schema qualified, prependns the active data postgres schema. * * @param documentTableParameter The document table, as given in the component parameter. * @return The effective document table name. */ String getEffectiveDocumentTableName(String documentTableParameter) { // If the table is already schema qualified, accept it. if (documentTableParameter.contains(".")) return documentTableParameter; return dbc.getActiveDataPGSchema() + "." + documentTableParameter; }