/**
 * Aliases this dataset, bracketing the parent call with the user-triggered flag:
 * the value returned by {@code initializeFunction} is captured before delegating
 * to {@code super.alias} and restored via {@code setIsUserTriggered} afterwards.
 */
@Override
public Dataset<T> alias(final String alias) {
  final boolean wasUserTriggered = initializeFunction(alias);

  final Dataset<T> parentResult = super.alias(alias);
  final Dataset<T> wrapped = from(parentResult);

  this.setIsUserTriggered(wasUserTriggered);
  return wrapped;
}
/**
 * Symbol-based overload of {@link #alias(String)}; identical flag handling —
 * capture the result of {@code initializeFunction} before delegating to
 * {@code super.alias}, then restore it with {@code setIsUserTriggered}.
 */
@Override
public Dataset<T> alias(final scala.Symbol alias) {
  final boolean wasUserTriggered = initializeFunction(alias);

  final Dataset<T> parentResult = super.alias(alias);
  final Dataset<T> wrapped = from(parentResult);

  this.setIsUserTriggered(wasUserTriggered);
  return wrapped;
}
/**
 * Returns all concept maps that are disjoint with concept maps stored in the given database and
 * adds them to our collection. The directory may be anything readable from a Spark path,
 * including local filesystems, HDFS, S3, or others.
 *
 * @param path a path from which disjoint concept maps will be loaded
 * @param database the database to check concept maps against
 * @return an instance of ConceptMaps that includes content from that directory that is disjoint
 *     with content already contained in the given database.
 */
public C withDisjointMapsFromDirectory(String path, String database) {

  // (url, version) pairs already present in the database's concept map table.
  // NOTE(review): the database name is concatenated directly into the SQL string —
  // assumed to come from trusted configuration, not user input; confirm upstream.
  Dataset<UrlAndVersion> currentMembers = this.spark
      .sql("SELECT url, version FROM " + database + "." + CONCEPT_MAP_TABLE)
      .as(URL_AND_VERSION_ENCODER)
      .alias("current");

  // Left-anti join keeps only the loaded maps whose url+version is NOT already
  // present in the database, i.e. the disjoint set.
  Dataset<T> maps = conceptMapsDatasetFromDirectory(path)
      .alias("new")
      .join(currentMembers, col("new.url").equalTo(col("current.url"))
          .and(col("new.version").equalTo(col("current.version"))), "leftanti")
      .as(conceptMapEncoder);

  return withConceptMaps(maps);
}
/**
 * Returns all concept maps that are disjoint with concept maps stored in the given database and
 * adds them to our collection. The directory may be anything readable from a Spark path,
 * including local filesystems, HDFS, S3, or others.
 *
 * @param path a path from which disjoint concept maps will be loaded
 * @param database the database to check concept maps against
 * @return an instance of ConceptMaps that includes content from that directory that is disjoint
 *     with content already contained in the given database.
 */
public C withDisjointMapsFromDirectory(String path, String database) {

  // Candidate maps loaded from the directory, aliased for the join below.
  Dataset<T> candidates = conceptMapsDatasetFromDirectory(path).alias("new");

  // url/version pairs that the database already contains.
  Dataset<UrlAndVersion> existing = this.spark
      .sql("SELECT url, version FROM " + database + "." + CONCEPT_MAP_TABLE)
      .as(URL_AND_VERSION_ENCODER)
      .alias("current");

  // A left-anti join retains only candidates whose url+version is absent
  // from the existing set — exactly the disjoint maps.
  Dataset<T> disjoint = candidates
      .join(existing,
          col("new.url").equalTo(col("current.url"))
              .and(col("new.version").equalTo(col("current.version"))),
          "leftanti")
      .as(conceptMapEncoder);

  return withConceptMaps(disjoint);
}
/**
 * Returns all value sets that are disjoint with value sets stored in the given database and
 * adds them to our collection. The directory may be anything readable from a Spark path,
 * including local filesystems, HDFS, S3, or others.
 *
 * @param path a path from which disjoint value sets will be loaded
 * @param database the database to check value sets against
 * @return an instance of ValueSets that includes content from that directory that is disjoint
 *     with content already contained in the given database.
 */
public C withDisjointValueSetsFromDirectory(String path, String database) {

  // Distinct (url, version) pairs already present in the database's value set table.
  Dataset<UrlAndVersion> currentMembers = this.spark.table(database + "." + VALUE_SETS_TABLE)
      .select("url", "version")
      .distinct()
      .as(URL_AND_VERSION_ENCODER)
      .alias("current");

  // Left-anti join keeps only loaded value sets whose url+version is NOT already
  // present in the database, i.e. the disjoint set.
  Dataset<T> valueSets = valueSetDatasetFromDirectory(path)
      .alias("new")
      .join(currentMembers, col("new.url").equalTo(col("current.url"))
          .and(col("new.version").equalTo(col("current.version"))), "leftanti")
      .as(valueSetEncoder);

  return withValueSets(valueSets);
}
/**
 * Returns all value sets that are disjoint with value sets stored in the given database and
 * adds them to our collection. The directory may be anything readable from a Spark path,
 * including local filesystems, HDFS, S3, or others.
 *
 * @param path a path from which disjoint value sets will be loaded
 * @param database the database to check value sets against
 * @return an instance of ValueSets that includes content from that directory that is disjoint
 *     with content already contained in the given database.
 */
public C withDisjointValueSetsFromDirectory(String path, String database) {

  // Candidate value sets loaded from the directory, aliased for the join below.
  Dataset<T> candidates = valueSetDatasetFromDirectory(path).alias("new");

  // Distinct url/version pairs that the database already contains.
  Dataset<UrlAndVersion> existing = this.spark.table(database + "." + VALUE_SETS_TABLE)
      .select("url", "version")
      .distinct()
      .as(URL_AND_VERSION_ENCODER)
      .alias("current");

  // A left-anti join retains only candidates whose url+version is absent
  // from the existing set — exactly the disjoint value sets.
  Dataset<T> disjoint = candidates
      .join(existing,
          col("new.url").equalTo(col("current.url"))
              .and(col("new.version").equalTo(col("current.version"))),
          "leftanti")
      .as(valueSetEncoder);

  return withValueSets(disjoint);
}