/**
 * Configures the language filter. Once set, all data is preprocessed so
 * that it contains only data for the given languages.
 *
 * @see DocumentDataFilter#setLanguageFilter(Set)
 * @param languageFilter
 *            language codes whose data should be kept (may be an empty
 *            set)
 */
public void setLanguageFilter(Set<String> languageFilter) {
    // Pure delegation: the shared filter object holds the actual setting.
    filter.setLanguageFilter(languageFilter);
}
/**
 * Configures the site link filter. Once set, all data is preprocessed so
 * that it contains only data for the given site keys.
 *
 * @see DocumentDataFilter#setSiteLinkFilter(Set)
 * @param siteLinkFilter
 *            site keys whose site links should be kept (may be an empty
 *            set)
 */
public void setSiteLinkFilter(Set<String> siteLinkFilter) {
    // Pure delegation: the shared filter object holds the actual setting.
    filter.setSiteLinkFilter(siteLinkFilter);
}
/**
 * Computes the value of the API's "props" parameter from the current
 * filter settings and stores it in the given request data.
 * <p>
 * "info" and "datatype" are always requested; terms, claims and site links
 * are requested only if the filter does not exclude them entirely.
 *
 * @param properties
 *            request parameters whose {@code props} field is overwritten
 */
private void setRequestProps(WbGetEntitiesActionData properties) {
    String requestedProps = "info|datatype";

    if (!this.filter.excludeAllLanguages()) {
        requestedProps += "|labels|aliases|descriptions";
    }
    if (!this.filter.excludeAllProperties()) {
        requestedProps += "|claims";
    }
    if (!this.filter.excludeAllSiteLinks()) {
        requestedProps += "|sitelinks";
    }

    properties.props = requestedProps;
}
/**
 * Computes the value of the API's "languages" parameter from the current
 * filter settings and stores it in the given request data.
 * <p>
 * The field is left untouched when the filter excludes all languages or
 * when no language filter is configured (filter set is null).
 *
 * @param properties
 *            request parameters whose {@code languages} field may be set
 */
private void setRequestLanguages(WbGetEntitiesActionData properties) {
    boolean restrictLanguages = !this.filter.excludeAllLanguages()
            && this.filter.getLanguageFilter() != null;

    if (restrictLanguages) {
        properties.languages = ApiConnection
                .implodeObjects(this.filter.getLanguageFilter());
    }
}
/**
 * Computes the value of the API's "sitefilter" parameter from the current
 * filter settings and stores it in the given request data.
 * <p>
 * The field is left untouched when the filter excludes all site links or
 * when no site link filter is configured (filter set is null).
 *
 * @param properties
 *            request parameters whose {@code sitefilter} field may be set
 */
private void setRequestSitefilter(WbGetEntitiesActionData properties) {
    boolean restrictSites = !this.filter.excludeAllSiteLinks()
            && this.filter.getSiteLinkFilter() != null;

    if (restrictSites) {
        properties.sitefilter = ApiConnection
                .implodeObjects(this.filter.getSiteLinkFilter());
    }
}
/**
 * Wraps the given processor into a {@link EntityDocumentProcessorFilter}
 * when at least one global filter (property, site link or language) is
 * configured; otherwise the processor is handed back as is.
 *
 * @param processor
 *            the processor to wrap
 * @return the given processor if no filter set is configured, or a new
 *         filtering wrapper around it otherwise
 */
private EntityDocumentProcessor filterEntityDocumentProcessor(
        EntityDocumentProcessor processor) {
    // A null filter set means "no filter of this kind configured".
    boolean nothingConfigured = this.filter.getPropertyFilter() == null
            && this.filter.getSiteLinkFilter() == null
            && this.filter.getLanguageFilter() == null;

    if (nothingConfigured) {
        return processor;
    }
    return new EntityDocumentProcessorFilter(processor, this.filter);
}
/** * Constructor. Initializes various helper objects we use for the JSON * serialization, and opens the file that we want to write to. * * @throws IOException * if there is a problem opening the output file */ public JsonSerializationProcessor() throws IOException { // The filter is used to copy selected parts of the data. We use this // to remove some parts from the documents we serialize. DocumentDataFilter filter = new DocumentDataFilter(); // Only copy English labels, descriptions, and aliases: filter.setLanguageFilter(Collections.singleton("en")); // Only copy statements of some properties: Set<PropertyIdValue> propertyFilter = new HashSet<>(); propertyFilter.add(Datamodel.makeWikidataPropertyIdValue("P18")); // image propertyFilter.add(Datamodel.makeWikidataPropertyIdValue("P106")); // occupation propertyFilter.add(Datamodel.makeWikidataPropertyIdValue("P569")); // birthdate filter.setPropertyFilter(propertyFilter); // Do not copy any sitelinks: filter.setSiteLinkFilter(Collections.<String>emptySet()); this.datamodelFilter = new DatamodelFilter(new DataObjectFactoryImpl(), new DocumentDataFilter()); // The (compressed) file we write to. OutputStream outputStream = new GzipCompressorOutputStream( new BufferedOutputStream( ExampleHelpers .openExampleFileOuputStream(OUTPUT_FILE_NAME))); this.jsonSerializer = new JsonSerializer(outputStream); this.jsonSerializer.open(); }
.println("*** Fetching data using filters to reduce data volume:"); wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); wbdf.getFilter().setPropertyFilter( Collections.<PropertyIdValue> emptySet()); EntityDocument q8 = wbdf.getEntityDocument("Q8");
throws MediaWikiApiErrorException, IOException { WikibaseDataFetcher wbdf = new WikibaseDataFetcher(connection, siteIri); wbdf.getFilter().excludeAllProperties(); wbdf.getFilter().setLanguageFilter(Collections.singleton("en"));
Datamodel.SITE_WIKIDATA); dataFetcher.getFilter().excludeAllLanguages(); dataFetcher.getFilter().excludeAllSiteLinks();
dataFetcher.getFilter().setLanguageFilter(Collections.emptySet()); dataFetcher.getFilter().setSiteLinkFilter(Collections.emptySet());
/**
 * Copies a list of {@link StatementGroup} objects, keeping only the groups
 * whose property is accepted by the filter.
 *
 * @param statementGroups
 *            the statement groups to copy
 * @return a new list with copies of the accepted groups, or an empty list
 *         if the filter excludes all properties
 */
private List<StatementGroup> copyStatementGroups(List<StatementGroup> statementGroups) {
    // Shortcut: nothing can pass the filter, so skip the iteration.
    if (filter.excludeAllProperties()) {
        return Collections.emptyList();
    }

    List<StatementGroup> retained = new ArrayList<>(statementGroups.size());
    for (StatementGroup group : statementGroups) {
        if (!filter.includePropertyId(group.getProperty())) {
            continue;
        }
        retained.add(copy(group));
    }
    return retained;
}
/**
 * Copies a map of {@link SiteLink} objects, keeping only the entries whose
 * key is accepted by the filter.
 *
 * @param siteLinks
 *            the site links to copy, indexed by the key that is checked
 *            against the filter
 * @return a new map with copies of the accepted site links under their
 *         original keys, or an empty map if the filter excludes all site
 *         links
 */
private Map<String, SiteLink> copySiteLinks(Map<String, SiteLink> siteLinks) {
    // Shortcut: nothing can pass the filter, so skip the iteration.
    if (filter.excludeAllSiteLinks()) {
        return Collections.emptyMap();
    }

    Map<String, SiteLink> retained = new HashMap<>(siteLinks.size());
    for (Entry<String, SiteLink> link : siteLinks.entrySet()) {
        String siteKey = link.getKey();
        if (!filter.includeSiteLink(siteKey)) {
            continue;
        }
        retained.put(siteKey, copy(link.getValue()));
    }
    return retained;
}
/**
 * Copies a collection of {@link MonolingualTextValue} objects, keeping
 * only the values whose language code is accepted by the filter.
 *
 * @param monoLingualTextValues
 *            the values to copy
 * @return a new list with copies of the accepted values, or an empty list
 *         if the filter excludes all languages
 */
private List<MonolingualTextValue> copyMonoLingualTextValues(Collection<MonolingualTextValue> monoLingualTextValues) {
    // Shortcut: nothing can pass the filter, so skip the iteration.
    if (filter.excludeAllLanguages()) {
        return Collections.emptyList();
    }

    List<MonolingualTextValue> retained = new ArrayList<>(monoLingualTextValues.size());
    for (MonolingualTextValue text : monoLingualTextValues) {
        if (!filter.includeLanguage(text.getLanguageCode())) {
            continue;
        }
        retained.add(copy(text));
    }
    return retained;
}
/**
 * Configures the property filter. Once set, all data is preprocessed so
 * that it contains only statements for the given (main) properties.
 *
 * @see DocumentDataFilter#setPropertyFilter(Set)
 * @param propertyFilter
 *            properties whose statements should be kept (may be an empty
 *            set)
 */
public void setPropertyFilter(Set<PropertyIdValue> propertyFilter) {
    // Pure delegation: the shared filter object holds the actual setting.
    filter.setPropertyFilter(propertyFilter);
}
/**
 * Returns the (possibly empty) set of language codes that are used to
 * filter data while copying it, or null if no such filter is configured
 * (default). If not equal to null, only terms in the given languages will
 * be copied.
 *
 * @return set of language codes to use for filtering
 * @deprecated Use {@link DatamodelFilter}
 */
@Deprecated
public Set<String> getOptionLanguageFilter() {
    return this.filter.getLanguageFilter();
}
/**
 * Returns the (possibly empty) set of site keys that are used to filter
 * {@link SiteLink} objects while copying data, or null if no such filter
 * is configured (default). If not equal to null, only site links for the
 * given sites will be copied.
 *
 * @return set of site keys to use for filtering
 * @deprecated Use {@link DatamodelFilter}
 */
@Deprecated
public Set<String> getOptionSiteLinkFilter() {
    return this.filter.getSiteLinkFilter();
}
/**
 * Returns the (possibly empty) set of {@link PropertyIdValue} objects that
 * are used to filter statements while copying data, or null if no such
 * filter is configured (default). If not equal to null, only statements
 * using the given properties will be copied.
 *
 * @return set of properties to use for filtering
 * @deprecated Use {@link DatamodelFilter}
 */
@Deprecated
public Set<PropertyIdValue> getOptionPropertyFilter() {
    return this.filter.getPropertyFilter();
}
DocumentDataFilter documentDataFilter = new DocumentDataFilter(); documentDataFilter.setLanguageFilter(Collections.singleton("en")); propertyFilter.add(Datamodel.makeWikidataPropertyIdValue("P106")); // occupation propertyFilter.add(Datamodel.makeWikidataPropertyIdValue("P569")); // birthdate documentDataFilter.setPropertyFilter(propertyFilter); documentDataFilter.setSiteLinkFilter(Collections.emptySet());
.println("*** Fetching data using filters to reduce data volume:"); wbdf.getFilter().setSiteLinkFilter(Collections.singleton("enwiki")); wbdf.getFilter().setLanguageFilter(Collections.singleton("fr")); wbdf.getFilter().setPropertyFilter( Collections.<PropertyIdValue> emptySet()); EntityDocument q8 = wbdf.getEntityDocument("Q8");