/**
 * Creates an item document from the filtered parts of the given one.
 * <p>
 * The entity id and the revision id are passed on unchanged. Labels,
 * descriptions, aliases, statement groups and site links are each run
 * through the corresponding filter helper before the result is assembled
 * by {@code dataObjectFactory}.
 *
 * @param item
 *            the item document to filter
 * @return the item document built by {@code dataObjectFactory} from the
 *         filtered parts
 */
public ItemDocument filter(ItemDocument item) {
    return dataObjectFactory.getItemDocument(
            item.getEntityId(),
            filterMonoLingualTextValues(item.getLabels().values()),
            filterMonoLingualTextValues(item.getDescriptions().values()),
            // NOTE(review): flatten() presumably merges the per-language
            // alias lists into one collection -- confirm against flatten().
            filterMonoLingualTextValues(flatten(item.getAliases().values())),
            filterStatementGroups(item.getStatementGroups()),
            filterSiteLinks(item.getSiteLinks()),
            item.getRevisionId()
    );
}
@Override public void processItemDocument(ItemDocument itemDocument) { // Record relevant labels: Integer itemId = getNumId(itemDocument.getEntityId().getId(), false); if (this.classRecords.containsKey(itemId)) { this.classRecords.get(itemId).label = itemDocument.findLabel("en"); } countTerms(itemDocument, itemStatistics); processStatementDocument(itemDocument, itemStatistics); this.countSiteLinks += itemDocument.getSiteLinks().size(); for (SiteLink siteLink : itemDocument.getSiteLinks().values()) { countSiteLink(siteLink); } }
@Override public void processItemDocument(ItemDocument itemDocument) { for (String propertyId : integerProperties) { if (hasPlusMinusOneValues(itemDocument .findStatementGroup(propertyId))) { fixIntegerPrecisions(itemDocument.getEntityId(), propertyId); } // else: ignore items that have no value or only correct values // for the property we consider } }
/** * Returns true if the parameters are two {@link ItemDocument} objects with * exactly the same data. It does not matter if they are different * implementations of the interface as long as their content is the same. * * @param o1 * the first object to compare * @param o2 * the second object to compare * @return true if both objects are equal */ public static boolean equalsItemDocument(ItemDocument o1, Object o2) { if (o2 == o1) { return true; } if (!(o2 instanceof ItemDocument)) { return false; } ItemDocument other = (ItemDocument) o2; // Note: item id already compared by equalsTermedDocument() return equalsTermedDocument(o1, other) && o1.getSiteLinks().equals(other.getSiteLinks()) && o1.getStatementGroups().equals(other.getStatementGroups()); }
@Override public void processItemDocument(ItemDocument itemDocument) { if (itemDocument.hasStatement("P1181")) { if (lacksSomeLanguage(itemDocument)) { addLabelForNumbers(itemDocument.getEntityId()); } else { System.out.println("*** Labels already complete for " + itemDocument.getEntityId().getId()); } } // else: ignore items that have no numeric value }
boolean isHuman = false; for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { switch (statementGroup.getProperty().getId()) { case "P21": // P21 is "sex or gender" for (SiteLink siteLink : itemDocument.getSiteLinks().values()) { SiteRecord siteRecord = getSiteRecord(siteLink.getSiteKey()); siteRecord.pageCount++; if (this.genderNames.containsKey(itemDocument.getEntityId())) { MonolingualTextValue label = itemDocument.getLabels().get("en"); if (label != null) { this.genderNames.put(itemDocument.getEntityId(), label.getText());
if (!itemDocument.hasStatementValue(filterPropertyId, filterValue)) { return; .findStatementStringValue(extractPropertyId); out.print(itemDocument.getEntityId().getId()); out.print(","); out.print(csvEscape(itemDocument.findLabel("en"))); out.print(","); out.print(csvEscape(itemDocument.findLabel("de"))); out.print(","); out.print(csvEscape(stringValue.getString())); out.print(","); SiteLink enwiki = itemDocument.getSiteLinks().get("enwiki"); if (enwiki != null) { out.print(csvEscape(enwiki.getPageTitle())); SiteLink dewiki = itemDocument.getSiteLinks().get("dewiki"); if (dewiki != null) { out.print(csvEscape(dewiki.getPageTitle()));
.findStatementQuantityValue("P1181"); if (number == null) { System.out.println("*** No unique numeric value for " + qid); if (!currentItemDocument.hasStatementValue("P31", numberClasses)) { System.out .println("*** " currentItemDocument.getRevisionId()); ArrayList<String> languages = new ArrayList<>( arabicNumeralLanguages.length); for (int i = 0; i < arabicNumeralLanguages.length; i++) { if (!currentItemDocument.getLabels().containsKey( arabicNumeralLanguages[i])) { itemDocumentBuilder.withLabel(numberString, logEntityModification(currentItemDocument.getEntityId(), numberString, languages);
/**
 * Builds a human-readable string representation of the given item
 * document: a header with IRI and revision id, the term and statement
 * sections, and finally the site links ordered by site key and separated
 * by "; ".
 *
 * @see java.lang.Object#toString()
 * @param o
 *            the object to represent as string
 * @return a string representation of the object
 */
public static String toString(ItemDocument o) {
    StringBuilder result = new StringBuilder();
    result.append("==ItemDocument ")
            .append(o.getEntityId().getIri())
            .append(" (r")
            .append(o.getRevisionId())
            .append(") ")
            .append("==")
            .append(toStringForTermedDocument(o))
            .append(toStringForStatementDocument(o))
            .append("* Site links: ");

    // Sorting the keys makes the output independent of the map's own order.
    SortedSet<String> sortedSiteKeys = new TreeSet<>(o.getSiteLinks()
            .keySet());
    // Empty before the first entry, "; " before every later one.
    String separator = "";
    for (String siteKey : sortedSiteKeys) {
        result.append(separator)
                .append(toString(o.getSiteLinks().get(siteKey)));
        separator = "; ";
    }
    return result.toString();
}
String data = JsonSerializer.getJsonString(itemDocument); return (ItemDocument) this.wbEditingAction.wbEditEntity(itemDocument .getEntityId().getId(), null, null, null, data, clear, this.editAsBot, itemDocument.getRevisionId(), summary);
@Override public void processItemDocument(ItemDocument itemDocument) { this.countItems++; if (itemDocument.getStatementGroups().size() > 0) { this.countPropertyItems++; if (TOP_LEVEL_CLASSES.contains(itemDocument.getEntityId().getId()) || this.classRecords.containsKey(itemDocument.getEntityId())) { classRecord = getClassRecord(itemDocument.getEntityId()); for (StatementGroup sg : itemDocument.getStatementGroups()) { PropertyRecord propertyRecord = getPropertyRecord(sg.getProperty()); propertyRecord.itemCount++; boolean isSubclassOf = "P279".equals(sg.getProperty().getId()); if (isSubclassOf && classRecord == null) { classRecord = getClassRecord(itemDocument.getEntityId());
public void writeItemDocument(ItemDocument document) throws RDFHandlerException { if (!hasTask(RdfSerializer.TASK_ITEMS)) { return; } String subjectUri = document.getEntityId().getIri(); // probably // construct the // URI from // Vocabulary Resource subject = this.rdfWriter.getUri(subjectUri); if ((this.tasks & (RdfSerializer.TASK_ALL_EXACT_DATA | RdfSerializer.TASK_SIMPLE_STATEMENTS)) != 0) { this.rdfWriter.writeTripleValueObject(subject, RdfWriter.RDF_TYPE, RdfWriter.WB_ITEM); } writeDocumentTerms(subject, document); if (hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)) { writeSimpleStatements(subject, document); } if (hasTask(RdfSerializer.TASK_STATEMENTS)) { writeStatements(subject, document); } writeSiteLinks(subject, document.getSiteLinks()); this.snakRdfConverter.writeAuxiliaryTriples(); this.owlDeclarationBuffer.writePropertyDeclarations(this.rdfWriter, hasTask(RdfSerializer.TASK_STATEMENTS), hasTask(RdfSerializer.TASK_SIMPLE_STATEMENTS)); this.referenceRdfConverter.writeReferences(); }
@Override public void processItemDocument(ItemDocument itemDocument) { this.itemCount++; // Find the first quantity value for this property, if any: QuantityValue quantityValue = itemDocument .findStatementQuantityValue(numberPropertyId); // If a value was found, compare it to the current maximum: if (quantityValue != null) { this.itemsWithPropertyCount++; BigDecimal numericValue = quantityValue.getNumericValue(); if (this.largestNumberValue == null || numericValue.compareTo(this.largestNumberValue) > 0) { this.largestNumberValue = numericValue; this.largestNumberItem = itemDocument.getEntityId(); MonolingualTextValue label = itemDocument.getLabels().get("en"); if (label != null) { this.largestNumberItemLabel = label.getText(); } else { this.largestNumberItemLabel = this.largestNumberItem .getId(); } } } // Print progress every 100,000 items: if (this.itemCount % 100000 == 0) { printStatus(); } }
/**
 * Counts the given coordinates unless the item document is filtered out.
 * When a site filter is set, only items that have a site link for that
 * site are counted. The coordinates are assumed to be in the admissible
 * range; they are not checked here.
 *
 * @param xCoord
 *            first index into the value grid
 * @param yCoord
 *            second index into the value grid
 * @param itemDocument
 *            the item document the coordinates belong to
 */
public void countCoordinates(int xCoord, int yCoord,
        ItemDocument itemDocument) {
    boolean filteredOut = this.siteFilter != null
            && !itemDocument.getSiteLinks().containsKey(this.siteFilter);
    if (filteredOut) {
        return;
    }

    this.count++;
    this.values[xCoord][yCoord] += 1;

    // Keep the running maximum over all cells up to date.
    if (this.values[xCoord][yCoord] > this.maxValue) {
        this.maxValue = this.values[xCoord][yCoord];
    }
}
if (q42 instanceof ItemDocument) { System.out.println("The English name for entity Q42 is " + ((ItemDocument) q42).getLabels().get("en").getText()); if (q8 instanceof ItemDocument) { System.out.println("The French label for entity Q8 is " + ((ItemDocument) q8).getLabels().get("fr").getText() + "\nand its English Wikipedia page has the title " + ((ItemDocument) q8).getSiteLinks().get("enwiki") .getPageTitle() + ".");
/**
 * Increments, in the given usage record, the co-occurrence counter of
 * every property used in the item document's statement groups, except for
 * the given property itself.
 *
 * @param itemDocument
 *            the item document whose statement groups are inspected
 * @param usageRecord
 *            the record whose {@code propertyCoCounts} are updated
 * @param thisPropertyIdValue
 *            the property that is excluded from the count
 */
private void countCooccurringProperties(ItemDocument itemDocument,
        UsageRecord usageRecord, PropertyIdValue thisPropertyIdValue) {
    for (StatementGroup group : itemDocument.getStatementGroups()) {
        PropertyIdValue otherProperty = group.getProperty();
        if (otherProperty.equals(thisPropertyIdValue)) {
            // A property does not co-occur with itself.
            continue;
        }

        if (usageRecord.propertyCoCounts.containsKey(otherProperty)) {
            usageRecord.propertyCoCounts.put(otherProperty,
                    usageRecord.propertyCoCounts.get(otherProperty) + 1);
        } else {
            usageRecord.propertyCoCounts.put(otherProperty, 1);
        }
    }
}
/**
 * Checks whether the given item document is missing a label in at least
 * one of the languages listed in {@code arabicNumeralLanguages}.
 *
 * @param itemDocument
 *            the item document whose labels are checked
 * @return true if some label is missing
 */
protected boolean lacksSomeLanguage(ItemDocument itemDocument) {
    for (String languageCode : arabicNumeralLanguages) {
        boolean hasLabel = itemDocument.getLabels().containsKey(
                languageCode);
        if (!hasLabel) {
            return true;
        }
    }
    return false;
}
boolean isHuman = false; for (StatementGroup statementGroup : itemDocument.getStatementGroups()) { switch (statementGroup.getProperty().getId()) { case "P21": // P21 is "sex or gender" for (SiteLink siteLink : itemDocument.getSiteLinks().values()) { SiteRecord siteRecord = getSiteRecord(siteLink.getSiteKey()); siteRecord.pageCount++; if (this.genderNames.containsKey(itemDocument.getEntityId())) { MonolingualTextValue label = itemDocument.getLabels().get("en"); if (label != null) { this.genderNames.put(itemDocument.getEntityId(), label.getText());
if (!itemDocument.hasStatementValue(filterPropertyId, filterValue)) { return; .findStatementStringValue(extractPropertyId); out.print(itemDocument.getEntityId().getId()); out.print(","); out.print(csvEscape(itemDocument.findLabel("en"))); out.print(","); out.print(csvEscape(itemDocument.findLabel("de"))); out.print(","); out.print(csvEscape(stringValue.getString())); out.print(","); SiteLink enwiki = itemDocument.getSiteLinks().get("enwiki"); if (enwiki != null) { out.print(csvEscape(enwiki.getPageTitle())); SiteLink dewiki = itemDocument.getSiteLinks().get("dewiki"); if (dewiki != null) { out.print(csvEscape(dewiki.getPageTitle()));
.findStatementQuantityValue("P1181"); if (number == null) { System.out.println("*** No unique numeric value for " + qid); if (!currentItemDocument.hasStatementValue("P31", numberClasses)) { System.out .println("*** " currentItemDocument.getRevisionId()); ArrayList<String> languages = new ArrayList<>( arabicNumeralLanguages.length); for (int i = 0; i < arabicNumeralLanguages.length; i++) { if (!currentItemDocument.getLabels().containsKey( arabicNumeralLanguages[i])) { itemDocumentBuilder.withLabel(numberString, logEntityModification(currentItemDocument.getEntityId(), numberString, languages);