/**
 * Looks up the identity value of a record by walking its property names.
 *
 * @param r the record to inspect
 * @return the record's value for the first property that the configuration
 *         marks as an id property, or {@code null} if no such property exists
 * @throws DukeConfigException if the record carries a property name that
 *         the configuration does not know
 */
private String getid(Record r) {
  for (String name : r.getProperties()) {
    Property definition = config.getPropertyByName(name);
    if (definition == null) {
      // Fail loudly: a record property missing from the configuration is
      // reported as a configuration error rather than silently skipped.
      String message = "Record has property " + name + " which is not in configuration";
      throw new DukeConfigException(message);
    }
    if (!definition.isIdProperty()) {
      continue;
    }
    return r.getValue(name);
  }
  return null;
}
/**
 * Returns the identity of a record: the first non-null value found when
 * reading the record's value for each property in {@code idprops}, in
 * iteration order.
 *
 * @param r the record to inspect
 * @return the first non-null value among the identity properties
 * @throws DukeException if every identity property yields {@code null}
 */
private String getid(Record r) { for (Property p : idprops) { String v = r.getValue(p.getName()); if (v == null) continue; return v; } throw new DukeException("No identity for record " + r); } }
/**
 * Finds an identity value for the given record.
 *
 * Each identity property from the configuration is tried in turn; the first
 * value yielded by iterating the first non-null, non-empty value collection
 * is returned.
 *
 * @param r the record to inspect
 * @return the first identity value found
 * @throws DukeException if no identity property has any value in the record
 */
private String getIdentity(Record r) {
  for (Property idprop : config.getIdentityProperties()) {
    Collection<String> values = r.getValues(idprop.getName());
    if (values != null) {
      // Only the first element (in the collection's iteration order) is used.
      for (String first : values) {
        return first;
      }
    }
  }
  throw new DukeException("No identity found in record [" +
                          PrintMatchListener.toString(r) + "]");
}
/**
 * Tells whether two records share an identity, that is, whether they have
 * at least one value in common for any of the configured identity
 * properties.
 *
 * @param r1 the first record
 * @param r2 the second record
 * @return {@code true} if some identity property has a value present in
 *         both records, {@code false} otherwise
 */
private boolean isSameAs(Record r1, Record r2) {
  for (Property idp : config.getIdentityProperties()) {
    Collection<String> vs2 = r2.getValues(idp.getName());
    Collection<String> vs1 = r1.getValues(idp.getName());
    // A property missing from either record cannot produce a shared value.
    // Both sides are checked so that a record lacking the property is
    // treated the same way whichever argument position it is passed in.
    if (vs1 == null || vs2 == null) {
      continue;
    }
    for (String v1 : vs1) {
      if (vs2.contains(v1)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Returns the record's value for the first configured identity property
 * that has a non-null value.
 *
 * @param r the record to inspect
 * @return the first non-null identity value, or {@code null} if the record
 *         has no value for any identity property
 */
private String getId(Record r) {
  for (Property identity : config.getIdentityProperties()) {
    String candidate = r.getValue(identity.getName());
    if (candidate == null) {
      continue;
    }
    return candidate;
  }
  return null;
}
/**
 * Reports whether the named property is compared with a tokenized
 * comparator.
 *
 * NOTE(review): the property returned by the configuration lookup is
 * dereferenced without a null check; confirm that callers only pass names
 * the configuration knows.
 *
 * @param fieldName name of the property to look up in the configuration
 * @return {@code true} if the property has a comparator and that comparator
 *         reports itself as tokenized; {@code false} otherwise
 */
private boolean isFuzzy(String fieldName) {
  Comparator comparator = config.getPropertyByName(fieldName).getComparator();
  if (comparator == null) {
    return false;
  }
  return comparator.isTokenized();
}
/**
 * Reads all available records from the configured data sources and
 * processes them in batches, notifying the listeners throughout.
 *
 * Delegates to the two-argument overload, using the data sources obtained
 * from the configuration.
 *
 * @param batch_size the batch size handed on to the delegate
 */
public void deduplicate(int batch_size) {
  deduplicate(config.getDataSources(), batch_size);
}
/**
 * Returns the next record from the current underlying iterator.
 *
 * After the record has been fetched, if the current iterator has nothing
 * left, {@code findNextIterator()} is called before returning.
 *
 * @return the next record
 */
public Record next() {
  final Record current = recit.next();
  if (recit.hasNext()) {
    return current;
  }
  // Current iterator is drained: move on before handing back the record.
  findNextIterator();
  return current;
}
/**
 * Builds an asserted link between two ids and stores it in the link
 * database.
 *
 * The link is created with status {@code LinkStatus.ASSERTED} and a final
 * constructor argument of 1.0.
 *
 * @param id1 the first id
 * @param id2 the second id
 * @param kind the kind of link to create
 * @return the link that was handed to the link database
 */
private Link link(String id1, String id2, LinkKind kind) {
  Link asserted = new Link(id1, id2, LinkStatus.ASSERTED, kind, 1.0);
  linkdb.assertLink(asserted);
  return asserted;
}
/**
 * Does record linkage across the two groups, but does not link records
 * within each group.
 *
 * Delegates to the three-argument overload with the data sources of
 * groups 1 and 2 from the configuration and {@code DEFAULT_BATCH_SIZE}.
 */
public void link() {
  link(config.getDataSources(1),
       config.getDataSources(2),
       DEFAULT_BATCH_SIZE);
}
/**
 * Creates a link database for the given database type, with an empty table
 * prefix and a {@code DummyLogger} as the logger.
 *
 * @param dbtype the database type name, translated by
 *               {@code getDatabaseType} before being stored
 */
public RDBMSLinkDatabase(String dbtype) {
  // The parameter shadows the field, hence the explicit this-qualification.
  this.dbtype = getDatabaseType(dbtype);
  this.tblprefix = "";
  this.logger = new DummyLogger();
}
/**
 * Commits all state to disk and frees up resources.
 *
 * Closes the first database and, when there are two databases, the second
 * one as well. The second database is closed even if closing the first one
 * throws, so a failure on the first cannot leave the second open.
 */
public void close() {
  try {
    database1.close();
  } finally {
    // Runs whether or not the first close succeeded; any exception from the
    // first close still propagates unless this close throws as well.
    if (hasTwoDatabases()) {
      database2.close();
    }
  }
}
/**
 * Walks the record's property names and returns the record's value for the
 * first one that the configuration marks as an id property.
 *
 * NOTE(review): the result of {@code config.getPropertyByName(propname)} is
 * dereferenced without a null check. If the lookup can return null for a
 * property that is not in the configuration, this throws
 * NullPointerException -- confirm and consider an explicit check.
 *
 * @param r the record to inspect
 * @return the value of the first id property found, or {@code null} if the
 *         record has no id property
 */
private String getid(Record r) { for (String propname : r.getProperties()) if (config.getPropertyByName(propname).isIdProperty()) return r.getValue(propname); return null; } }
/**
 * Returns the identity of a record: the first non-null value found when
 * reading the record's value for each property in {@code idprops}, in
 * iteration order.
 *
 * @param r the record to inspect
 * @return the first non-null value among the identity properties
 * @throws DukeException if every identity property yields {@code null}
 */
private String getid(Record r) { for (Property p : idprops) { String v = r.getValue(p.getName()); if (v == null) continue; return v; } throw new DukeException("No identity for record " + r); } }
/**
 * Registers the record in {@code idmap} under every value it has for each
 * configured identity property.
 *
 * Identity properties for which the record has no value collection are
 * skipped.
 *
 * @param record the record to index
 */
protected void indexById(Record record) {
  for (Property idprop : config.getIdentityProperties()) {
    // Declared as Iterable (java.lang) so that no extra import is needed.
    Iterable<String> ids = record.getValues(idprop.getName());
    if (ids == null) {
      // The record lacks this identity property; nothing to index for it.
      continue;
    }
    for (String id : ids) {
      idmap.put(id, record);
    }
  }
}
/**
 * Reads all available records from the configured data sources and
 * processes them in batches, notifying the listeners throughout.
 *
 * Delegates to the two-argument overload with the data sources from the
 * configuration and {@code DEFAULT_BATCH_SIZE}.
 */
public void deduplicate() {
  deduplicate(config.getDataSources(), DEFAULT_BATCH_SIZE);
}