@Override protected void handleEntry(ArcRecordBase entry) throws IOException { final int maxLEN = 4095; String entryUri = entry.getUrlStr(); String entryPath = new URL(entryUri).getFile(); // remove querystring to get prefix (may include slashes) final int queryPos = entryPath.indexOf('?'); String querylessPath = queryPos > 0 ? entryPath.substring(0, queryPos) : entryPath; String entryName = entryPath.substring(querylessPath.lastIndexOf('/') + 1); String prefixPath = entryUri.substring(0, entryUri.length() - entryName.length()); ResourceId correlationId = parentId; // by default, files are correlated to the parent. // If there is a path, get the actual correlation id for its parent folder: if (!prefixPath.isEmpty()) { correlationId = directories.get(prefixPath); // If we haven't seen the path before, add the ancestor folders not yet seen: if (correlationId == null) { correlationId = processAncestorFolders(WEB_ARCHIVE_TYPE, prefixPath, entryUri, parentId, parentName, directories); } } // if the file name (including querystring) is > 4096 chars, truncate it for the DB String truncatedName = entryName.length() < maxLEN ? entryName : entryName.substring(0, maxLEN); submit(entry, truncatedName, parentName, in, correlationId, originatorNodeId); }
@Override protected void handleEntry(ArcRecordBase entry) throws IOException { final int maxLEN = 4095; String entryUri = entry.getUrlStr(); String entryPath = new URL(entryUri).getFile(); // remove querystring to get prefix (may include slashes) final int queryPos = entryPath.indexOf('?'); String querylessPath = queryPos > 0 ? entryPath.substring(0, queryPos) : entryPath; String entryName = entryPath.substring(querylessPath.lastIndexOf('/') + 1); String prefixPath = entryUri.substring(0, entryUri.length() - entryName.length()); ResourceId correlationId = parentId; // by default, files are correlated to the parent. // If there is a path, get the actual correlation id for its parent folder: if (!prefixPath.isEmpty()) { correlationId = directories.get(prefixPath); // If we haven't seen the path before, add the ancestor folders not yet seen: if (correlationId == null) { correlationId = processAncestorFolders(WEB_ARCHIVE_TYPE, prefixPath, entryUri, parentId, parentName, directories); } } // if the file name (including querystring) is > 4096 chars, truncate it for the DB String truncatedName = entryName.length() < maxLEN ? entryName : entryName.substring(0, maxLEN); submit(entry, truncatedName, parentName, in, correlationId, originatorNodeId); }