/**
 * Asserts that {@code FilenameUtils.getName(path)} yields the expected name.
 *
 * @param expected the file name the extraction should produce
 * @param path     the input handed to {@code FilenameUtils.getName}
 */
private void testFilenameEquality(String expected, String path) {
    String actual = FilenameUtils.getName(path);
    assertEquals(expected, actual);
}
/**
 * Checks that {@code FilenameUtils.normalize(null)} is rejected with an
 * {@link IllegalArgumentException} whose message mentions both "name" and
 * "not be null".
 */
@Test
public void normalizeWithNull() throws Exception {
    IllegalArgumentException rejection = null;
    try {
        FilenameUtils.normalize(null);
    } catch (IllegalArgumentException x) {
        rejection = x;
    }

    // No exception at all means the null check is missing.
    if (rejection == null) {
        fail("missing check for null parameters");
    }

    String message = rejection.getMessage();
    assertTrue(message != null && message.contains("name"));
    assertTrue(message != null && message.contains("not be null"));
}
@Override void writeFile(final String path, final byte[] data) throws IOException { ObjectMetadata bucketMetadata = new ObjectMetadata(); bucketMetadata.setContentMD5(Md5Utils.md5AsBase64(data)); bucketMetadata.setContentLength(data.length); // Give Tika a few hints for the content detection Metadata tikaMetadata = new Metadata(); tikaMetadata.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(FilenameUtils.normalize(path))); // Fire! try (InputStream bis = TikaInputStream.get(data, tikaMetadata)) { bucketMetadata.setContentType(TIKA_DETECTOR.detect(bis, tikaMetadata).toString()); s3.putObject(bucket, resolveKey(path), bis, bucketMetadata); } }
private boolean walk(Iterator<S3ObjectSummary> iter, ObjectId file, String path) throws IOException { byte[] content; byte[] newHash; LOG.debug("Start processing file: {}", path); try (DigestInputStream is = new DigestInputStream(repository.open(file).openStream(), DigestUtils.getMd5Digest())) { // Get content content = IOUtils.toByteArray(is); // Get hash newHash = is.getMessageDigest().digest(); } if (isUploadFile(iter, path, Hex.encodeHexString(newHash))) { LOG.info("Uploading file: {}", path); ObjectMetadata bucketMetadata = new ObjectMetadata(); bucketMetadata.setContentMD5(Base64.encodeAsString(newHash)); bucketMetadata.setContentLength(content.length); // Give Tika a few hints for the content detection Metadata tikaMetadata = new Metadata(); tikaMetadata.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(FilenameUtils.normalize(path))); // Fire! try (InputStream bis = TikaInputStream.get(content, tikaMetadata)) { bucketMetadata.setContentType(TIKA_DETECTOR.detect(bis, tikaMetadata).toString()); s3.putObject(bucket.getName(), path, bis, bucketMetadata); return true; } } LOG.info("Skipping file (same checksum): {}", path); return false; }
private String getResourceName(Metadata metadata, ParserState state) { String objectName = ""; if (metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY) != null) { objectName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY); } else if (metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID) != null) { objectName = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID); } else { objectName = "embedded-" + (++state.unknownCount); } //make sure that there isn't any path info in the objectName //some parsers can return paths, not just file names objectName = FilenameUtils.getName(objectName); return objectName; }
/**
 * Checks that {@code FilenameUtils.normalize} turns each of the characters
 * 0 to 31 into {@code %} plus a two-digit uppercase hex code, and leaves the
 * dot between them untouched.
 */
@Test
public void normalizeWithNotPrintableChars() throws Exception {
    // Input: chars 0..15, a literal dot, then chars 16..31.
    StringBuilder input = new StringBuilder();
    for (char c = 0; c < 16; c++) {
        input.append(c);
    }
    input.append('.');
    for (char c = 16; c < 32; c++) {
        input.append(c);
    }

    String expected =
        "%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"
            + "."
            + "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F";

    String actual = FilenameUtils.normalize(input.toString());
    assertEquals(expected, actual);
}
private String getResourceName(Metadata metadata) { String objectName = ""; if (metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY) != null) { objectName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY); } else if (metadata.get(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID) != null) { objectName = metadata.get(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID); } else { objectName = "embedded-" + (++unknownCount); } //make sure that there isn't any path info in the objectName //some parsers can return paths, not just file names objectName = FilenameUtils.getName(objectName); return objectName; }
/**
 * Checks {@code FilenameUtils.normalize} on a name mixing several special
 * characters.
 *
 * The expected value has {@code ? * : < > |}, newline, tab and carriage
 * return percent-encoded, while {@code /}, {@code \} and {@code .} are
 * unchanged.
 */
@Test
public void normalizeWithReservedChars() throws Exception {
    String actual = FilenameUtils.normalize("?a/b\nc\td\re*f\\g:h<i>j.txt|");
    String expected = "%3Fa/b%0Ac%09d%0De%2Af\\g%3Ah%3Ci%3Ej.txt%7C";
    assertEquals(expected, actual);
}
private String getResourceName(Metadata metadata, ParserState state) { String objectName = ""; if (metadata.get(Metadata.RESOURCE_NAME_KEY) != null) { objectName = metadata.get(Metadata.RESOURCE_NAME_KEY); } else if (metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID) != null) { objectName = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID); } else { objectName = "embedded-" + (++state.unknownCount); } //make sure that there isn't any path info in the objectName //some parsers can return paths, not just file names objectName = FilenameUtils.getName(objectName); return objectName; }
@Test public void normalizeWithReservedChar() throws Exception { final String[] TEST_NAMES = { "test?.txt", "?test.txt", "test.txt?", "?test?txt?" }; final String[] EXPECTED_NAMES = { "test%3F.txt", "%3Ftest.txt", "test.txt%3F", "%3Ftest%3Ftxt%3F" }; for (int i=0; i<TEST_NAMES.length; ++i) { //System.out.println("checking " + TEST_NAMES[i]); assertEquals(EXPECTED_NAMES[i], FilenameUtils.normalize(TEST_NAMES[i])); } }
/**
 * Different filesystems and operating systems have different restrictions
 * on the names that can be used for files and directories.
 * {@code FilenameUtils.normalize()} returns a cross-platform file name in
 * which special characters are replaced by a hex-based code: a percent sign
 * followed by the character's hexadecimal value.
 * For example, {@code why?.zip} is converted into {@code why%3F.zip}.
 *
 * <p>Reserved characters are the ones listed in
 * {@code FilenameUtils.RESERVED_FILENAME_CHARACTERS}.
 *
 * <p>This test checks that a name without any such characters comes back
 * unchanged.
 *
 * @see <a href="http://en.wikipedia.org/wiki/Filename#Comparison_of_filename_limitations">Comparison of filename limitations</a>
 */
@Test
public void normalizeNothingTodo() throws Exception {
    final String plainName = "test.zip";
    String normalized = FilenameUtils.normalize(plainName);
    assertEquals(plainName, normalized);
}