/**
 * Returns the digest hash of the file.
 * <p>
 * {@code checkMetadata()} is invoked first; the value is then read from the file's metadata.
 *
 * @return the hash reported by the metadata
 */
public String hash() {
  checkMetadata();
  return metadata.hash();
}
/**
 * Reads a binary WOFF resource as UTF-8 and checks that metadata is still produced and that
 * the first WARN log reports the invalid character together with the encoding hint.
 */
@Test
public void binary_file_with_unmappable_character() throws Exception {
  File fontFile = new File(getClass().getResource("glyphicons-halflings-regular.woff").toURI());

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(fontFile), StandardCharsets.UTF_8, fontFile.getAbsolutePath());

  assertThat(result.lines()).isEqualTo(135);
  assertThat(result.nonBlankLines()).isEqualTo(133);
  assertThat(result.hash()).isNotEmpty();

  String firstWarning = logTester.logs(LoggerLevel.WARN).get(0);
  assertThat(firstWarning)
    .contains("Invalid character encountered in file")
    .contains(
      "glyphicons-halflings-regular.woff at line 1 for encoding UTF-8. Please fix file content or configure the encoding to be used using property 'sonar.sourceEncoding'.");
}
/**
 * Writes content encoded as cp1252 and reads it back as UTF-8. The expected hash is that of
 * the text with U+FFFD in place of the character that could not be decoded.
 */
@Test
public void read_with_wrong_encoding() throws Exception {
  File cp1252File = temp.newFile();
  FileUtils.write(cp1252File, "marker´s\n", Charset.forName("cp1252"));

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(cp1252File), StandardCharsets.UTF_8, cp1252File.getName());

  assertThat(result.lines()).isEqualTo(2);
  assertThat(result.hash()).isEqualTo(md5Hex("marker\ufffds\n"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 9);
}
/**
 * Non-ASCII content with CRLF line endings, written and read as UTF-16. The expected hash is
 * computed from the UTF-8 bytes of the same text with LF line endings.
 */
@Test
public void non_ascii_utf_16() throws Exception {
  File utf16File = temp.newFile();
  FileUtils.write(utf16File, "föo\r\nbàr\r\n\u1D11Ebaßz\r\n", StandardCharsets.UTF_16, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(utf16File), StandardCharsets.UTF_16, utf16File.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n".getBytes(StandardCharsets.UTF_8)));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 5, 10, 18);
}
/**
 * CRLF-separated content with no terminator after the last line: three lines are expected,
 * and the expected hash is that of the same text with LF separators.
 */
@Test
public void windows_without_latest_eol() throws Exception {
  File crlfFile = temp.newFile();
  FileUtils.write(crlfFile, "foo\r\nbar\r\nbaz", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(crlfFile), StandardCharsets.UTF_8, crlfFile.getName());

  assertThat(result.lines()).isEqualTo(3);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 5, 10);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 8, 13);
  assertThat(result.isEmpty()).isFalse();
}
/**
 * Non-ASCII content with CRLF line endings, written and read as UTF-8. The expected hash is
 * that of the same text with LF line endings.
 */
@Test
public void non_ascii_utf_8() throws Exception {
  File utf8File = temp.newFile();
  FileUtils.write(utf8File, "föo\r\nbàr\r\n\u1D11Ebaßz\r\n", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(utf8File), StandardCharsets.UTF_8, utf8File.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("föo\nbàr\n\u1D11Ebaßz\n"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 5, 10, 18);
}
/**
 * CR-separated content ending with a CR: four lines are expected (the last one empty), and
 * the expected hash is that of the same text with LF separators.
 */
@Test
public void mac_with_latest_eol() throws Exception {
  File crFile = temp.newFile();
  FileUtils.write(crFile, "foo\rbar\rbaz\r", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(crFile), StandardCharsets.UTF_8, crFile.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz\n"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 8, 12);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 11, 12);
}
/**
 * Content that begins with a line feed: the leading empty line is counted as a line but not
 * as a non-blank line.
 */
@Test
public void start_with_newline() throws Exception {
  File leadingEolFile = temp.newFile();
  FileUtils.write(leadingEolFile, "\nfoo\nbar\r\nbaz", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(leadingEolFile), StandardCharsets.UTF_8, leadingEolFile.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("\nfoo\nbar\nbaz"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 1, 5, 10);
  assertThat(result.originalLineEndOffsets()).containsOnly(0, 4, 8, 13);
}
/**
 * LF-separated content with no terminator after the last line: three lines are expected and
 * the hash is that of the content as written.
 */
@Test
public void unix_without_latest_eol() throws Exception {
  File lfFile = temp.newFile();
  FileUtils.write(lfFile, "foo\nbar\nbaz", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(lfFile), StandardCharsets.UTF_8, lfFile.getName());

  assertThat(result.lines()).isEqualTo(3);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 8);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 11);
  assertThat(result.isEmpty()).isFalse();
}
@Test public void line_feed_is_included_into_hash() throws Exception { File file1 = temp.newFile(); FileUtils.write(file1, "foo\nbar\n", StandardCharsets.UTF_8, true); // same as file1, except an additional return carriage File file1a = temp.newFile(); FileUtils.write(file1a, "foo\r\nbar\n", StandardCharsets.UTF_8, true); File file2 = temp.newFile(); FileUtils.write(file2, "foo\nbar", StandardCharsets.UTF_8, true); String hash1 = new FileMetadata().readMetadata(new FileInputStream(file1), StandardCharsets.UTF_8, file1.getName()).hash(); String hash1a = new FileMetadata().readMetadata(new FileInputStream(file1a), StandardCharsets.UTF_8, file1a.getName()).hash(); String hash2 = new FileMetadata().readMetadata(new FileInputStream(file2), StandardCharsets.UTF_8, file2.getName()).hash(); assertThat(hash1).isEqualTo(hash1a); assertThat(hash1).isNotEqualTo(hash2); }
/**
 * Builds a {@code Metadata} from explicit values and checks that each accessor returns the
 * value that was passed to the constructor.
 */
@Test
public void testRoundtrip() {
  int[] startOffsets = {1, 3};
  int[] endOffsets = {2, 4};
  Metadata underTest = new Metadata(10, 20, "hash", startOffsets, endOffsets, 5);

  assertThat(underTest.isEmpty()).isFalse();
  assertThat(underTest.lines()).isEqualTo(10);
  assertThat(underTest.nonBlankLines()).isEqualTo(20);
  assertThat(underTest.originalLineStartOffsets()).containsExactly(1, 3);
  assertThat(underTest.originalLineEndOffsets()).containsExactly(2, 4);
  assertThat(underTest.lastValidOffset()).isEqualTo(5);
  assertThat(underTest.hash()).isEqualTo("hash");
}
}
/**
 * LF-separated content ending with a line feed: four lines are expected (the last one empty)
 * and the hash is that of the content as written.
 */
@Test
public void unix_with_latest_eol() throws Exception {
  File lfFile = temp.newFile();
  FileUtils.write(lfFile, "foo\nbar\nbaz\n", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(lfFile), StandardCharsets.UTF_8, lfFile.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz\n"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 8, 12);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 11, 12);
}
/**
 * Content with consecutive line feeds: the empty lines count towards the line total but not
 * towards the non-blank line total.
 */
@Test
public void several_new_lines() throws Exception {
  File sparseFile = temp.newFile();
  FileUtils.write(sparseFile, "foo\n\n\nbar", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(sparseFile), StandardCharsets.UTF_8, sparseFile.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(2);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\n\n\nbar"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 5, 6);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 4, 5, 9);
}
/**
 * CR-separated content with no terminator after the last line: three lines are expected, and
 * the expected hash is that of the same text with LF separators.
 */
@Test
public void mac_without_latest_eol() throws Exception {
  File crFile = temp.newFile();
  FileUtils.write(crFile, "foo\rbar\rbaz", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(crFile), StandardCharsets.UTF_8, crFile.getName());

  assertThat(result.lines()).isEqualTo(3);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 8);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 11);
}
/**
 * Content mixing LF and CRLF separators and ending with a line feed: four lines are expected,
 * and the expected hash is that of the same text with LF separators only.
 */
@Test
public void mix_of_newlines_with_latest_eol() throws Exception {
  File mixedFile = temp.newFile();
  FileUtils.write(mixedFile, "foo\nbar\r\nbaz\n", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(mixedFile), StandardCharsets.UTF_8, mixedFile.getName());

  assertThat(result.lines()).isEqualTo(4);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz\n"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 9, 13);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 12, 13);
}
/**
 * Content mixing LF and CRLF separators with no terminator after the last line: three lines
 * are expected, and the expected hash is that of the same text with LF separators only.
 */
@Test
public void mix_of_newlines_without_latest_eol() throws Exception {
  File mixedFile = temp.newFile();
  FileUtils.write(mixedFile, "foo\nbar\r\nbaz", StandardCharsets.UTF_8, true);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(mixedFile), StandardCharsets.UTF_8, mixedFile.getName());

  assertThat(result.lines()).isEqualTo(3);
  assertThat(result.nonBlankLines()).isEqualTo(3);
  assertThat(result.hash()).isEqualTo(md5Hex("foo\nbar\nbaz"));
  assertThat(result.originalLineStartOffsets()).containsOnly(0, 4, 9);
  assertThat(result.originalLineEndOffsets()).containsOnly(3, 7, 12);
}
/**
 * Test fixture: creates a fresh base directory, writes {@code FILE_CONTENT} to
 * {@code FILE_PATH} inside it, computes the file's hash through {@code FileMetadata} and
 * registers that hash with the tester together with the value {@code "1.1"}.
 *
 * @throws IOException if the folder or the file cannot be created or read
 */
@Before
public void prepare() throws IOException {
  baseDir = temp.newFolder();
  Path filepath = baseDir.toPath().resolve(FILE_PATH);
  // Encode explicitly as UTF-8: the file is read back as UTF-8 just below, so relying on the
  // platform default charset would make the fixture (and its hash) platform-dependent.
  Files.write(filepath, FILE_CONTENT.getBytes(StandardCharsets.UTF_8));
  String md5sum = new FileMetadata()
    .readMetadata(Files.newInputStream(filepath), StandardCharsets.UTF_8, FILE_PATH)
    .hash();
  tester.addFileData(FILE_PATH, new FileData(md5sum, "1.1"));
}
/**
 * A zero-length file is expected to report one line, no non-blank lines, a non-empty hash,
 * a single start/end offset of 0, and {@code isEmpty() == true}.
 */
@Test
public void empty_file() throws Exception {
  File emptyFile = temp.newFile();
  FileUtils.touch(emptyFile);

  FileMetadata reader = new FileMetadata();
  Metadata result = reader.readMetadata(new FileInputStream(emptyFile), StandardCharsets.UTF_8, emptyFile.getName());

  assertThat(result.lines()).isEqualTo(1);
  assertThat(result.nonBlankLines()).isEqualTo(0);
  assertThat(result.hash()).isNotEmpty();
  assertThat(result.originalLineStartOffsets()).containsOnly(0);
  assertThat(result.originalLineEndOffsets()).containsOnly(0);
  assertThat(result.isEmpty()).isTrue();
}
/**
 * Copies the line count, last valid offset, non-blank line count, hash and original line
 * start/end offsets from the given metadata into this builder.
 *
 * @param metadata source of the values; it is dereferenced immediately, so it must not be null
 * @return this builder
 */
public TestInputFileBuilder setMetadata(Metadata metadata) {
  setLines(metadata.lines());
  setLastValidOffset(metadata.lastValidOffset());
  setNonBlankLines(metadata.nonBlankLines());
  setHash(metadata.hash());
  setOriginalLineStartOffsets(metadata.originalLineStartOffsets());
  setOriginalLineEndOffsets(metadata.originalLineEndOffsets());
  return this;
}
/**
 * Sets all metadata in the file, including charset and status.
 * It is an expensive computation, reading the entire file.
 * <p>
 * The charset comes from a {@code CharsetDetector}; when detection fails, a debug message is
 * logged and {@code defaultEncoding} is used instead.
 *
 * @param moduleKeyWithBranch passed through to the status detection
 * @param inputFile the file whose charset, metadata and status are set
 * @param defaultEncoding charset given to the detector and used when detection fails
 * @throws IllegalStateException wrapping any exception raised while computing the metadata
 */
public void setMetadata(String moduleKeyWithBranch, final DefaultInputFile inputFile, Charset defaultEncoding) {
  CharsetDetector detector = new CharsetDetector(inputFile.path(), defaultEncoding);
  try {
    final boolean detected = detector.run();
    if (!detected) {
      LOG.debug("Failed to detect a valid charset for file '{}'. Using default charset.", inputFile);
    }
    Charset charset = detected ? detector.charset() : defaultEncoding;

    // The stream is obtained before the charset is applied to the input file, as in the original flow.
    InputStream content = detector.inputStream();
    inputFile.setCharset(charset);

    Metadata metadata = fileMetadata.readMetadata(content, charset, inputFile.absolutePath(), exclusionsScanner.createCharHandlerFor(inputFile));
    inputFile.setMetadata(metadata);
    inputFile.setStatus(statusDetection.status(moduleKeyWithBranch, inputFile, metadata.hash()));

    String typeSuffix = inputFile.type() == Type.TEST ? " as test " : "";
    LOG.debug("'{}' generated metadata{} with charset '{}'", inputFile, typeSuffix, charset);
  } catch (Exception e) {
    throw new IllegalStateException(e);
  }
}