public RecordReader<Text, SequencedFragment> createRecordReader( InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { context.setStatus(genericSplit.toString()); return new FastqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat } }
public RecordReader<Text, SequencedFragment> createRecordReader( InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { context.setStatus(genericSplit.toString()); return new FastqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat } }
public RecordReader<Text, SequencedFragment> createRecordReader( InputSplit genericSplit, TaskAttemptContext context) throws IOException, InterruptedException { context.setStatus(genericSplit.toString()); return new FastqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat } }
/**
 * Reads the first record of the {@code illuminaFastqNoFlowCellID} fixture and
 * checks each metadata getter on the fragment; the flowcell id is expected to
 * be the empty string.
 */
@Test
public void testIlluminaMetaInfoNullFC() throws IOException {
    writeToTempFastq(illuminaFastqNoFlowCellID);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, illuminaFastqNoFlowCellID.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    final boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);

    // instrument / run / flowcell
    assertEquals("EAS139", fragment.getInstrument());
    assertEquals(136, fragment.getRunNumber().intValue());
    assertEquals("", fragment.getFlowcellId());

    // position on the flowcell
    assertEquals(2, fragment.getLane().intValue());
    assertEquals(5, fragment.getTile().intValue());
    assertEquals(1000, fragment.getXpos().intValue());
    assertEquals(12850, fragment.getYpos().intValue());

    // read-level fields
    assertEquals(1, fragment.getRead().intValue());
    assertEquals(false, fragment.getFilterPassed().booleanValue());
    assertEquals(18, fragment.getControlNumber().intValue());
    assertEquals("ATCACG", fragment.getIndexSequence());
}
/**
 * Reads the first record of the {@code illuminaFastq} fixture and checks each
 * metadata getter on the fragment, including the flowcell id {@code FC706VJ}.
 */
@Test
public void testIlluminaMetaInfo() throws IOException {
    writeToTempFastq(illuminaFastq);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, illuminaFastq.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    final boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);

    // instrument / run / flowcell
    assertEquals("EAS139", fragment.getInstrument());
    assertEquals(136, fragment.getRunNumber().intValue());
    assertEquals("FC706VJ", fragment.getFlowcellId());

    // position on the flowcell
    assertEquals(2, fragment.getLane().intValue());
    assertEquals(5, fragment.getTile().intValue());
    assertEquals(1000, fragment.getXpos().intValue());
    assertEquals(12850, fragment.getYpos().intValue());

    // read-level fields
    assertEquals(1, fragment.getRead().intValue());
    assertEquals(false, fragment.getFilterPassed().booleanValue());
    assertEquals(18, fragment.getControlNumber().intValue());
    assertEquals("ATCACG", fragment.getIndexSequence());
}
@Test public void testGzCompressedInput() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); }
@Test public void testReadStartInMiddle() throws IOException { writeToTempFastq(twoFastq); split = new FileSplit(new Path(tempFastq.toURI().toString()), 10, twoFastq.length() - 10, null); FastqRecordReader reader = new FastqRecordReader(conf, split); assertEquals(oneFastq.length() + 1, reader.getPos()); // The start of the second record. We +1 for the \n that is not in oneFastq assertEquals(0.0, reader.getProgress(), 0.01); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); assertEquals("BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################", fragment.getQuality().toString()); assertEquals(twoFastq.length(), reader.getPos()); // now should be at the end of the data assertEquals(1.0, reader.getProgress(), 0.01); retval = reader.next(key, fragment); assertFalse(retval); }
@Test public void testReadFastqWithAmpersandQuality() throws IOException { writeToTempFastq(fastqWithAmpersandQuality); // split doesn't start at 0, forcing reader to advance looking for first complete record split = new FileSplit(new Path(tempFastq.toURI().toString()), 3, fastqWithAmpersandQuality.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); assertEquals("###########################################################################################", fragment.getQuality().toString()); retval = reader.next(key, fragment); assertFalse(retval); }
/**
 * Reads the {@code fastqWithIdTwice} fixture from offset 0 and checks that a
 * single record with the expected key, sequence and quality is returned.
 */
@Test
public void testReadFastqWithIdTwice() throws IOException {
    writeToTempFastq(fastqWithIdTwice);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, fastqWithIdTwice.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);

    boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString());
    assertEquals(
            "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT",
            fragment.getSequence().toString());
    assertEquals(
            "###########################################################################################",
            fragment.getQuality().toString());

    // no further record is expected
    hasRecord = reader.next(key, fragment);
    assertFalse(hasRecord);
}
/**
 * Reads the {@code twoFastqWithIllumina} fixture with the current {@code conf}
 * and expects exactly two records, whose read numbers are 2 and 3 in that order.
 *
 * NOTE(review): presumably callers configure failed-QC filtering in
 * {@code conf} before calling this helper; confirm against the call sites.
 */
private void verifySkipFailedQC() throws IOException {
    writeToTempFastq(twoFastqWithIllumina);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, twoFastqWithIllumina.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);

    boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertEquals(2, (int) fragment.getRead());

    hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertEquals(3, (int) fragment.getRead());

    hasRecord = reader.next(key, fragment);
    assertFalse(hasRecord);
}
@Test public void testIlluminaNoIndex() throws IOException { writeToTempFastq(illuminaFastqNoIndex); split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastqNoIndex.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean found = reader.next(key, fragment); assertTrue(found); // ensure all meta-data was picked up assertEquals("EAS139", fragment.getInstrument()); assertEquals(136, fragment.getRunNumber().intValue()); // now verify the index assertEquals("", fragment.getIndexSequence()); }
/**
 * Reads a file made of {@code oneFastq} followed by {@code illuminaFastq} and
 * expects two records, both with a null instrument.
 *
 * NOTE(review): the second record comes from the Illumina fixture yet no
 * instrument is expected; presumably the reader stops parsing Illumina
 * metadata once it has seen a non-matching id. Confirm against
 * FastqRecordReader.
 */
@Test
public void testOneNotThenIllumina() throws IOException {
    writeToTempFastq(oneFastq + "\n" + illuminaFastq);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    // +1 accounts for the "\n" separator written between the two fixtures
    split = new FileSplit(
            fastqPath,
            0,
            illuminaFastq.length() + oneFastq.length() + 1,
            null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);

    boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertNull(fragment.getInstrument());

    hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertNull(fragment.getInstrument());

    hasRecord = reader.next(key, fragment);
    assertFalse(hasRecord);
}
/**
 * Reads a file made of {@code illuminaFastq} followed by {@code oneFastq} and
 * expects two records: the first with instrument {@code EAS139}, the second
 * with a null instrument.
 */
@Test
public void testOneIlluminaThenNot() throws IOException {
    writeToTempFastq(illuminaFastq + "\n" + oneFastq);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    // +1 accounts for the "\n" separator written between the two fixtures
    split = new FileSplit(
            fastqPath,
            0,
            illuminaFastq.length() + oneFastq.length() + 1,
            null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);

    boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertEquals("EAS139", fragment.getInstrument());

    hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);
    assertNull(fragment.getInstrument());

    hasRecord = reader.next(key, fragment);
    assertFalse(hasRecord);
}
/**
 * Checks the progress reported while reading {@code twoFastq}: 0.0 before any
 * record, 0.5 after the first and 1.0 after the second, each within a
 * tolerance of 0.01.
 */
@Test
public void testProgress() throws IOException {
    final double tolerance = 0.01;

    writeToTempFastq(twoFastq);
    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, twoFastq.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);

    // nothing consumed yet
    assertEquals(0.0, reader.getProgress(), tolerance);

    // after the first record
    reader.next(key, fragment);
    assertEquals(0.5, reader.getProgress(), tolerance);

    // after the second record
    reader.next(key, fragment);
    assertEquals(1.0, reader.getProgress(), tolerance);
}
/**
 * Writes the {@code oneFastq} fixture to the temporary file, points
 * {@code split} at the whole of it and builds a reader with the current
 * {@code conf}.
 *
 * @return a new reader over the freshly written file
 * @throws IOException declared by the signature
 */
private FastqRecordReader createReaderForOneFastq() throws IOException {
    writeToTempFastq(oneFastq);

    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, oneFastq.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    return reader;
}
@Test(expected=RuntimeException.class) public void testCompressedSplit() throws IOException { // write gzip-compressed data GzipCodec codec = new GzipCodec(); PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); fastqOut.write(twoFastq); fastqOut.close(); // now try to read it starting from the middle split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null); FastqRecordReader reader = new FastqRecordReader(conf, split); }
/**
 * Checks that {@code makePositionMessage()} returns a non-null value for a
 * reader freshly opened on the {@code fastqWithIdTwice} fixture.
 */
@Test
public void testMakePositionMessage() throws IOException {
    writeToTempFastq(fastqWithIdTwice);

    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, fastqWithIdTwice.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    assertNotNull(reader.makePositionMessage());
}
/**
 * Reads the first record of the {@code oneFastqWithoutRead} fixture and
 * checks that the fragment's read number is null.
 */
@Test
public void testNameWithoutReadNum() throws IOException {
    writeToTempFastq(oneFastqWithoutRead);

    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, oneFastqWithoutRead.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    final boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);

    assertNull("Read is not null", fragment.getRead());
}
/**
 * Reads the first record of the {@code illuminaFastqWithPhred64Quality}
 * fixture with the current {@code conf} and expects a quality string made up
 * entirely of {@code 'C'} characters.
 *
 * NOTE(review): presumably callers set the input quality encoding in
 * {@code conf} before calling this helper; confirm against the call sites.
 */
private void verifyInputQualityConfig() throws IOException {
    writeToTempFastq(illuminaFastqWithPhred64Quality);

    final Path fastqPath = new Path(tempFastq.toURI().toString());
    split = new FileSplit(fastqPath, 0, illuminaFastqWithPhred64Quality.length(), null);

    final FastqRecordReader reader = new FastqRecordReader(conf, split);
    final boolean hasRecord = reader.next(key, fragment);
    assertTrue(hasRecord);

    assertEquals(
            "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
            fragment.getQuality().toString());
}
@Test public void testSliceEndsBeforeEndOfFile() throws IOException { writeToTempFastq(twoFastq); // slice ends at position 10--i.e. somewhere in the first record. The second record should not be read. split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, 10, null); FastqRecordReader reader = new FastqRecordReader(conf, split); boolean retval = reader.next(key, fragment); assertTrue(retval); assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); assertFalse("FastqRecordReader is reading a record that starts after the end of the slice", reader.next(key, fragment)); }