Skip to content

Commit 1449dec

Browse files
authored
Support loading of CSI from URLs/streams. #1507 (#1595)
* Support loading of CSI from URLs/streams. * fixes #1507
1 parent 22aec67 commit 1449dec

File tree

7 files changed

+116
-17
lines changed

7 files changed

+116
-17
lines changed

src/main/java/htsjdk/samtools/BAMFileReader.java

+27-13
Original file line numberDiff line numberDiff line change
@@ -407,20 +407,29 @@ public boolean hasIndex() {
407407
*/
408408
@Override
409409
public BAMIndex getIndex() {
410-
if(!hasIndex())
410+
if(!hasIndex()) {
411411
throw new SAMException("No index is available for this BAM file.");
412+
}
412413
if(mIndex == null) {
413-
SamIndexes samIndex = getIndexType();
414-
if (samIndex == null) {
415-
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary())
416-
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary());
417-
} else if (samIndex.equals(SamIndexes.BAI)) {
418-
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
419-
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
420-
} else if (samIndex.equals(SamIndexes.CSI)) {
421-
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, getFileHeader().getSequenceDictionary());
422-
} else {
423-
throw new SAMFormatException("Unsupported BAM index file: " + mIndexFile.getName());
414+
final SamIndexes samIndexType = getIndexType();
415+
final SAMSequenceDictionary sequenceDictionary = getFileHeader().getSequenceDictionary();
416+
if(mIndexFile != null) {
417+
if (samIndexType.equals(SamIndexes.BAI)) {
418+
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping)
419+
: new DiskBasedBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping);
420+
} else if (samIndexType.equals(SamIndexes.CSI)) {
421+
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, sequenceDictionary);
422+
} else {
423+
throw new SAMFormatException("Unsupported BAM index file format: " + mIndexFile.getName());
424+
}
425+
} else if(mIndexStream != null) {
426+
if (samIndexType.equals(SamIndexes.BAI)) {
427+
mIndex = new CachingBAMFileIndex(mIndexStream, sequenceDictionary);
428+
} else if (samIndexType.equals(SamIndexes.CSI)) {
429+
mIndex = new CSIIndex(mIndexStream, sequenceDictionary);
430+
} else {
431+
throw new SAMFormatException("Unsupported BAM index file format: " + mIndexStream.getSource());
432+
}
424433
}
425434
}
426435

@@ -438,8 +447,13 @@ public SamIndexes getIndexType() {
438447
} else if (mIndexFile.getName().toLowerCase().endsWith(FileExtensions.CSI)) {
439448
return SamIndexes.CSI;
440449
}
441-
442450
throw new SAMFormatException("Unknown BAM index file type: " + mIndexFile.getName());
451+
} else if (mIndexStream != null) {
452+
final SamIndexes samIndexesType = SamIndexes.getSAMIndexTypeFromStream(mIndexStream);
453+
if (samIndexesType == SamIndexes.BAI || samIndexesType == SamIndexes.CSI) {
454+
return samIndexesType;
455+
}
456+
throw new SAMFormatException(String.format("Unknown BAM index file type: %s in %s", samIndexesType, mIndexStream.getSource()));
443457
}
444458

445459
return null;

src/main/java/htsjdk/samtools/CSIIndex.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public class CSIIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex
3535
*/
3636

3737
/**
 * Builds a CSI index whose contents are read from a seekable stream rather than a file.
 * Delegates to another constructor of this class, passing a buffer over the stream,
 * the stream's source description ({@code stream.getSource()}) and the dictionary.
 *
 * In this diff the buffer argument changes from a directly constructed
 * {@code IndexStreamBuffer} (removed line) to the result of
 * {@code IndexFileBufferFactory.getBuffer(stream)} (added line).
 *
 * @param stream     seekable stream positioned over the CSI index data
 * @param dictionary sequence dictionary forwarded to the delegated constructor
 */
public CSIIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
38-
this(new IndexStreamBuffer(stream), stream.getSource(), dictionary);
38+
this(IndexFileBufferFactory.getBuffer(stream), stream.getSource(), dictionary);
3939
}
4040

4141
public CSIIndex(final Path path, final SAMSequenceDictionary dictionary) throws IOException {

src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package htsjdk.samtools;
22

3+
import htsjdk.samtools.seekablestream.SeekableStream;
34
import htsjdk.samtools.util.BinaryCodec;
45
import htsjdk.samtools.util.BlockCompressedInputStream;
56
import htsjdk.samtools.util.RuntimeIOException;
@@ -25,6 +26,11 @@ class CompressedIndexFileBuffer implements IndexFileBuffer {
2526
}
2627
}
2728

29+
CompressedIndexFileBuffer(SeekableStream seekableStream) {
30+
mCompressedStream = new BlockCompressedInputStream(seekableStream);
31+
binaryCodec = new BinaryCodec(mCompressedStream);
32+
}
33+
2834
@Override
2935
public void readBytes(final byte[] bytes) {
3036
binaryCodec.readBytes(bytes);

src/main/java/htsjdk/samtools/IndexFileBufferFactory.java

+10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package htsjdk.samtools;
22

3+
import htsjdk.samtools.seekablestream.SeekableStream;
34
import htsjdk.samtools.util.IOUtil;
45
import htsjdk.samtools.util.RuntimeIOException;
56

@@ -18,4 +19,13 @@ static IndexFileBuffer getBuffer(File file, boolean enableMemoryMapping) {
1819

1920
return isCompressed ? new CompressedIndexFileBuffer(file) : (enableMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file));
2021
}
22+
23+
static IndexFileBuffer getBuffer(SeekableStream seekableStream) {
24+
boolean isCompressed;
25+
isCompressed = IOUtil.isGZIPInputStream(seekableStream);
26+
27+
return isCompressed ?
28+
new CompressedIndexFileBuffer(seekableStream) :
29+
new IndexStreamBuffer(seekableStream);
30+
}
2131
}

src/main/java/htsjdk/samtools/SamIndexes.java

+33
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
55
import htsjdk.samtools.seekablestream.SeekableStream;
66
import htsjdk.samtools.util.FileExtensions;
7+
import htsjdk.samtools.util.IOUtil;
8+
import htsjdk.samtools.util.RuntimeIOException;
79

810
import java.io.BufferedInputStream;
911
import java.io.File;
1012
import java.io.IOException;
1113
import java.io.InputStream;
1214
import java.net.URL;
15+
import java.util.zip.GZIPInputStream;
1316

1417
/**
1518
* A helper class to read BAI and CRAI indexes. Main goal is to provide BAI stream as a sort of common API for all index types.
@@ -102,6 +105,36 @@ public static SeekableStream asBaiSeekableStreamOrNull(final SeekableStream inpu
102105
return null;
103106
}
104107

108+
public static SamIndexes getSAMIndexTypeFromStream(final SeekableStream seekableStream) {
109+
SamIndexes indexType = null;
110+
try {
111+
seekableStream.seek(0);
112+
final SeekableBufferedStream bss = new SeekableBufferedStream(seekableStream);
113+
114+
if (IOUtil.isGZIPInputStream(bss)) {
115+
bss.seek(0);
116+
GZIPInputStream gzipStream = new GZIPInputStream(bss);
117+
if (doesStreamStartWith(gzipStream, CSI.magic)) {
118+
indexType = CSI;
119+
} else {
120+
// the CRAI format has no signature bytes, so optimistically call it CRAI
121+
// if its gzipped but not CSI
122+
indexType = CRAI;
123+
}
124+
} else {
125+
bss.seek(0);
126+
if (doesStreamStartWith(bss, BAI.magic)) {
127+
indexType = BAI;
128+
}
129+
}
130+
seekableStream.seek(0);
131+
} catch (final IOException e) {
132+
throw new RuntimeIOException("Error interrogating index input stream", e);
133+
}
134+
135+
return indexType;
136+
}
137+
105138
private static boolean doesStreamStartWith(final InputStream is, final byte[] bytes) throws IOException {
106139
for (final byte b : bytes) {
107140
if (is.read() != (0xFF & b)) {

src/test/java/htsjdk/samtools/BAMFileReaderTest.java

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package htsjdk.samtools;
22

33
import htsjdk.HtsjdkTest;
4-
import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
54
import htsjdk.samtools.util.CloseableIterator;
65
import htsjdk.samtools.util.CoordMath;
76
import org.testng.Assert;
@@ -10,7 +9,8 @@
109

1110
import java.io.File;
1211
import java.io.IOException;
13-
import java.util.List;
12+
import java.net.URL;
13+
import java.nio.file.Paths;
1414

1515
public class BAMFileReaderTest extends HtsjdkTest {
1616
private final static File bamFile = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam");
@@ -36,6 +36,23 @@ public void init() throws IOException {
3636
bamFileReaderNull = new BAMFileReader(bamFile, null, true, false, ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance());
3737
}
3838

39+
@Test
40+
public static void testCSIFromURL() throws IOException {
41+
// https://github.com/samtools/htsjdk/issues/1507
42+
final URL bamURL = Paths.get(bamFile.toURI()).toUri().toURL();
43+
final URL csiURL = Paths.get(csiFileIndex.toURI()).toUri().toURL();
44+
final SamInputResource resource = SamInputResource.of(bamURL).index(csiURL);
45+
final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
46+
try (final SamReader samReader = factory.open(resource)) {
47+
Assert.assertTrue(samReader.hasIndex());
48+
final BAMIndex index = samReader.indexing().getIndex();
49+
Assert.assertTrue(index instanceof CSIIndex);
50+
try (final SAMRecordIterator unusedIterator =
51+
samReader.queryAlignmentStart("chr1_random", 1)) {}
52+
try (final SAMRecordIterator unusedIterator = samReader.queryUnmapped()) {}
53+
}
54+
}
55+
3956
@Test
4057
public static void testGetIndexTypeOK() {
4158
BAMIndexMetaData.printIndexStats(bamFile);

src/test/java/htsjdk/samtools/SamIndexesTest.java

+20-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
import htsjdk.samtools.seekablestream.SeekableFileStream;
66
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
77
import htsjdk.samtools.seekablestream.SeekableStream;
8+
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
89
import htsjdk.samtools.util.IOUtil;
910
import org.testng.Assert;
11+
import org.testng.annotations.DataProvider;
1012
import org.testng.annotations.Test;
1113

1214
import java.io.ByteArrayInputStream;
@@ -188,4 +190,21 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException {
188190
Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset());
189191
Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
190192
}
191-
}
193+
194+
@DataProvider(name = "getSAMIndexTypeFromStreamTests")
195+
public Object[][] getSAMIndexTypeFromStreamTests() {
196+
return new Object[][]{
197+
{ new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"), SamIndexes.BAI },
198+
{ new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.csi"), SamIndexes.CSI },
199+
{ new File("src/test/resources/htsjdk/samtools/cram/cramQueryWithCRAI.cram.crai"), SamIndexes.CRAI},
200+
};
201+
}
202+
203+
@Test(dataProvider = "getSAMIndexTypeFromStreamTests")
204+
public void testGetSAMIndexTypeFromStream(final File indexFile, final SamIndexes expectedIndexType) throws IOException {
205+
try (final SeekableStream seekableStream = SeekableStreamFactory.getInstance().getStreamFor(indexFile.getPath())) {
206+
Assert.assertEquals(SamIndexes.getSAMIndexTypeFromStream(seekableStream),expectedIndexType);
207+
Assert.assertEquals(seekableStream.position(), 0);
208+
}
209+
}
210+
}

0 commit comments

Comments
 (0)