22
22
23
23
public class NewVCFLoader {
24
24
25
// Path the no-argument constructor uses for the VCF index file.
+ public static final String DEFAULT_VCF_INDEX_FILE = "/opt/local/hpds/vcfIndex.tsv" ;
26
// Path the no-argument constructor uses for both storageDirStr and storageDir.
+ public static final String DEFAULT_STORAGE_DIR = "/opt/local/hpds/all" ;
27
// Path the no-argument constructor uses for mergedDirStr.
+ public static final String DEFAULT_MERGED_DIR = "/opt/local/hpds/merged" ;
25
28
private static Logger logger = LoggerFactory .getLogger (NewVCFLoader .class );
26
- private static File storageDir = null ;
27
- private static String storageDirStr = "/opt/local/hpds/all" ;
28
- private static String mergedDirStr = "/opt/local/hpds/merged" ;
29
29
30
- private static VariantIndexBuilder variantIndexBuilder = new VariantIndexBuilder ();
30
+ protected File indexFile ;
31
+ protected File storageDir ;
32
+ protected String storageDirStr ;
33
+ protected String mergedDirStr ;
34
+
35
+ protected VariantIndexBuilder variantIndexBuilder ;
31
36
32
37
// DO NOT CHANGE THIS unless you want to reload all the data everywhere.
33
- private static int CHUNK_SIZE = 1000 ;
38
+ protected static int CHUNK_SIZE = 1000 ;
34
39
35
40
/**
36
41
* @param args - if testing, this should be an array ['vcfIndexFile path', 'output storage dir', 'merged dir'].
37
42
* by default this will be [/opt/local/hpds/vcfIndex.tsv, "/opt/local/hpds/all", "/opt/local/hpds/merged" ].
38
43
*/
39
- public static void main (String [] args ) throws FileNotFoundException , IOException {
40
-
41
- File indexFile ;
44
+ public static void main (String [] args ) throws IOException {
45
+
46
+ NewVCFLoader vcfLoader ;
42
47
if (args != null && args .length >= 3 ) {
43
- logger .info ("Reading parameters from input - this is a test" );
44
- indexFile = new File (args [0 ]);
45
- storageDirStr = args [1 ];
46
- storageDir = new File (args [1 ]);
47
- mergedDirStr = args [2 ];
48
+ logger .info ("Reading parameters from input" );
49
+ vcfLoader = new NewVCFLoader (new File (args [0 ]), args [1 ], args [2 ]);
48
50
} else {
49
- indexFile = new File ("/opt/local/hpds/vcfIndex.tsv" );
50
- storageDir = new File (storageDirStr );
51
+ logger .info (args .length + " arguments provided" );
52
+ logger .info ("Using default values" );
53
+ vcfLoader = new NewVCFLoader ();
51
54
}
52
- loadVCFs ( indexFile );
55
+ vcfLoader . loadAndMerge ( );
53
56
}
54
57
55
- private static ExecutorService chunkWriteEx = Executors .newFixedThreadPool (1 );
58
+ protected void loadAndMerge () throws IOException {
59
+ createWalkers ();
60
+ loadVCFs ();
61
+ }
56
62
57
- private static ConcurrentHashMap <String , InfoStore > infoStoreMap = new ConcurrentHashMap <String , InfoStore >();
63
/**
 * Creates a loader that uses the default locations: {@link #DEFAULT_VCF_INDEX_FILE} as the
 * index file, {@link #DEFAULT_STORAGE_DIR} as the storage directory and
 * {@link #DEFAULT_MERGED_DIR} as the merged directory.
 */
public NewVCFLoader() {
	// Delegate so that field initialization lives in a single constructor.
	this(new File(DEFAULT_VCF_INDEX_FILE), DEFAULT_STORAGE_DIR, DEFAULT_MERGED_DIR);
}
58
70
59
- private static HashMap <String , char [][]> zygosityMaskStrings ;
71
/**
 * Creates a loader that reads and writes at explicitly supplied locations.
 *
 * @param indexFile    file stored as this loader's VCF index file
 * @param storageDir   storage directory path; kept both as the raw string
 *                     ({@code storageDirStr}) and as a {@link File} ({@code storageDir})
 * @param mergedDirStr merged directory path, stored as given
 */
public NewVCFLoader(File indexFile, String storageDir, String mergedDirStr) {
	// Storage location is tracked in two forms: a File and the original string.
	this.storageDir = new File(storageDir);
	this.storageDirStr = storageDir;

	this.indexFile = indexFile;
	this.mergedDirStr = mergedDirStr;

	this.variantIndexBuilder = new VariantIndexBuilder();
}
60
78
61
- private static TreeMap < String , FileBackedJsonIndexStorage < Integer , ConcurrentHashMap < String , VariableVariantMasks >>> variantMaskStorage = new TreeMap <>( );
79
+ protected ExecutorService chunkWriteEx = Executors . newFixedThreadPool ( 1 );
62
80
63
- private static long startTime ;
81
+ protected ConcurrentHashMap < String , InfoStore > infoStoreMap = new ConcurrentHashMap < String , InfoStore >() ;
64
82
65
- private static List < VCFWalker > walkers = new ArrayList <>() ;
83
+ protected HashMap < String , char [][]> zygosityMaskStrings ;
66
84
67
- private static boolean contigIsHemizygous ;
85
+ protected TreeMap < String , FileBackedJsonIndexStorage < Integer , ConcurrentHashMap < String , VariableVariantMasks >>> variantMaskStorage = new TreeMap <>() ;
68
86
69
- private static void loadVCFs (File indexFile ) throws IOException {
87
+ protected long startTime ;
88
+
89
+ protected List <VCFWalker > walkers = new ArrayList <>();
90
+
91
+ private boolean contigIsHemizygous ;
92
+
93
+ protected void loadVCFs () throws IOException {
70
94
startTime = System .currentTimeMillis ();
71
- List <VCFIndexLine > vcfIndexLines = parseVCFIndex (indexFile );
72
- for (VCFIndexLine line : vcfIndexLines ) {
73
- walkers .add (new VCFWalker (line ));
74
- }
75
- TreeSet <Integer > allPatientIds = new TreeSet <Integer >();
95
+ TreeSet <Integer > allPatientIds = new TreeSet <>();
76
96
77
97
// Pull the INFO columns out of the headers for each walker and add all patient ids
78
98
walkers .stream ().forEach (walker -> {
@@ -232,7 +252,14 @@ private static void loadVCFs(File indexFile) throws IOException {
232
252
saveVariantStore (store , variantMaskStorage );
233
253
}
234
254
235
- private static String sampleIdsForMask (String [] sampleIds , VariantMask variantMask ) {
255
+ private void createWalkers () {
256
+ List <VCFIndexLine > vcfIndexLines = parseVCFIndex (indexFile );
257
+ for (VCFIndexLine line : vcfIndexLines ) {
258
+ walkers .add (new VCFWalker (line ));
259
+ }
260
+ }
261
+
262
+ protected String sampleIdsForMask (String [] sampleIds , VariantMask variantMask ) {
236
263
StringBuilder idList = new StringBuilder ();
237
264
if (variantMask != null ) {
238
265
if (variantMask instanceof VariantMaskBitmaskImpl ) {
@@ -251,7 +278,7 @@ private static String sampleIdsForMask(String[] sampleIds, VariantMask variantMa
251
278
return idList .toString ();
252
279
}
253
280
254
- private static void flipChunk (String lastContigProcessed , int lastChunkProcessed , int currentChunk ,
281
+ protected void flipChunk (String lastContigProcessed , int lastChunkProcessed , int currentChunk ,
255
282
String currentContig , boolean isLastChunk , String currentLine ) throws IOException , FileNotFoundException {
256
283
if (!currentContig .contentEquals (lastContigProcessed ) || isLastChunk ) {
257
284
if (infoStoreFlipped .get (lastContigProcessed ) == null || !infoStoreFlipped .get (lastContigProcessed )) {
@@ -310,7 +337,7 @@ private static void flipChunk(String lastContigProcessed, int lastChunkProcessed
310
337
}
311
338
}
312
339
313
- private static void saveVariantStore (VariantStore store ,
340
+ protected void saveVariantStore (VariantStore store ,
314
341
TreeMap <String , FileBackedJsonIndexStorage <Integer , ConcurrentHashMap <String , VariableVariantMasks >>> variantMaskStorage )
315
342
throws IOException , FileNotFoundException {
316
343
store .setVariantMaskStorage (variantMaskStorage );
@@ -323,7 +350,7 @@ private static void saveVariantStore(VariantStore store,
323
350
logger .debug ("Done saving variant masks." );
324
351
}
325
352
326
- private static void saveInfoStores () throws IOException , FileNotFoundException {
353
+ protected void saveInfoStores () throws IOException , FileNotFoundException {
327
354
logger .debug ("Saving info" + (System .currentTimeMillis () - startTime ) + " seconds" );
328
355
try (FileOutputStream fos = new FileOutputStream (new File (storageDir , "infoStores.javabin" ));
329
356
GZIPOutputStream gzos = new GZIPOutputStream (fos );
@@ -334,7 +361,7 @@ private static void saveInfoStores() throws IOException, FileNotFoundException {
334
361
logger .info ("completed load in " + (System .currentTimeMillis () - startTime ) + " seconds" );
335
362
}
336
363
337
- public static void splitInfoStoresByColumn () throws FileNotFoundException , IOException {
364
+ public void splitInfoStoresByColumn () throws FileNotFoundException , IOException {
338
365
logger .debug ("Splitting" + (System .currentTimeMillis () - startTime ) + " seconds" );
339
366
try {
340
367
VCFPerPatientInfoStoreSplitter .splitAll (storageDir , new File (mergedDirStr ));
@@ -344,7 +371,7 @@ public static void splitInfoStoresByColumn() throws FileNotFoundException, IOExc
344
371
logger .debug ("Split" + (System .currentTimeMillis () - startTime ) + " seconds" );
345
372
}
346
373
347
- public static void convertInfoStoresToByteIndexed () throws FileNotFoundException , IOException {
374
+ public void convertInfoStoresToByteIndexed () throws FileNotFoundException , IOException {
348
375
logger .debug ("Converting" + (System .currentTimeMillis () - startTime ) + " seconds" );
349
376
try {
350
377
VCFPerPatientInfoStoreToFBBIISConverter .convertAll (mergedDirStr , storageDirStr );
@@ -354,7 +381,7 @@ public static void convertInfoStoresToByteIndexed() throws FileNotFoundException
354
381
logger .debug ("Converted " + ((System .currentTimeMillis () - startTime ) / 1000 ) + " seconds" );
355
382
}
356
383
357
- private static void shutdownChunkWriteExecutor () {
384
+ protected void shutdownChunkWriteExecutor () {
358
385
chunkWriteEx .shutdown ();
359
386
while (!chunkWriteEx .isTerminated ()) {
360
387
try {
@@ -377,16 +404,16 @@ private static ConcurrentHashMap<String, VariableVariantMasks> convertLoadingMap
377
404
378
405
static TreeMap <String , Boolean > infoStoreFlipped = new TreeMap <String , Boolean >();
379
406
380
- private static class VCFWalker implements Comparable <VCFWalker > {
407
+ protected class VCFWalker implements Comparable <VCFWalker > {
381
408
382
- private List <Integer > indices ;
383
- private Integer [] vcfOffsets ;
384
- private Integer [] bitmaskOffsets ;
385
- private HashMap <Integer , Integer > vcfIndexLookup ;
386
- private String currentLine ;
387
- private String [] currentLineSplit ;
388
- private BufferedReader vcfReader ;
389
- private VCFIndexLine vcfIndexLine ;
409
+ protected List <Integer > indices ;
410
+ protected Integer [] vcfOffsets ;
411
+ protected Integer [] bitmaskOffsets ;
412
+ protected HashMap <Integer , Integer > vcfIndexLookup ;
413
+ protected String currentLine ;
414
+ protected String [] currentLineSplit ;
415
+ protected BufferedReader vcfReader ;
416
+ protected VCFIndexLine vcfIndexLine ;
390
417
boolean hasNext = true ;
391
418
String currentContig ;
392
419
Integer currentPosition ;
@@ -469,7 +496,7 @@ private void setMasksForSample(char[][] zygosityMaskStrings, int index, int star
469
496
zygosityMaskStrings [patientZygosityIndex ][bitmaskOffsets [index ]] = '1' ;
470
497
}
471
498
472
- private String currentSpecNotation () {
499
+ protected String currentSpecNotation () {
473
500
String [] variantInfo = currentLineSplit [7 ].split ("[=;]" );
474
501
String gene = "NULL" ;
475
502
String consequence = "NULL" ;
@@ -616,7 +643,7 @@ public int compareTo(VCFWalker o) {
616
643
private static final int SAMPLE_RELATIONSHIPS_COLUMN = 6 ;
617
644
private static final int RELATED_SAMPLE_IDS_COLUMN = 7 ;
618
645
619
- private static class VCFIndexLine implements Comparable <VCFIndexLine > {
646
+ protected static class VCFIndexLine implements Comparable <VCFIndexLine > {
620
647
String vcfPath ;
621
648
String contig ;
622
649
boolean isAnnotated ;
0 commit comments