Skip to content

Commit aec89f9

Browse files
committed
Support un-normalized input VCFs
1 parent 72e97ec commit aec89f9

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

vcf2vcf.pl

+5-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
# E.g. Get DP:AD from tumor/normal BAMs for a TGG-insertion at GRCh38 loci 21:46302071-46302072:
2525
# ::NOTE:: This returns multiple lines+alleles, so the matching allele needs to be parsed out
26-
# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam | bcftools norm --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa
26+
# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam
2727

2828
# Define FILTER descriptors that we'll add if user specified the --add-filters option
2929
my ( $min_tum_depth, $min_nrm_depth ) = ( 14, 8 );
@@ -212,7 +212,7 @@
212212
$nrm_info{DP} = $nrm_info{AD} = $nrm_info{ADF} = $nrm_info{ADR} = ".";
213213

214214
# Generate mpileup and parse out only DP,AD,ADF,ADR for tumor/normal samples
215-
my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null | bcftools norm --fasta-ref $ref_fasta 2> /dev/null`;
215+
my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null`;
216216

217217
my ( $p_vcf_tumor_idx, $p_vcf_normal_idx ) = ( 0, 1 );
218218
foreach my $p_line ( @p_lines ) {
@@ -244,12 +244,14 @@
244244

245245
# Parse out the mpileup REF/ALT alleles and match them to the input REF/ALT alleles
246246
my @p_alts = split( /,/, $p_alt );
247-
my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele doesn't need to match
247+
my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele is always the first one
248248
for( my $i = 1; $i <= $#alleles; ++$i ) {
249249
foreach my $p_alt ( @p_alts ) {
250250
# De-pad suffixed bps that are identical between ref/var alleles
251251
my $p_ref_norm = $p_ref;
252252
while( $p_ref_norm and $p_alt and substr( $p_ref_norm, -1, 1 ) eq substr( $p_alt, -1, 1 ) and $p_ref_norm ne $p_alt ) {
253+
# Just in case the input also has one or more suffixed bps
254+
last if( $p_ref_norm eq $alleles[0] and $p_alt eq $alleles[$i] );
253255
( $p_ref_norm, $p_alt ) = map{substr( $_, 0, -1 )} ( $p_ref_norm, $p_alt );
254256
}
255257
# Make sure that both REF and ALT alleles match, because deletions can vary

0 commit comments

Comments
 (0)