|
23 | 23 |
|
24 | 24 | # E.g. Get DP:AD from tumor/normal BAMs for a TGG-insertion at GRCh38 loci 21:46302071-46302072:
|
25 | 25 | # ::NOTE:: This returns multiple lines+alleles, so the matching allele needs to be parsed out
|
26 |
| -# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam | bcftools norm --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa |
| 26 | +# samtools mpileup --region chr21:46302071-46302071 --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref /ifs/depot/assemblies/H.sapiens/GRCh38_GDC/GRCh38.d1.vd1.fa /ifs/tcga/gdc/files/95a08b09-c46a-458c-bd21-deb43a309b00/69f9c49d8f6376a7092cff2a3bd2922b_gdc_realn.bam /ifs/tcga/gdc/files/47ac9742-74bc-4d76-a2ac-46c708e9cbbd/e30ade9704fbc29ccd9e6b69c91db237_gdc_realn.bam |
27 | 27 |
|
28 | 28 | # Define FILTER descriptors that we'll add if user specified the --add-filters option
|
29 | 29 | my ( $min_tum_depth, $min_nrm_depth ) = ( 14, 8 );
|
|
212 | 212 | $nrm_info{DP} = $nrm_info{AD} = $nrm_info{ADF} = $nrm_info{ADR} = ".";
|
213 | 213 |
|
214 | 214 | # Generate mpileup and parse out only DP,AD,ADF,ADR for tumor/normal samples
|
215 |
| - my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 20 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --BCF --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null | bcftools norm --fasta-ref $ref_fasta 2> /dev/null`; |
| 215 | + my @p_lines = `samtools mpileup --region $chrom:$pos-$pos --count-orphans --no-BAQ --min-MQ 1 --min-BQ 5 --ignore-RG --excl-flags UNMAP,SECONDARY,QCFAIL,DUP --VCF --uncompressed --output-tags DP,AD,ADF,ADR --ext-prob 20 --gap-frac 0.002 --tandem-qual 80 --min-ireads 1 --open-prob 30 --fasta-ref $ref_fasta $tumor_bam $normal_bam 2> /dev/null`; |
216 | 216 |
|
217 | 217 | my ( $p_vcf_tumor_idx, $p_vcf_normal_idx ) = ( 0, 1 );
|
218 | 218 | foreach my $p_line ( @p_lines ) {
|
|
244 | 244 |
|
245 | 245 | # Parse out the mpileup REF/ALT alleles and match them to the input REF/ALT alleles
|
246 | 246 | my @p_alts = split( /,/, $p_alt );
|
247 |
| - my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele doesn't need to match |
| 247 | + my %p_allele_idx = ( $alleles[0], 0 ); # The reference allele is always the first one |
248 | 248 | for( my $i = 1; $i <= $#alleles; ++$i ) {
|
249 | 249 | foreach my $p_alt ( @p_alts ) {
|
250 | 250 | # De-pad suffixed bps that are identical between ref/var alleles
|
251 | 251 | my $p_ref_norm = $p_ref;
|
252 | 252 | while( $p_ref_norm and $p_alt and substr( $p_ref_norm, -1, 1 ) eq substr( $p_alt, -1, 1 ) and $p_ref_norm ne $p_alt ) {
|
| 253 | + # Just in case the input also has one or more suffixed bps |
| 254 | + last if( $p_ref_norm eq $alleles[0] and $p_alt eq $alleles[$i] ); |
253 | 255 | ( $p_ref_norm, $p_alt ) = map{substr( $_, 0, -1 )} ( $p_ref_norm, $p_alt );
|
254 | 256 | }
|
255 | 257 | # Make sure that both REF and ALT alleles match, because deletions can vary
|
|
0 commit comments