Skip to content

Commit 2f82fa4

Browse files
committed
Fix: VCFs need semicolon delim FILTER data, and header lines
1 parent a507111 commit 2f82fa4

File tree

3 files changed

+10
-6
lines changed

3 files changed

+10
-6
lines changed

maf2vcf.pl

+6-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353

5454
# Before anything, let's parse the headers of this supposed "MAF-like" file and do some checks
5555
my $maf_fh = IO::File->new( $input_maf ) or die "ERROR: Couldn't open input MAF: $input_maf!\n";
56-
my ( %uniq_regions, %flanking_bps, @tn_pair, %col_idx, $header_line );
56+
my ( %uniq_regions, %filter_tags, %flanking_bps, @tn_pair, %col_idx, $header_line );
5757
while( my $line = $maf_fh->getline ) {
5858

5959
# If the file uses Mac OS 9 newlines, quit with an error
@@ -89,10 +89,12 @@
8989
( %col_idx ) or die "ERROR: Couldn't find a header line (must start with Hugo_Symbol, Chromosome, or Tumor_Sample_Barcode): $input_maf\n";
9090

9191
# For each variant in the MAF, parse out the locus for running samtools faidx later
92-
my ( $chr, $pos, $ref ) = map{ my $c = lc; ( defined $col_idx{$c} ? $cols[$col_idx{$c}] : "" )} qw( Chromosome Start_Position Reference_Allele );
92+
my ( $chr, $pos, $ref, $filter ) = map{ my $c = lc; ( defined $col_idx{$c} ? $cols[$col_idx{$c}] : "" )} qw( Chromosome Start_Position Reference_Allele FILTER );
9393
$ref =~ s/^(\?|-|0)+$//; # Blank out the dashes (or other weird chars) used with indels
9494
my $region = "$chr:" . ( $pos - 1 ) . "-" . ( $pos + length( $ref ));
9595
$uniq_regions{$region} = 1;
96+
# Also track the unique FILTER tags seen, so we can construct VCF header lines for each
97+
map{ $filter_tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( /,|;/, $filter );
9698
}
9799
$maf_fh->close;
98100

@@ -152,6 +154,7 @@
152154
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n";
153155
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=AD,Number=G,Type=Integer,Description=\"Allelic depths of REF and ALT(s) in the order listed\">\n";
154156
$tn_vcf{$vcf_file} .= "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Total read depth across this site\">\n";
157+
$tn_vcf{$vcf_file} .= "##FILTER=<ID=$_,Description=\"\">\n" foreach ( sort keys %filter_tags );
155158
$tn_vcf{$vcf_file} .= "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t$t_id\t$n_id\n";
156159
}
157160

@@ -304,6 +307,7 @@
304307
$vcf_fh->print( "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n" );
305308
$vcf_fh->print( "##FORMAT=<ID=AD,Number=G,Type=Integer,Description=\"Allelic Depths of REF and ALT(s) in the order listed\">\n" );
306309
$vcf_fh->print( "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">\n" );
310+
$vcf_fh->print( "##FILTER=<ID=$_,Description=\"\">\n" ) foreach ( sort keys %filter_tags );
307311
$vcf_fh->print( "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" . join("\t", @vcf_cols) . "\n" );
308312

309313
# Write each variant into the multi-sample VCF

vcf2maf.pl

+2-2
Original file line numberDiff line numberDiff line change
@@ -769,15 +769,15 @@ sub GetBiotypePriority {
769769
# Copy FILTER from input VCF, and tag calls with high allele counts in any ExAC subpopulation
770770
my $subpop_count = 0;
771771
# Remove existing common_variant tags from input, so it's redefined by our criteria here
772-
$filter = join( ",", grep{ $_ ne "common_variant" } split( ",", $filter ));
772+
$filter = join( ";", grep{ $_ ne "common_variant" } split( /,|;/, $filter ));
773773
foreach my $subpop ( qw( AFR AMR EAS FIN NFE OTH SAS )) {
774774
if( $maf_line{"ExAC_AC_AN_$subpop"} ) {
775775
my ( $subpop_ac ) = split( "/", $maf_line{"ExAC_AC_AN_$subpop"} );
776776
$subpop_count++ if( $subpop_ac > $max_filter_ac );
777777
}
778778
}
779779
if( $subpop_count > 0 ) {
780-
$filter = (( $filter eq "PASS" or $filter eq "." or !$filter ) ? "common_variant" : "$filter,common_variant" );
780+
$filter = (( $filter eq "PASS" or $filter eq "." or !$filter ) ? "common_variant" : "$filter;common_variant" );
781781
}
782782
$maf_line{FILTER} = $filter;
783783

vcf2vcf.pl

+2-2
Original file line numberDiff line numberDiff line change
@@ -300,15 +300,15 @@
300300
# Add more filter tags to the FILTER field, if --add-filters was specified
301301
if( $add_filters ) {
302302
my %tags = ();
303-
map{ $tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( ",", $filter );
303+
map{ $tags{$_} = 1 unless( $_ eq "PASS" or $_ eq "." )} split( /,|;/, $filter );
304304
$tags{LowTotalDepth} = 1 if(( $tum_info{DP} ne "." and $tum_info{DP} < $min_tum_depth ) or ( $nrm_info{DP} ne "." and $nrm_info{DP} < $min_nrm_depth ));
305305
my @tum_depths = split( /,/, $tum_info{AD} );
306306
my @nrm_depths = split( /,/, $nrm_info{AD} );
307307
my $tum_alt_depth = $tum_depths[$var_allele_idx];
308308
my $nrm_alt_depth = $nrm_depths[$var_allele_idx];
309309
$tags{LowTumorSupport} = 1 if( $tum_alt_depth ne "." and $tum_alt_depth < $min_tum_support );
310310
$tags{HighNormalSupport} = 1 if( $nrm_alt_depth ne "." and $nrm_alt_depth > $max_nrm_support );
311-
my $tags_to_add = join( ",", sort keys %tags );
311+
my $tags_to_add = join( ";", sort keys %tags );
312312
$filter = ( $tags_to_add ? $tags_to_add : $filter );
313313
}
314314

0 commit comments

Comments
 (0)