-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathProtExcluder.npl
47 lines (24 loc) · 1.41 KB
/
ProtExcluder.npl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#! /usr/bin/perl -w
# Driver script: excludes the portion matching a protein subject from a
# nucleotide sequence file.
#
# Usage:
#   Protexcluder.pl -f bpsofflankingtoremove(default=50) blastx/blastpfile fastafile
#
# Inputs:
#   -f N          value handed to the merge/unmatched-region helpers
#                 (the usage text calls it "bps of flanking to remove"); default 50.
#   blast file    blastx/blastp report; every intermediate file is written next
#                 to it as "<blastfile>.<suffix>".
#   fasta file    nucleotide sequences; results are written to
#                 "<fastafile>nPr" and "<fastafile>noProtFinal".
#
# The script only chains external programs (the _prexc_*.pl helpers, sort and
# cat), which must be on PATH.  What each helper does is not visible from this
# file; the step comments below describe the file flow only.
#
# Exit status: 0 when every step succeeded, 1 when at least one step failed.
# A failing step is reported on STDERR and the remaining steps still run,
# matching the historical best-effort behaviour of this script.
use strict;
use warnings;
use Getopt::Std;

my $usage = "Protexcluder.pl -f bpsofflankingtoremove(default=50) blastx/blastpfile fastafile \n";

# Parse switches first so that "-f N" is not counted as a positional
# argument; only then check that both input files were supplied.
my %opt;
getopts('f:', \%opt) or die $usage;
die $usage if @ARGV < 2;

my $Len = defined $opt{f} ? $opt{f} : 50;
die "-f expects a non-negative integer, got '$Len'\n$usage"
    unless $Len =~ /\A\d+\z/;

my ($blast, $fasta) = @ARGV;

# Quote one word for /bin/sh so file names containing spaces or shell
# metacharacters are passed through literally.
sub _sh_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run one pipeline step through the shell with its stdout sent to $output.
# Returns true on success; on failure prints a diagnostic and returns false.
sub _run_step {
    my ($output, $program, @args) = @_;

    my $command = join ' ', $program, map { _sh_quote($_) } @args;
    $command .= ' > ' . _sh_quote($output);

    system($command);
    my ($status, $os_error) = ($?, $!);
    return 1 if $status == 0;

    my $reason = $status == -1  ? "could not be started: $os_error"
               : $status & 127  ? sprintf('was killed by signal %d', $status & 127)
               :                  sprintf('exited with status %d', $status >> 8);
    warn "ProtExcluder: step '$program' $reason\n";
    return 0;
}

# Remove any existing "<fastafile>.ssi" before starting (presumably a stale
# sequence index used by the sfetch step -- confirm against that helper).
my $ssi = "$fasta.ssi";
if (-e $ssi && !unlink $ssi) {
    warn "ProtExcluder: cannot remove $ssi: $!\n";
}

# Each entry is [ output file, program, arguments... ].  File names keep the
# historical suffixes; note that "m50" appears in them regardless of -f.
my @steps = (
    # Blast report -> .mt -> .mtca -> .mtca_3_60
    [ "$blast.mt",        '_prexc_matchtract.pl',           $blast ],
    [ "$blast.mtca",      '_prexc_countaanu.pl',            "$blast.mt" ],
    [ "$blast.mtca_3_60", '_prexc_rmlowcomplexitymathc.pl', "$blast.mtca", 3, 60 ],

    # Blast report -> .f, combined with .mtca_3_60 into .fnolow, then sorted
    # on column 6 and numerically on column 3 into .fnolows.
    [ "$blast.f",       '_prexc_blastformatProt.pl', $blast ],
    [ "$blast.fnolow",  '_prexc_rmlowcomfromBF.pl',  "$blast.mtca_3_60", "$blast.f" ],
    [ "$blast.fnolows", 'sort', '-k', '6,6', '-k', '3,3n', "$blast.fnolow" ],

    # The two helpers that receive the -f value.
    [ "$blast.fnolowm50",    '_prexc_mergequeryBF.pl',       "$blast.fnolows",   $Len ],
    [ "$blast.fnolowm50MSP", '_prexc_unmatchedregionBF.pl',  "$blast.fnolowm50", $Len ],

    # This helper is given its output file (.fnolowm50seq) as an argument; the
    # original discarded its stdout, so it is sent to /dev/null here.
    [ '/dev/null', '_prexc_mspesl-sfetch.pl',
      $fasta, "$blast.fnolowm50MSP", 0, "$blast.fnolowm50seq" ],

    [ "$blast.fnolowm50seqm",   '_prexc_mergeunmatchedregion.pl', "$blast.fnolowm50seq" ],
    [ "$blast.fnolowm50seqmGC", '_prexc_GCcontent.pl',            "$blast.fnolowm50seqm" ],
    [ "$blast.fnolowm50seqmns", '_prexc_rmshortseq_noN.pl',
      "$blast.fnolowm50seqmGC", "$blast.fnolowm50seqm", 50 ],

    # Column 6 of .fnolow -> .fnolowlist, which is applied to the fasta file
    # to produce "<fastafile>nPr".
    [ "$blast.fnolowlist", '_prexc_getanycolumnuni.pl', "$blast.fnolow", 6 ],
    [ "${fasta}nPr",       '_prexc_rmlistedseq.pl',     "$blast.fnolowlist", $fasta ],

    # Concatenate both sequence sets and reformat into the final output.
    # NOTE(review): "temp" is a fixed name in the current directory, so two
    # concurrent runs in the same directory will overwrite each other's file.
    [ 'temp',                'cat', "${fasta}nPr", "$blast.fnolowm50seqmns" ],
    [ "${fasta}noProtFinal", '_prexc_fasta-reformat.pl', 'temp', 50 ],
);

my $failed = 0;
for my $step (@steps) {
    my ($output, $program, @args) = @{$step};
    _run_step($output, $program, @args) or $failed++;
}

if ($failed) {
    warn "ProtExcluder: $failed step(s) failed; output files may be incomplete\n";
    exit 1;
}
exit 0;