Add files via upload

genemine · web-flow · commit c729a6978746 · 2021-02-07T18:19:24.000+08:00
Added the -s (--threads) and -m (--mem, in G) options to iread.py to specify the number of threads and the memory, both of which are passed through to bam2intron.
diff --git a/bam2intron b/bam2intron
@@ -7,6 +7,9 @@ intronbed=$2
 output=$3
 purename=$4
 MAPQ=$5
+threads=$6
+mem=$7
+
 
 # prepare out directory
 if [ ! -d $output ];then 
@@ -33,11 +36,11 @@ if [ ! -e "$bai" ]; then
 fi 
 
 # pre-filter bam using intron bed files
-samtools view -b -q $MAPQ -@ 20  -L $intronbed $bam > $umapbam
+samtools view -b -q $MAPQ -@ $threads  -L $intronbed $bam > $umapbam
 
 # covert to bed format
 #echo '--->   Converting bam to bed file'
-bam2bed —input bam -r tmp  -m 30G < "${umapbam}" |bedscore > $bambed
+bam2bed —input bam -r tmp  -m $mem < "${umapbam}" |bedscore > $bambed
 
 # get intron-spanning reads
 bedmap --bp-ovr 1  --skip-unmapped --echo --sum --echo-overlap-size --echo-map-id-uniq  $intronbed $bambed > $overlap
diff --git a/iread.py b/iread.py
@@ -24,6 +24,8 @@
 parser.add_argument('-t','--total_reads',help="The total number of mapped reads/fragments from read aligners, say 50 million. This is used to calculate FPKM. Users can use samtools to calculate the number of mapped reads from the input bam file. It's needed to be provided by user.")
 parser.add_argument('-k','--n_cores',help="The number of CPU cores to use. Default to n-2, where n is the total number of cores available.")
 parser.add_argument('-q','--MAPQ',help="The MAPQ score for retrieving uniquely mapped reads. Default to 255, which is the score for unique mapping reads in STAR. If other aligners such as Hisat or TopHat are used, change this score accordingly.")
+parser.add_argument('-s','--threads', help="Number of threads to use. Default to 20.")
+parser.add_argument('-m','--mem', help="memory to use (unit: G). Default to 30.")
 parser.add_argument('-b','--bias',help="an intron-length correction term for calculating FPKM of introns. This means that the length of intron used for calculating FPKM will be the true intron length plus this correction term. This is used to prevent very high FPKM for short introns. Default: 100.")
 
 args = parser.parse_args()   # parse command-line arguments
@@ -109,17 +111,30 @@
 	MAPQ = args.MAPQ
 else:
 	MAPQ = 255   # 255 for STAR
+# number of threads
+if args.threads:
+	threads = args.threads
+else:
+	threads = 20
+
 
 
 
+# memory settings
+if args.mem:
+	mem = args.mem
+else:
+	mem = 30
+
 # correction term for intron lengths
 if args.bias:
 	bias = args.bias
 else:
 	bias = 100
 
 # reduce bam to intronic regions
-cmd_reduce = 'bam2intron ' + ' ' + bam_file + ' ' + intron_file + ' ' + output_folder + ' ' + pure_file_name + ' ' +str(MAPQ)
+cmd_reduce = 'bam2intron ' + ' ' + bam_file + ' ' + intron_file + ' ' + output_folder + ' ' + pure_file_name + ' ' +str(MAPQ) + ' '+str(threads)+' '+str(mem)+'G'
+#print(cmd_reduce)
 os.system(cmd_reduce)
 
 # count intronic reads