Skip to content

Commit 2b3f2ab

Browse files
committed
DGEN2.jar reinstantiates GT probability class for case found of bug when used for many taxa
Found internally that gene tree probabilities were not being calculated correctly in a case with many taxa. This was due to not reinstantiating the class that calculates the gene tree probabilities before passing in a network with new branch lengths. The class requires reinstantiating because of its typical use case where it saves things in an internal state.
1 parent 5a1feb7 commit 2b3f2ab

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

CommandLineFiles/DoWholePaper.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
from RunDGEN import *
22

3-
i = 0.01
4-
j = 'cat'
5-
6-
h = j/i
7-
83
k = 0
94

5+
#8 tax - sites ignored? (was just using this line for tsting, prolly shouldnt rerun)
6+
#run_saved_dgen('/Users/leo/rice/res/data/dgen/tmp/figBig8/stat_8tax.txt', ['/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim1/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim2/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim3/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim4/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim5/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim6/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim7/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim8/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim9/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim10/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim11/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim12/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim13/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim14/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim15/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim16/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim17/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim18/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim19/seqfileNamed','/Users/leo/rice/res/data/dgen/simulations/12taxa/bigNetwork/firstTry/sim20/seqfileNamed'], verbose=True, plot='/Users/leo/rice/res/data/dgen/tmp/figBig8/plot_big_8', meta='Dgen')
7+
108
# cichlid alignment - i put the commands for these in the java code
119
#run_saved_dgen('/Users/leo/rice/res/data/dgen/tmp/figC/stat_cichlid.txt', ['/Users/leo/rice/res/data/cichlid/alignment/cichlid6tax.phylip-sequential.txt'], verbose=False, plot='/Users/leo/rice/res/data/dgen/tmp/figC/plot_figC', meta='Dgen')
1210
run_saved_dgen('/Users/leo/rice/res/data/dgen/tmp/figC/stat_cichlid.txt', ['/Users/leo/rice/res/data/cichlid/alignment/cichlid6tax.phylip-sequential.txt'], verbose=True, plot='/Users/leo/rice/res/data/dgen/tmp/figC/plot_figCVerbose', meta='Dgen')

CommandLineFiles/RunDGEN.py

+10
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,13 @@ def calculate_windows_to_DGEN(alignments, taxa_order, outgroup, list_of_tree_and
283283
elif bases.issubset(possibleBases) == False:
284284
num_ignored += 1
285285

286+
#final catch all else for ignored sites (forgot to add back in check for non biallelic (they were ignored but not counted in verbose output. this line now properly counts them for verbose mode))
287+
#includes also sites that only have all A's or whatever, basically anything non strictly biallelic
288+
else:
289+
num_ignored += 1
290+
291+
#should i add count here for sites that specifically violate biallelic as in 3 or 4 dif letters? (len(bases)>2)?
292+
286293
# Increment the site index
287294
site_idx += 1
288295

@@ -297,6 +304,9 @@ def calculate_windows_to_DGEN(alignments, taxa_order, outgroup, list_of_tree_and
297304
lineAdd.append(groupCount)
298305
list_of_tree_and_net_invariants_counts.append(lineAdd)
299306

307+
#for debugging, how many sites are actually observed that go into the observed calculations?
308+
309+
300310
# calculate new version of chi squared test
301311
# chi squared = sum of (obs - exp)^2 / exp
302312
# obs is Na and exp is Nt * |Na| / |Nt|

module/DGEN2.jar

43.3 MB
Binary file not shown.

0 commit comments

Comments
 (0)