|
1 | 1 | import argparse
|
2 | 2 | import math
|
| 3 | +import os |
3 | 4 |
|
4 | 5 | import MDAnalysis as mda
|
5 | 6 |
|
6 | 7 | # import numpy as np
|
7 | 8 | import pandas as pd
|
| 9 | +import yaml |
8 | 10 |
|
9 | 11 | from CodeEntropy import EntropyFunctions as EF
|
10 | 12 | from CodeEntropy import LevelFunctions as LF
|
11 | 13 | from CodeEntropy import MDAUniverseHelper as MDAHelper
|
12 | 14 |
|
13 | 15 | # from datetime import datetime
|
14 | 16 |
|
# Spellings accepted on the command line for boolean options (case-insensitive).
_TRUE_TOKENS = frozenset({"true", "t", "yes", "y", "1", "on"})
_FALSE_TOKENS = frozenset({"false", "f", "no", "n", "0", "off"})


def _str_to_bool(value):
    """Convert a command-line token such as ``"False"`` into a real ``bool``.

    ``argparse`` applies the ``type`` callable to the raw string, and
    ``bool("False")`` is ``True`` because every non-empty string is truthy.
    Using plain ``bool`` as an option type therefore makes it impossible to
    switch a flag off from the command line.

    Args:
        value: The raw token (or an already converted ``bool``).

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean
            spelling; argparse reports this as a normal usage error.
    """
    if isinstance(value, bool):
        return value

    token = str(value).strip().lower()
    if token in _TRUE_TOKENS:
        return True
    if token in _FALSE_TOKENS:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


# Declarative description of every supported option.  Each key is the option
# name (exposed as ``--<key>`` by ``setup_argparse``) and each value holds the
# keyword arguments handed to ``ArgumentParser.add_argument``.  The "default"
# entries are also read by ``merge_configs`` to fill in missing values.
arg_map = {
    "top_traj_file": {
        "type": str,
        "nargs": "+",
        "help": "Path to Structure/topology file followed by Trajectory file(s)",
        "default": [],
    },
    "selection_string": {
        "type": str,
        "help": "Selection string for CodeEntropy",
        "default": "all",
    },
    "start": {
        "type": int,
        "help": "Start analysing the trajectory from this frame index",
        "default": 0,
    },
    "end": {
        "type": int,
        "help": "Stop analysing the trajectory at this frame index",
        "default": -1,
    },
    "step": {
        "type": int,
        "help": "Interval between two consecutive frames to be read index",
        "default": 1,
    },
    "bin_width": {
        "type": int,
        "help": "Bin width in degrees for making the histogram",
        "default": 30,
    },
    "tempra": {
        "type": float,
        "help": "Temperature for entropy calculation (K)",
        "default": 298.0,
    },
    "verbose": {
        # Not ``bool``: see _str_to_bool for why that would always yield True.
        "type": _str_to_bool,
        "help": "True/False flag for noisy or quiet output",
        "default": False,
    },
    "thread": {"type": int, "help": "How many multiprocess to use", "default": 1},
    "outfile": {
        "type": str,
        "help": "Name of the file where the output will be written",
        "default": "outfile.out",
    },
    "resfile": {
        "type": str,
        "help": "Name of the file where the residue entropy output will be written",
        "default": "res_outfile.out",
    },
    "mout": {
        "type": str,
        "help": "Name of the file where certain matrices will be written",
        "default": None,
    },
    "force_partitioning": {"type": float, "help": "Force partitioning", "default": 0.5},
    "waterEntropy": {
        "type": _str_to_bool,
        "help": "Calculate water entropy",
        "default": False,
    },
}
| 78 | + |
| 79 | + |
def load_config(file_path):
    """Load a YAML configuration file and return its parsed content.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load``.  An empty document (which
        parses to ``None``) is returned as an empty dictionary.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Configuration file '{file_path}' not found.")

    # Decode explicitly as UTF-8 instead of relying on the platform locale,
    # which would garble non-ASCII content on systems whose default encoding
    # is not UTF-8.
    with open(file_path, "r", encoding="utf-8") as file:
        config = yaml.safe_load(file)

    # Only ``None`` (empty document) is replaced; other values pass through.
    return {} if config is None else config
| 93 | + |
| 94 | + |
def setup_argparse():
    """Build the command-line parser from the entries of ``arg_map``.

    Every key of ``arg_map`` becomes a long option ``--<key>``; the items of
    its entry (``type``, ``nargs``, ``default``, ``help`` ...) are forwarded
    unchanged as keyword arguments to ``ArgumentParser.add_argument``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="CodeEntropy: Entropy calculation with MCC method.",
        # Appends "(default: ...)" to each option's help text so that
        # ``--help`` documents the defaults declared in ``arg_map``.
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    for name, properties in arg_map.items():
        parser.add_argument(f"--{name}", **properties)

    return parser
| 106 | + |
| 107 | + |
def merge_configs(args, run_config):
    """Merge parsed CLI arguments with one run's YAML configuration.

    Precedence, highest first: a value given on the command line, the value
    in ``run_config``, the default declared in ``arg_map``.

    The parser pre-populates every option with its ``arg_map`` default, so an
    option the user did not pass is not ``None`` on ``args``.  A value that is
    ``None`` or still equal to its ``arg_map`` default is therefore treated as
    "not given on the command line" and may be replaced by the YAML value.
    As a consequence, explicitly passing the default on the command line
    cannot override a YAML value.

    Args:
        args: Namespace-like object holding the parsed CLI arguments.  It is
            modified in place.
        run_config: Mapping of option names to values for one run, or
            ``None`` for no YAML settings.  It is updated in place with the
            resolved value of every ``arg_map`` option.

    Returns:
        The same ``args`` object, with the merged values set as attributes.

    Raises:
        TypeError: If ``run_config`` is neither a dictionary nor ``None``.
    """
    if run_config is None:
        run_config = {}

    if not isinstance(run_config, dict):
        raise TypeError("run_config must be a dictionary or None.")

    # Step 1: apply YAML values wherever the command line left the option
    # untouched (missing, None, or still at its declared default).
    for key, value in run_config.items():
        current = getattr(args, key, None)
        untouched = current is None or (
            key in arg_map and current == arg_map[key].get("default")
        )
        if untouched:
            setattr(args, key, value)

    # Step 2: fall back to the declared default for anything still unset.
    for key, params in arg_map.items():
        if getattr(args, key, None) is None:
            setattr(args, key, params.get("default"))

    # Step 3: mirror the resolved (non-None) values back into run_config so
    # the run's dictionary reflects what will actually be used.
    for key in arg_map:
        resolved = getattr(args, key, None)
        if resolved is not None:
            run_config[key] = resolved

    return args
| 133 | + |
15 | 134 |
|
16 | 135 | def main():
|
17 | 136 | """
|
18 | 137 | Main function for calculating the entropy of a system using the multiscale cell
|
19 | 138 | correlation method.
|
20 | 139 | """
|
21 |
| - |
22 | 140 | try:
|
23 |
| - parser = argparse.ArgumentParser( |
24 |
| - description=""" |
25 |
| - CodeEntropy-POSEIDON is a tool to compute entropy using the |
26 |
| - multiscale-cell-correlation (MCC) theory and force/torque covariance |
27 |
| - methods with the ablity to compute solvent entropy. |
28 |
| - Version: |
29 |
| - 0.3.1; |
30 |
| -
|
31 |
| - Authors: |
32 |
| - Arghya Chakravorty (arghya90), |
33 |
| - Jas Kalayan (jkalayan), |
34 |
| - Donald Chang, |
35 |
| - Sarah Fegan |
36 |
| - Ioana Papa; |
37 |
| -
|
38 |
| - Output: |
39 |
| - *.csv = results from different calculateion, |
40 |
| - *.pkl - Pickled reduced universe for further analysis, |
41 |
| - *.out - detailed output such as matrix and spectra""" |
42 |
| - ) |
43 |
| - |
44 |
| - parser.add_argument( |
45 |
| - "-f", |
46 |
| - "--top_traj_file", |
47 |
| - required=True, |
48 |
| - dest="filePath", |
49 |
| - action="store", |
50 |
| - nargs="+", |
51 |
| - help="Path to Structure/topology file (AMBER PRMTOP, GROMACS TPR which " |
52 |
| - "contains topology and dihedral information) followed by Trajectory " |
53 |
| - "file(s) (AMBER NETCDF or GROMACS TRR) you will need to output the " |
54 |
| - "coordinates and forces to the same file. Required.", |
55 |
| - ) |
56 |
| - parser.add_argument( |
57 |
| - "-l", |
58 |
| - "--selectString", |
59 |
| - action="store", |
60 |
| - dest="selection_string", |
61 |
| - type=str, |
62 |
| - default="all", |
63 |
| - help="Selection string for CodeEntropy such as protein or resid, refer to " |
64 |
| - "MDAnalysis.select_atoms for more information.", |
65 |
| - ) |
66 |
| - parser.add_argument( |
67 |
| - "-b", |
68 |
| - "--begin", |
69 |
| - action="store", |
70 |
| - dest="start", |
71 |
| - help="Start analysing the trajectory from this frame index. Defaults to 0", |
72 |
| - default=0, |
73 |
| - type=int, |
74 |
| - ) |
75 |
| - parser.add_argument( |
76 |
| - "-e", |
77 |
| - "--end", |
78 |
| - action="store", |
79 |
| - dest="end", |
80 |
| - help="Stop analysing the trajectory at this frame index. Defaults to -1 " |
81 |
| - "(end of trajectory file)", |
82 |
| - default=-1, |
83 |
| - type=int, |
84 |
| - ) |
85 |
| - parser.add_argument( |
86 |
| - "-d", |
87 |
| - "--step", |
88 |
| - action="store", |
89 |
| - dest="step", |
90 |
| - help="interval between two consecutive frames to be read index. " |
91 |
| - "Defaults to 1", |
92 |
| - default=1, |
93 |
| - type=int, |
94 |
| - ) |
95 |
| - parser.add_argument( |
96 |
| - "-n", |
97 |
| - "--bin_width", |
98 |
| - action="store", |
99 |
| - dest="bin_width", |
100 |
| - default=30, |
101 |
| - type=int, |
102 |
| - help="Bin width in degrees for making the histogram of the dihedral angles " |
103 |
| - "for the conformational entropy. Default: 30", |
104 |
| - ) |
105 |
| - parser.add_argument( |
106 |
| - "-k", |
107 |
| - "--tempra", |
108 |
| - action="store", |
109 |
| - dest="temp", |
110 |
| - help="Temperature for entropy calculation (K). Default to 298.0 K", |
111 |
| - default=298.0, |
112 |
| - type=float, |
113 |
| - ) |
114 |
| - parser.add_argument( |
115 |
| - "-v", |
116 |
| - "--verbose", |
117 |
| - action="store", |
118 |
| - dest="verbose", |
119 |
| - default=False, |
120 |
| - type=bool, |
121 |
| - help="True/False flag for noisy or quiet output. Default: False", |
122 |
| - ) |
123 |
| - parser.add_argument( |
124 |
| - "-t", |
125 |
| - "--thread", |
126 |
| - action="store", |
127 |
| - dest="thread", |
128 |
| - help="How many multiprocess to use. Default 1 for single core execution.", |
129 |
| - default=1, |
130 |
| - type=int, |
131 |
| - ) |
132 |
| - parser.add_argument( |
133 |
| - "-o", |
134 |
| - "--out", |
135 |
| - action="store", |
136 |
| - dest="outfile", |
137 |
| - default="outfile.out", |
138 |
| - help="Name of the file where the output will be written. " |
139 |
| - "Default: outfile.out", |
140 |
| - ) |
141 |
| - parser.add_argument( |
142 |
| - "-r", |
143 |
| - "--resout", |
144 |
| - action="store", |
145 |
| - dest="resfile", |
146 |
| - default="res_outfile.out", |
147 |
| - help="Name of the file where the residue entropy output will be written. " |
148 |
| - "Default: res_outfile.out", |
149 |
| - ) |
150 |
| - parser.add_argument( |
151 |
| - "-m", |
152 |
| - "--mout", |
153 |
| - action="store", |
154 |
| - dest="moutfile", |
155 |
| - default=None, |
156 |
| - help="Name of the file where certain matrices will be written " |
157 |
| - "(default: None).", |
158 |
| - ) |
159 |
| - |
160 |
| - parser.add_argument( |
161 |
| - "-c", |
162 |
| - "--cutShell", |
163 |
| - action="store", |
164 |
| - dest="cutShell", |
165 |
| - default=None, |
166 |
| - type=float, |
167 |
| - help="include cutoff shell analysis, add cutoff distance in angstrom " |
168 |
| - "Default None will ust the RAD Algorithm", |
169 |
| - ) |
170 |
| - parser.add_argument( |
171 |
| - "-p", |
172 |
| - "--pureAtomNum", |
173 |
| - action="store", |
174 |
| - dest="puteAtomNum", |
175 |
| - default=1, |
176 |
| - type=int, |
177 |
| - help="Reference molecule resid for system of pure liquid. " "Default to 1", |
178 |
| - ) |
179 |
| - parser.add_argument( |
180 |
| - "-x", |
181 |
| - "--excludedResnames", |
182 |
| - dest="excludedResnames", |
183 |
| - action="store", |
184 |
| - nargs="+", |
185 |
| - default=None, |
186 |
| - help="exclude a list of molecule names from nearest non-like analysis. " |
187 |
| - "Default: None. Multiples are gathered into list.", |
188 |
| - ) |
189 |
| - parser.add_argument( |
190 |
| - "-w", |
191 |
| - "--water", |
192 |
| - dest="waterResnames", |
193 |
| - action="store", |
194 |
| - default="WAT", |
195 |
| - nargs="+", |
196 |
| - help="resname for water molecules. " |
197 |
| - "Default: WAT. Multiples are gathered into list.", |
198 |
| - ) |
199 |
| - parser.add_argument( |
200 |
| - "-s", |
201 |
| - "--solvent", |
202 |
| - dest="solventResnames", |
203 |
| - action="store", |
204 |
| - nargs="+", |
205 |
| - default=None, |
206 |
| - help="include resname of solvent molecules (case-sensitive) " |
207 |
| - "Default: None. Multiples are gathered into list.", |
208 |
| - ) |
209 |
| - parser.add_argument( |
210 |
| - "--solContact", |
211 |
| - action="store_true", |
212 |
| - dest="doSolContact", |
213 |
| - default=False, |
214 |
| - help="Do solute contact calculation", |
215 |
| - ) |
| 141 | + config = load_config("config.yaml") |
| 142 | + |
| 143 | + if config is None: |
| 144 | + raise ValueError( |
| 145 | + "No configuration file found, and no CLI arguments were provided." |
| 146 | + ) |
| 147 | + |
| 148 | + parser = setup_argparse() |
| 149 | + args, unknown = parser.parse_known_args() |
| 150 | + |
| 151 | + # Process each run in the YAML configuration |
| 152 | + for run_name, run_config in config.items(): |
| 153 | + if isinstance(run_config, dict): |
| 154 | + # Merging CLI arguments with YAML configuration |
| 155 | + args = merge_configs(args, run_config) |
| 156 | + |
| 157 | + # Ensure necessary arguments are provided |
| 158 | + if not getattr(args, "top_traj_file"): |
| 159 | + raise ValueError( |
| 160 | + "The 'top_traj_file' argument is required but not provided." |
| 161 | + ) |
| 162 | + if not getattr(args, "selection_string"): |
| 163 | + raise ValueError( |
| 164 | + "The 'selection_string' argument is required but not provided." |
| 165 | + ) |
216 | 166 |
|
217 |
| - args = parser.parse_args() |
218 |
| - except argparse.ArgumentError: |
219 |
| - print("Command line arguments are ill-defined, please check the arguments") |
| 167 | + # REPLACE INPUTS |
| 168 | + print(f"Printing all input for {run_name}") |
| 169 | + for arg in vars(args): |
| 170 | + print(f" {arg}: {getattr(args, arg) or ''}") |
| 171 | + else: |
| 172 | + print(f"Run configuration for {run_name} is not a dictionary.") |
| 173 | + except ValueError as e: |
| 174 | + print(e) |
220 | 175 | raise
|
221 | 176 |
|
222 |
| - # REPLACE INPUTS |
223 |
| - print("printing all input") |
224 |
| - for arg in vars(args): |
225 |
| - print(" {} {}".format(arg, getattr(args, arg) or "")) |
226 |
| - |
227 | 177 | # startTime = datetime.now()
|
228 | 178 |
|
229 | 179 | # Get topology and trajectory file names and make universe
|
230 |
| - tprfile = args.filePath[0] |
231 |
| - trrfile = args.filePath[1:] |
| 180 | + tprfile = args.top_traj_file[0] |
| 181 | + trrfile = args.top_traj_file[1:] |
232 | 182 | u = mda.Universe(tprfile, trrfile)
|
233 | 183 |
|
234 | 184 | # Define bin_width for histogram from inputs
|
|
0 commit comments