# multiego.py
# Forked from multi-ego/multi-eGO.
import argparse
import sys
import os
import numpy as np
from src.multiego import ensemble
from src.multiego import io
from tools.face_generator import generate_face
from src.multiego.resources.type_definitions import parse_json
def _abort(message):
    """
    Print ``message`` on stdout and terminate the program.

    ``sys.exit`` is used instead of the ``exit`` builtin: the latter is injected by the
    ``site`` module, so it is not guaranteed to exist in every interpreter configuration.
    """
    print(message)
    # NOTE(review): the exit status is left at 0, exactly as in the previous implementation.
    # Consider switching to a non-zero status so that shell pipelines can detect failures.
    sys.exit()


def _build_meGO_parser():
    """
    Build the argparse parser describing every command-line option of multiego.py.

    Returns:
    argparse.ArgumentParser: The configured parser (nothing is parsed here).
    """
    parser = argparse.ArgumentParser(
        prog="multiego.py",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=(
            "Generates a multi-eGO model based on one or more training simulations\n"
            "and their corresponding reference simulations. In most cases one single\n"
            "parameter is required, --epsilon, that sets the maximum interaction energy\n"
            "for a contact pair.\n"
        ),
        epilog=(
            "example usage:\n"
            "1) generate a random coil prior model to generate the reference data for a single domain intramolecular interactions\n"
            "> python multiego.py --system GB1 --egos rc\n"
            "2) generate a production simulation using the reference data in the reference folder and the training data in the md_monomer folder\n"
            "interaction energy is set to 0.3 kJ/mol\n"
            "> python multiego.py --system GB1 --egos production --train md_monomer --epsilon 0.3\n"
        ),
    )

    # Required arguments
    required_args = parser.add_argument_group("Required arguments")
    required_args.add_argument(
        "--system",
        type=str,
        required=True,
        help="Name of the system corresponding to system input folder.",
    )
    required_args.add_argument(
        "--egos",
        choices=["rc", "production"],
        required=True,
        help=(
            "rc: creates a force-field for random coil simulations.\n"
            "production: creates a force-field combining random coil simulations and training simulations.\n"
        ),
    )

    # Optional arguments
    optional_args = parser.add_argument_group("Optional arguments")
    optional_args.add_argument(
        "--epsilon",
        type=float,
        help="Maximum interaction energy per contact. The typical range is 0.2-0.4 kJ/mol",
    )
    optional_args.add_argument(
        "--reference",
        type=str,
        default="reference",
        help=(
            "The folder including all the reference information needed to setup multi-eGO,\n"
            "corresponding to the subfolder to process.\n"
        ),
    )
    optional_args.add_argument(
        "--train",
        nargs="+",
        type=str,
        default=[],
        help=(
            "A list of the training simulations to be included in multi-eGO,\n"
            "corresponding to the subfolders to process and where the contacts are learned.\n"
        ),
    )
    optional_args.add_argument(
        "--check",
        nargs="+",
        type=str,
        default=[],
        help=(
            "A list of the simulations corresponding to the subfolders used to check\n"
            "whether the contacts learned are compatible with those provided in here.\n"
        ),
    )
    optional_args.add_argument("--out", type=str, default="", help="Suffix for the output directory name.")
    optional_args.add_argument(
        "--inter_epsilon",
        type=float,
        help="Maximum interaction energy per intermolecular contacts. The typical range is 0.2-0.4 kJ/mol",
    )
    optional_args.add_argument(
        "--inter_domain_epsilon",
        type=float,
        help="Maximum interaction energy per interdomain contacts. The typical range is 0.2-0.4 kJ/mol",
    )
    optional_args.add_argument(
        "--p_to_learn",
        type=float,
        default=0.9995,
        help="Fraction of training simulations to learn.",
    )
    optional_args.add_argument(
        "--epsilon_min",
        type=float,
        default=0.07,
        help="The minimum meaningful epsilon value.",
    )
    optional_args.add_argument(
        "--force_split",
        default=False,
        action="store_true",
        help="Split inter and intra-molecular interactions in the ffnonbonded and topology files.",
    )
    optional_args.add_argument(
        "--single_molecule",
        default=False,
        action="store_true",
        help="Enable optimisations valid if you are simulating a single molecule.",
    )
    optional_args.add_argument(
        "--custom_dict",
        type=str,
        help="Custom dictionary for special molecules",
    )
    optional_args.add_argument(
        "--custom_c12",
        type=str,
        help="Custom dictionary of c12 for special molecules",
    )
    optional_args.add_argument(
        "--no_header",
        action="store_true",
        help="Removes headers from the output files when set",
    )
    # The multi-epsilon files are optional too, so they live in the same help group.
    optional_args.add_argument(
        "--multi_epsi_intra", type=str, help="Path to the input file specifying the intra epsilons"
    )
    optional_args.add_argument(
        "--multi_epsi_inter_domain", type=str, help="Path to the input file specifying the inter domain epsilons"
    )
    optional_args.add_argument(
        "--multi_epsi_inter", type=str, help="Path to the input file specifying the inter epsilons"
    )
    optional_args.add_argument(
        "--symmetry",
        default="",
        type=str,
        help="Symmetry file for the system",
    )
    optional_args.add_argument(
        "--f",
        default=1,
        type=float,
        help="partition function normalization",
    )
    optional_args.add_argument(
        "--inter_f",
        default=1,
        type=float,
        help="partition function normalization inter-molecular",
    )
    optional_args.add_argument(
        "--inter_domain_f",
        default=1,
        type=float,
        help="partition function normalization inter_domain",
    )
    optional_args.add_argument(
        "--relative_c12d", default=0.001, type=float, help="Relative deviation from default to set new replulsive c12"
    )
    return parser


def _configure_multi_epsilon(args):
    """
    Set ``args.multi_mode`` and, in multi-epsilon mode, load the per-molecule epsilon tables.

    When --multi_epsi_intra is given, the following attributes are added to ``args``:
    names / multi_epsilon, names_inter_domain / multi_epsilon_inter_domain and
    names_inter / multi_epsilon_inter. Inconsistent option combinations terminate the program.
    """
    if args.multi_epsi_intra is None:
        args.multi_mode = False
        return

    args.multi_mode = True
    args.names, args.multi_epsilon = io.read_intra_file(args.multi_epsi_intra)

    # INTER-DOMAIN
    if args.multi_epsi_inter_domain is not None:
        args.names_inter_domain, args.multi_epsilon_inter_domain = io.read_intra_file(args.multi_epsi_inter_domain)
    elif args.inter_domain_epsilon is not None:
        # a scalar inter-domain epsilon cannot be mixed with the multi-epsilon intra file
        _abort(
            "Inter domain option should be parsed with --multi_epsi_inter_domain if --multi_epsi_intra is used and not with --inter_domain_epsilon\n"
            "Choose either multiple epsilon options:\n"
            "--multi_epsi_intra PATH_TO_FILE\n"
            "--multi_epsi_inter_domain PATH_TO_FILE\n"
            "Or the single epsilon options:\n"
            "--epsilon VALUE\n"
            "--inter_domain_epsilon VALUE\n"
        )
    else:
        # no inter-domain information at all: reuse the intra table
        args.names_inter_domain = args.names
        args.multi_epsilon_inter_domain = args.multi_epsilon

    # INTER
    if args.multi_epsi_inter is not None:
        args.names_inter, args.multi_epsilon_inter = io.read_inter_file(args.multi_epsi_inter)
        # array_equal also copes with lists of different length, where an elementwise
        # comparison would fail before the intended message could be printed
        if not np.array_equal(np.asarray(args.names), np.asarray(args.names_inter)):
            _abort(
                f"ERROR: the names of the molecules in the files {args.multi_epsi_intra} and {args.multi_epsi_inter} are different.\n"
                "The names of the molecules must be consistent with each other and with those in the topology"
            )
    elif args.inter_epsilon is None:
        _abort(
            "--multi intra mode activated, but no information for inter epsilon was set.\n"
            "Please set also the inter molecular interaction using one of the following options:\n"
            "--inter_epsilon VALUE\n"
            "--multi_epsi_inter PATH_TO_FILE "
        )
    else:
        # a single --inter_epsilon is broadcast to a square table sized like the intra one
        n_entries = len(args.multi_epsilon)
        args.names_inter = np.array(args.names)
        args.multi_epsilon_inter = np.zeros((n_entries, n_entries)) + args.inter_epsilon


def _check_epsilon_minimum(args):
    """
    Terminate the program if any epsilon in use is not greater than --epsilon_min.

    The check is skipped for --egos=rc.
    """
    if args.egos == "rc":
        return

    if args.epsilon is not None:
        # the inter/inter-domain values were defaulted to --epsilon by the caller, so none is None here
        single_values = (
            ("--epsilon", args.epsilon),
            ("--inter_domain_epsilon", args.inter_domain_epsilon),
            ("--inter_epsilon", args.inter_epsilon),
        )
        for option, value in single_values:
            if value <= args.epsilon_min:
                _abort(f"{option} ({value}) must be greater than --epsilon_min ({args.epsilon_min})")
    elif args.multi_mode:
        tables = (
            (args.multi_epsi_intra, args.multi_epsilon),
            (args.multi_epsi_inter_domain, args.multi_epsilon_inter_domain),
            (args.multi_epsi_inter, args.multi_epsilon_inter),
        )
        for path, table in tables:
            if np.min(table) <= args.epsilon_min:
                _abort(f"all epsilons in {path} must be greater than --epsilon_min ({args.epsilon_min})")


def meGO_parsing():
    """
    Parses and validates the command-line arguments for the multi-eGO model generation.

    The arguments are read from sys.argv. Besides the declared options, the returned namespace
    carries: root_dir (directory of this file), multi_mode (True when --multi_epsi_intra is used)
    and, in multi mode, the names* / multi_epsilon* attributes read from the epsilon files.
    --inter_epsilon and --inter_domain_epsilon default to --epsilon when the latter is given.

    Returns:
    argparse.Namespace: An object containing parsed arguments.

    Raises:
    SystemExit: If the options are missing, inconsistent or unknown (a message is printed first).
    """
    parser = _build_meGO_parser()
    args, remaining = parser.parse_known_args()
    args.root_dir = os.path.dirname(os.path.abspath(__file__))

    # inter and inter-domain epsilons fall back to epsilon when only the latter is provided
    if args.epsilon is not None:
        if args.inter_epsilon is None:
            args.inter_epsilon = args.epsilon
        if args.inter_domain_epsilon is None:
            args.inter_domain_epsilon = args.epsilon

    production = args.egos != "rc"

    # checking the options provided in the commandline
    if production and not args.train:
        _abort("--egos=production requires the list of folders containing the training simulations using the --train flag")
    if production and args.epsilon is None and args.multi_epsi_intra is None:
        _abort(
            "--epsilon or --multi_epsi_intra is required when using --egos=production. The typical range is between 0.2 and 0.4 kJ/mol"
        )
    if args.p_to_learn < 0.9:
        print("WARNING: --p_to_learn should be large enough (suggested value is 0.9995)")
    if production and args.epsilon_min <= 0.0:
        _abort(f"--epsilon_min ({args.epsilon_min}) must be greater than 0.")

    # MULTI EPSILON CASES
    # TODO add the option to write in the multi epsi inter file Nones in order to remove interaction between systems
    # Each interaction class accepts either the single value or the multi-epsilon file, never both
    if args.epsilon is not None and args.multi_epsi_intra is not None:
        _abort("Choose either a single intra epsilon for the system or the multi-epsilon intra. Cannot choose both")
    if args.inter_domain_epsilon is not None and args.multi_epsi_inter_domain is not None:
        _abort(
            "Choose either a single inter domain epsilon for the system or the multi-epsilon inter domain. Cannot choose both"
        )
    if args.inter_epsilon is not None and args.multi_epsi_inter is not None:
        _abort("Choose either a single inter epsilon for the system or the multi-epsilon inter. Cannot choose both")

    # the inter / inter-domain files are meaningful only together with the intra file
    if args.multi_epsi_intra is None and (args.multi_epsi_inter_domain is not None or args.multi_epsi_inter is not None):
        _abort(
            "--multi_epsi_inter_domain or --multi_epsi_inter where used, but --multi_epsi_intra was not parsed.\n"
            "In order to use the multi-epsilon option --multi_epsi_intra must be parsed. Please provide one or use the single epsslon case with:\n"
            "--epsilon\n"
            "--inter_domain_epsilon\n"
            "--inter_epsilon"
        )

    _configure_multi_epsilon(args)

    # CHECK all epsilons are greater than epsilon_min
    _check_epsilon_minimum(args)

    if args.custom_dict:
        custom_dict = parse_json(args.custom_dict)
        if custom_dict is None:
            print("WARNING: Custom dictionary was parsed, but the dictionary is empty")

    if remaining:
        print("Unknown arguments provided: " + str(remaining))
        parser.print_usage()
        sys.exit()

    return args
def init_meGO_ensembles(args):
    """
    Build the multi-eGO ensemble for the given command-line arguments.

    Args:
    args (argparse.Namespace): Parsed command-line arguments.

    Returns:
    meGO_ensemble: The object returned by ensemble.generate_bonded_interactions, applied to
    the ensemble created by ensemble.init_meGO_ensemble(args).
    """
    # first create the ensemble, then pass it through the bonded-interaction step
    initial_ensemble = ensemble.init_meGO_ensemble(args)
    return ensemble.generate_bonded_interactions(initial_ensemble)
def get_meGO_LJ(meGO_ensemble, args):
    """
    Produce the Lennard-Jones (LJ) tables of the multi-eGO model.

    Args:
    meGO_ensemble: Initialized multi-eGO ensemble.
    args (argparse.Namespace): Parsed command-line arguments; args.egos selects the mode.

    Returns:
    tuple: (meGO_LJ, meGO_LJ_14), the LJ parameters and the LJ parameters for 1-4 interactions.

    With args.egos == "rc" the LJ table comes from ensemble.generate_basic_LJ and the 1-4 table
    is the 1-4 pairs data itself, with an "epsilon" column set to minus its "c12" column.
    In any other mode the LJ datasets are initialised and both tables come from
    ensemble.generate_LJ. In both cases the 1-4 table is finally passed through
    ensemble.make_pairs_exclusion_topology.
    """
    pairs14, exclusion_bonds14 = ensemble.generate_14_data(meGO_ensemble)

    if args.egos != "rc":
        datasets = ensemble.init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14, args)
        train_dataset, check_dataset = datasets
        lj_table, lj14_table = ensemble.generate_LJ(meGO_ensemble, train_dataset, check_dataset, args)
    else:
        lj_table = ensemble.generate_basic_LJ(meGO_ensemble, args)
        # the 1-4 table is the very same object as pairs14: the new column is added in place
        lj14_table = pairs14
        lj14_table["epsilon"] = -lj14_table["c12"]

    lj14_table = ensemble.make_pairs_exclusion_topology(meGO_ensemble, lj14_table)
    return lj_table, lj14_table
def main():
    """
    Entry point: parse the command line, build the multi-eGO model and write it out.

    The steps are, in order: argument parsing, optional welcome banner, input file check,
    ensemble initialisation, LJ parameter generation, model writing and goodbye banner.

    Command-line Arguments:
    --system: Name of the system corresponding to the system input folder.
    --egos: Type of EGO. 'rc' for creating a force-field for random coil simulations,
    'production' for creating a force-field combining random coil simulations and training simulations.
    --epsilon: Maximum interaction energy per contact.
    --reference: The folder including all the reference information needed to setup multi-eGO, corresponding to the subfolder to process.
    --train: A list of the simulations to be included in multi-eGO, corresponding to the subfolders to process and where the contacts are learned.
    --check: Contacts from a simulation or a structure used to check whether the contacts learned are compatible with the structures provided.
    --out: Suffix for the output directory name.
    --inter_epsilon: Maximum interaction energy per intermolecular contacts.
    --inter_domain_epsilon: Maximum interaction energy per interdomain contacts.
    --p_to_learn: Amount of the simulation to learn.
    --epsilon_min: The minimum meaningful epsilon value.
    --no_header: Removes headers from output when set.
    """
    args = meGO_parsing()

    # the welcome banner is suppressed by --no_header
    show_welcome = not args.no_header
    if show_welcome:
        generate_face.print_welcome()

    print("- Checking for input files and folders")
    io.check_files_existence(args)

    print("- Initializing Multi-eGO model")
    meGO_ensembles = init_meGO_ensembles(args)

    print("- Generating Multi-eGO model")
    lj_tables = get_meGO_LJ(meGO_ensembles, args)
    meGO_LJ, meGO_LJ_14 = lj_tables

    print("- Writing Multi-eGO model")
    io.write_model(meGO_ensembles, meGO_LJ, meGO_LJ_14, args)

    generate_face.print_goodbye()
# Run the model generation only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()