-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpostprocess.py
103 lines (89 loc) · 4.06 KB
/
postprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import sys
import numpy as np
import csv
def _read_results(result_path):
    """Load a results CSV and return one (id, target, prediction) per id.

    Every non-empty row is treated as data (no header row is skipped): the
    first column is the id, and the second and third columns are parsed as
    floats (target and prediction). When an id appears on several rows, only
    its first occurrence is kept.

    Args:
        result_path: Path to the CSV file to read.

    Returns:
        A tuple ``(poly_ids, targets, preds)``. ``poly_ids`` is a list of the
        unique ids in sorted order; ``targets`` and ``preds`` are 1-D float
        arrays aligned with it. All three are empty for an empty file.

    Raises:
        IndexError: If a non-empty row has fewer than three columns.
        ValueError: If a target or prediction cannot be parsed as a float.
    """
    # newline='' leaves line-ending handling to the csv module.
    with open(result_path, newline='') as f:
        # csv.reader yields [] for blank lines; skip them rather than crash.
        rows = [row for row in csv.reader(f) if row]

    # Record the row index of the first occurrence of each id in one pass.
    first_row = {}
    for idx, row in enumerate(rows):
        first_row.setdefault(row[0], idx)

    # Sort the ids so the output order depends neither on hash iteration
    # order nor on the row order of the file: id lists read from different
    # files are compared for equality elsewhere in this script.
    poly_ids = sorted(first_row)
    keep = np.array([first_row[pid] for pid in poly_ids], dtype=int)

    # Index-array selection also works when there are no rows at all.
    targets = np.array([float(row[1]) for row in rows], dtype=float)[keep]
    preds = np.array([float(row[2]) for row in rows], dtype=float)[keep]
    return poly_ids, targets, preds
def print_logp_errors(result_dir):
    """Print the "true" and "apparent" MAE for each noise-level run.

    For every noise level in a fixed list, reads
    ``<result_dir>/<noise>/test_results.csv``. All files must yield the same
    id list. For each run, the "true" MAE compares the predictions with the
    targets read from the first ('0.00') run, while the "apparent" MAE
    compares them with that run's own targets.

    Args:
        result_dir: Directory holding one sub-directory per noise level.
    """
    noise_levels = '0.00 0.01 0.02 0.04 0.08 0.16 0.32 0.64 1.28 2.56 5.12'.split()

    # Load every run before printing anything, so that an id mismatch in a
    # later file aborts before any output is produced.
    runs = []
    reference_ids = None
    for level in noise_levels:
        csv_path = os.path.join(result_dir, level, 'test_results.csv')
        ids, run_target, run_pred = _read_results(csv_path)
        runs.append((level, run_target, run_pred))
        if reference_ids is None:
            reference_ids = ids
        assert reference_ids == ids

    # Targets of the first run serve as the reference for the "true" MAE.
    reference_target = runs[0][1]
    for level, run_target, run_pred in runs:
        true_mae = np.mean(np.abs(run_pred - reference_target))
        apparent_mae = np.mean(np.abs(run_pred - run_target))
        print('For noise = {} split 0, true MAE is {:0.3f}, apparent MAE is {:0.3f}'.format(
            level, true_mae, apparent_mae))
def _get_cond(cond_csv):
    """Read a CSV and map each row's first column to log10 of its third.

    Args:
        cond_csv: Path to the CSV file. Every row is treated as data; the
            first column is the key and the third is parsed as a float.

    Returns:
        A dict from key to ``np.log10`` of the parsed third-column value. If
        a key repeats, the value from the last such row is kept.
    """
    log_value_by_id = {}
    with open(cond_csv) as handle:
        for record in csv.reader(handle):
            key, raw_value = record[0], record[2]
            log_value_by_id[key] = np.log10(float(raw_value))
    return log_value_by_id
def print_poly_rand_errors(result_dir,
                           cond_csv='data/conductivity/cond_5ns_new_config.csv'):
    """Print the prediction MAE and the MAE between two target datasets.

    Reads ``<result_dir>/test_results.csv`` and a second CSV of values
    (converted to log10 by ``_get_cond``). Only ids present in both files are
    used. Two numbers are printed: the MAE between predictions and the
    targets from the results file, and the MAE between those targets and the
    values from the second CSV.

    Args:
        result_dir: Directory containing ``test_results.csv``.
        cond_csv: Path to the second CSV. Defaults to the path that was
            previously hard-coded, which is relative to the working directory.
    """
    poly_ids, target, pred = _read_results(
        os.path.join(result_dir, 'test_results.csv'))
    second_by_id = _get_cond(cond_csv)

    # Keep only rows whose id also appears in the second dataset; the ids
    # returned by _read_results are unique and aligned with target/pred.
    kept = np.array(
        [idx for idx, pid in enumerate(poly_ids) if pid in second_by_id],
        dtype=int)
    target = target[kept]
    pred = pred[kept]
    target2 = np.array([second_by_id[poly_ids[idx]] for idx in kept])

    apparent_mae = np.mean(np.abs(pred - target))
    simulation_mae = np.mean(np.abs(target - target2))
    print('For 5 ns polymer conductivity, apparent MAE for split 0 is {:0.3f}'.format(apparent_mae))
    print('MAE between two indepdent MD simulation is {:0.3f}'.format(simulation_mae))
def print_poly_systematic_errors(result_dir):
    """Print the MAE of each property's results next to fixed reference MAEs.

    The reference values are chosen by whether ``result_dir`` contains
    'systematic_int' or 'systematic_ext' (checked in that order). For each
    property, ``<result_dir>/<prop>/exp_test_results.csv`` is read and the MAE
    between its targets and predictions is printed together with the two
    hard-coded reference numbers for that property.

    Args:
        result_dir: Directory holding one sub-directory per property.

    Raises:
        NotImplementedError: If ``result_dir`` contains neither marker.
    """
    # (path marker, "MAE to 5ns MD" values, "linear corrected MAE" values);
    # each value list is ordered like property_names below.
    reference_tables = (
        ('systematic_int',
         (0.528, 0.503, 0.455, 0.612),
         (0.152, 0.148, 0.096, 0.072)),
        ('systematic_ext',
         (0.278, 0.419, 0.249, 0.528),
         (0.275, 0.247, 0.297, 0.110)),
    )
    selected = next(
        (entry for entry in reference_tables if entry[0] in result_dir), None)
    if selected is None:
        raise NotImplementedError
    _, md_maes, linear_maes = selected

    property_names = ('conductivity', 'li_diff', 'tfsi_diff', 'poly_diff')
    template = 'For 50 ns polymer {}, corrected MAE for split 0 is {:0.3f}, MAE to 5ns MD is {:0.3f}, linear corrected MAE is {:0.3f}'
    for position, prop in enumerate(property_names):
        csv_path = os.path.join(result_dir, prop, 'exp_test_results.csv')
        _, target, pred = _read_results(csv_path)
        mae = np.mean(np.abs(target - pred))
        print(template.format(prop, mae, md_maes[position], linear_maes[position]))
if __name__ == '__main__':
    # Command-line entry point: expects exactly two arguments, a task name
    # and the directory holding that task's results.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print('[Usage] task result_dir')
        sys.exit(1)
    task, result_dir = cli_args

    # Map each supported task name to the function that reports it.
    handlers = {
        'logp': print_logp_errors,
        'poly_rand': print_poly_rand_errors,
        'poly_sys': print_poly_systematic_errors,
    }
    handler = handlers.get(task)
    if handler is None:
        raise NotImplementedError
    handler(result_dir)