-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
268 lines (229 loc) · 9.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import numpy as np
import pandas as pd
from statsmodels.stats.contingency_tables import mcnemar
def assess_impact_hypotheses(
    df, alpha=0.05, exact_thresh=25, group_name="practitioners"
):
    """Analyze the impact of the device on practitioners' performance.

    Runs McNemar's test on the paired before/after correctness columns,
    prints the hypothesis-test conclusion and a breakdown of outcome rates.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Is correct before device`` and
        ``Is correct after device`` with values "correct"/"incorrect".
    alpha : float, default 0.05
        Significance level for the test.
    exact_thresh : int, default 25
        If the number of discordant pairs (b + c) is below this threshold,
        the exact (binomial) McNemar test is used instead of the
        chi-squared approximation.
    group_name : str, default "practitioners"
        Label used in the printed summary lines.

    Returns
    -------
    float
        The p-value of the McNemar test (also printed).
    """
    total = df.shape[0]
    # Count every (before, after) correctness combination
    answers = []
    query_base = '`Is correct before device`=="{}" and `Is correct after device`=="{}"'
    query_args = [
        ["correct", "correct"],
        ["correct", "incorrect"],
        ["incorrect", "correct"],
        ["incorrect", "incorrect"],
    ]
    for args in query_args:
        answers.append(df.query(query_base.format(*args)).shape[0])
    # Build the 2x2 contingency matrix: rows = before device, cols = after
    data = np.array([answers[:2], answers[2:]])
    # Labeled copy of the matrix, kept for inspection/debugging
    contingency = pd.DataFrame(
        data,
        columns=["Answer with device (correct)", "Answer with device (incorrect)"],
        index=["Answer without device (correct)", "Answer without device (incorrect)"],
    )
    # McNemar's Test.
    # The exact (binomial) variant is recommended when the number of
    # discordant pairs (b + c, the OFF-diagonal cells) is small.
    # BUGFIX: the original used np.diag(data).sum(), i.e. the concordant
    # pairs a + d, contradicting its own stated (b + c) rule.
    discordant = data[0, 1] + data[1, 0]
    if discordant < exact_thresh:
        result = mcnemar(data, exact=True, correction=False)
    else:
        result = mcnemar(data, exact=False, correction=False)
    p_value = result.pvalue
    print("\nP-Value:", p_value)
    # BUGFIX: h1 was missing the word "device"
    h0 = "the device has an impact on the improvement of diagnostic capabilities"
    h1 = "the device has an impact on the improvement of diagnostic capabilities"
    print(f"H0 <- There is no significant evidence to claim that {h0}")
    print(f"H1 <- There is significant evidence to claim that {h1}")
    # Confidence level derived from alpha instead of a hard-coded "95%"
    confidence = 1 - float(alpha)
    if p_value < float(alpha):
        # p-value below the significance level -> reject the null hypothesis
        action = "reject"
        print(
            f"\nSince the p-value = {p_value:.6f} < α ({alpha:.0%}),"
            f" we {action} the null hypothesis"
            f" and claim that we have {confidence:.0%} confidence that {h0}"
        )
    else:
        # p-value at or above the significance level -> fail to reject H0
        action = "cannot reject"
        print(
            f"\nSince the p-value = {p_value:.6f} > α ({alpha:.0%}),"
            f" we {action} the null hypothesis"
            f" as there is no significant evidence to claim that {h1}"
        )
    # Correct both without and with the device
    reinforcement_rate = data[0, 0] / total
    print(f"- Reinforcement of {group_name}' performance: {reinforcement_rate:.2%}")
    # Incorrect without the device, correct with it
    correction_rate = data[1, 0] / total
    print(f"- Improvement of {group_name}' performance: {correction_rate:.2%}")
    # Incorrect both without and with the device
    double_failure_rate = data[1, 1] / total
    print(f"- Unaffected performance: {double_failure_rate:.2%}")
    # Correct without the device, incorrect with it
    failure_rate = data[0, 1] / total
    print(f"- Negative impact on performance: {failure_rate:.2%}")
    return p_value
def assess_impact_on_pathology(df):
    """Tabulate per-pathology correct/incorrect answer rates before and
    after using the device, print the combined table and return it.

    Expects the columns ``Correct condition``, ``Is correct before device``
    and ``Is correct after device`` in *df*.
    """

    def _rate_table(stage):
        # Counts and percentage rates per (condition, correctness) pair
        # for one stage ("before" or "after").
        correctness_col = f"Is correct {stage} device"
        count_col = f"Count_{stage}_device"
        table = (
            df[["Correct condition", correctness_col]]
            .value_counts()
            .to_frame(count_col)
            .reset_index()
        )
        totals = table.groupby("Correct condition")[count_col].transform("sum")
        table[f"Total_count_{stage}_device"] = totals
        table[f"Rate_{stage}_device"] = (table[count_col] / totals * 100).round(2)
        table = table.sort_values(
            by=["Correct condition", correctness_col], ascending=False
        )
        return table.reset_index(drop=True)

    before_table = _rate_table("before")
    # print(before_table)
    after_table = _rate_table("after")
    # print(after_table)
    # Combine before and after side by side; rows align because both
    # tables are sorted by the same (condition, correctness) keys.
    result = pd.concat([before_table, after_table.iloc[:, 1:]], axis=1)
    print(result)
    return result
def compute_performance_change(df):
    """Relative performance change before and after using the device.

    Prints the per-stage count/rate tables and returns the relative
    change ``|rate_before - rate_after| / rate_before``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Is correct before device`` and
        ``Is correct after device`` with values "correct"/"incorrect".

    Returns
    -------
    float
        Relative change of the percentage of correct answers; 0.0 when
        the "before" rate is zero (avoids a division by zero).
    """
    stage_performance = {}
    for stage in ["before", "after"]:
        col_name = f"Is correct {stage} device"
        new_col_name = f"Record count {stage} device"
        rate_table = (
            df[col_name]
            .value_counts()
            .to_frame(new_col_name)
            .reset_index()
            .assign(Rate=lambda x: x[new_col_name] / x[new_col_name].sum() * 100)
            .round(2)
        )
        print(rate_table)
        correct_rates = rate_table.loc[rate_table[col_name] == "correct", "Rate"]
        # BUGFIX: the original indexed .values[0] unconditionally, which
        # raises IndexError when no answer in this stage is "correct".
        stage_performance[stage] = (
            float(correct_rates.values[0]) if len(correct_rates) else 0.0
        )
    # Relative change; guard the division for the all-incorrect edge case
    before = stage_performance["before"]
    after = stage_performance["after"]
    return abs(before - after) / before if before else 0.0
if __name__ == "__main__":
# Loading the data
df_result = pd.read_csv("data/BI_results_data.csv")
# Number of images
n_images = df_result["Image"].nunique()
n_cases = df_result.shape[0]
print(f"Number of unique images in the dataset: {n_images}\n")
print(f"Total cases analyzed in the dataset: {n_cases}\n")
# Number of practitioners per specialty
print("Number of practitioners per specialty:\n")
print(
df_result[["Fullname", "Specialty"]]
.drop_duplicates()["Specialty"]
.value_counts()
.to_frame("Nº of practioners")
.reset_index()
)
# Number of images analyzed per practitioner
print("\n\nNumber of images analyzed per practitioner\n")
print(
df_result.groupby(["Fullname"])["Image"]
.nunique()
.to_frame("Nº of images analyzed per pracitioner")
.reset_index()
.sort_values(by="Nº of images analyzed per pracitioner", ascending=True)
.reset_index(drop=True)
)
# Number of images analyzed per specialty
# Both images are analyzed by both specialties
print("\nNumber of images analyzed per specialty:\n")
print(
df_result[["Specialty", "Image"]]
.drop_duplicates()["Specialty"]
.value_counts()
.to_frame("Nº images analyzed per specialty")
.reset_index()
)
# Get overall % of accuracy
print("\nComputing the overall impact on accuracy:\n")
overall_change = compute_performance_change(df_result)
print(f"Overall accuracy change: {overall_change:.2%}\n")
######################################################################
# Analysis of the impact of the device on practitioners' performance #
######################################################################
print("Assessing the experiment hypotheses for HCPs:")
assess_impact_hypotheses(df_result)
# Analysis per pathology
# for pathology in df_result["Correct condition"].unique():
# # Get total rows (values)
# df_result_pathology = df_result.query("`Correct condition`==@pathology")
# assess_impact_hypotheses(df_result_pathology)
# Analysis per specialty
specialty_dict = {"Medicina general": "PCP", "Dermatología": "Dermatologist"}
specialty_list = df_result["Specialty"].unique()
for specialty in specialty_list:
print(f"\nAssessing performance of: {specialty_dict[specialty]}\n")
df_result_specialty = df_result.query("Specialty==@specialty")
# Get % of accuracy by specialty
print(f"\nComputing the impact on {specialty_dict[specialty]} accuracy:\n")
specialty_change = compute_performance_change(df_result_specialty)
print(f"{specialty_dict[specialty]} accuracy change: {specialty_change:.2%}")
# P-values
assess_impact_hypotheses(
df_result_specialty, group_name=specialty_dict[specialty]
)
# Uncomment this part if you want to get the p-values per pathology
# for pathology in df_result_specialty["Correct condition"].unique():
# # Get total rows (values)
# df_result_pathology = df_result_specialty.query(
# "`Correct condition`==@pathology"
# )
# assess_impact_hypotheses(df_result_pathology)
#########################
# Exporting the results #
#########################
export_cols = ["Correct condition", "Rate_before_device", "Rate_after_device"]
# All pathologies and specialties
print("\nHCP accuracy per pathology\n")
hcp_table = assess_impact_on_pathology(df_result)
hcp_table = hcp_table.query("`Is correct after device`=='correct'")
hcp_table.sort_values(by="Correct condition", inplace=True)
hcp_table[export_cols].to_csv("results/HCP_performance.csv", index=False)
# Per specialty
for specialty in specialty_list:
print(f"\n{specialty_dict[specialty]} accuracy per pathology\n")
df_result_specialty = df_result.query("Specialty==@specialty")
specialty_table = assess_impact_on_pathology(df_result_specialty)
specialty_table = specialty_table.query("`Is correct after device`=='correct'")
specialty_table.sort_values(by="Correct condition", inplace=True)
table_name = f"{specialty_dict[specialty]}_performance.csv"
specialty_table[export_cols].to_csv(f"results/{table_name}", index=False)