Skip to content

Commit ce668aa

Browse files
author
Samantha Piatt
committed
Initial source code files.
1 parent 4b1301e commit ce668aa

10 files changed

+1127
-1
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
# MMS_Magnetopause_MixtureModel
1+
# Source files for 'Large‐Scale Statistical Survey of Magnetopause Reconnection'
+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
---
2+
title: "Bayesian Mixture Model Testing"
3+
subtitle: "Using FGM.Bz, DIS.N, DIS.T & Clock Angle"
4+
output: pdf_document
5+
---
6+
```{r page_options, include = FALSE}
7+
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message=FALSE,
8+
fig.asp = 0.88, fig.width = 3, fig.keep='all',
9+
fig.align = "center", error = FALSE)
10+
library(ggplot2)
11+
library(rstan)
12+
13+
#STAN settings
14+
options(mc.cores = parallel::detectCores())
15+
rstan_options(auto_write = TRUE)
16+
chains = 1
17+
iters = 100
18+
stan.control = list(max_treedepth = 20)
19+
20+
source("../source_files/CommonPlots.R")
21+
source("../source_files/GetDataFunctions.R")
22+
source("../source_files/EvaluationFunctions.R")
23+
```
24+
```{r data_and_load_functions_multiple_orbits, results='hide'}
25+
sample_orbits <- sample(c(0:37), 6)
26+
print(sample_orbits)
27+
data.slice <- load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[1])
28+
for(i in 2:length(sample_orbits)){
29+
data.slice <- rbind(data.slice, load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[i]))
30+
}
31+
```
32+
```{r compile_stan_model}
33+
fit.compiled <- stan_model("../stan/mp_testing.stan")
34+
```
35+
```{r generate_data_to_sample}
36+
# Prior distribution knowledge
37+
# mu: MSH, MSP, std: MSH, MSP, theta
38+
Bt.mix = c(20, 60, 25, 10, 0.4852118)
39+
T.mix = c(5.9420885, 8.0473952, 0.6658014, 0.3218401, 0.8245107)
40+
N.mix = c(3.1198874, -0.2113956, 0.6748259, 0.5035918, 0.7746667)
41+
clock.mix = c(0, 0.05, -pi, pi)
42+
43+
# Data from running merge_Mixtures.stan on test data: orbit 7.
44+
mixture_alpha = 4.997109
45+
mixture_sigma = 4.319474
46+
Bt_mix_sigma = 0.03317478
47+
N_mix_sigma = 0.01227035
48+
T_mix_sigma = 0.01244464
49+
Clock_mix_sigma = 0.02266183
50+
Bt_beta = -279.0574
51+
N_beta = 29.35491
52+
T_beta = 242.1864
53+
Clock_beta = 0.7626921
54+
Clock_sigma = 0.4208149
55+
56+
# Process Data
57+
data.slice$Priority <- ifelse(data.slice$Priority < 100, 0, data.slice$Priority)
58+
data.slice$Priority[is.na(data.slice$Priority)] <- 0
59+
60+
# Assign data to list format for consumption
61+
sample.data <- list(numsteps=nrow(data.slice),
62+
Bt_mix=Bt.mix, N_mix=N.mix, T_mix=T.mix, Clock_mix=clock.mix,
63+
Bt=data.slice$FGM.Bt, By=data.slice$FGM.By, Bz=data.slice$FGM.Bz,
64+
T_perp=data.slice$DIS.T_perp, T_para=data.slice$DIS.T_para,
65+
N=data.slice$DIS.N, Priority=data.slice$Priority,
66+
mixture_alpha=mixture_alpha, mixture_sigma=mixture_sigma,
67+
Bt_mix_sigma=Bt_mix_sigma, N_mix_sigma=N_mix_sigma,
68+
T_mix_sigma=T_mix_sigma, Clock_mix_sigma=Clock_mix_sigma,
69+
Bt_beta=Bt_beta, N_beta=N_beta, T_beta=T_beta,
70+
Clock_beta=Clock_beta, Clock_sigma=Clock_sigma)
71+
```
72+
```{r sample_from_model}
73+
fit.samples <- sampling(fit.compiled, sample.data, iter = iters,
74+
chains=chains, control = stan.control)
75+
```
76+
```{r compile_plot_data}
77+
# Function to process samples for use in plots.
78+
plot_data <- function(data.slice, fit.data){
79+
# --- Populate data from sampled parameters
80+
get_data <- function(name, fun){
81+
cols = colnames(as.matrix(fit.data))
82+
if(paste(name, "[1]", sep="") %in% cols){
83+
as.vector(apply(as.array(fit.data, par=c(name)), 3, fun))
84+
} else NULL
85+
}
86+
87+
# Generate a smoothed region of TRUE/FALSE data
88+
sum_window <- function(data, window, cutval){
89+
t <- ifelse(data > cutval, TRUE, FALSE)
90+
w <- round(window /2)
91+
l <- length(data)
92+
r <- rep(0, l)
93+
for(i in 1:l){
94+
range <- max(0, i-w):min(l, i+w)
95+
r[i] <- ifelse((sum(t[range]) / length(range)) > 0.5, TRUE, FALSE)
96+
}
97+
r
98+
}
99+
100+
# Build new plot data
101+
r.data <- data.frame(Time = data.slice$Time, FGM.Bt = data.slice$FGM.Bt, Priority = data.slice$Priority, Selected = data.slice$Selected, Orbit=data.slice$Orbit)
102+
r.data$Bt_Mixture <- get_data("Bt_Mixture", "sd")
103+
r.data$Bt_Mixture <- get_data("Bt_Mixture", "mean")
104+
r.data$DIS.N <- log(data.slice$DIS.N)
105+
r.data$N_Mixture <- get_data("N_Mixture", "sd")
106+
r.data$N_Mixture <- get_data("N_Mixture", "mean")
107+
r.data$DIS.T <- log((data.slice$DIS.T_para + 2 * data.slice$DIS.T_perp) / 3)
108+
r.data$T_Mixture <- get_data("T_Mixture", "sd")
109+
r.data$T_Mixture <- get_data("T_Mixture", "mean")
110+
r.data$Clock.Angle <- atan2(data.slice$FGM.By, data.slice$FGM.Bz);
111+
r.data$Clock_Mixture <- get_data("Clock_Mixture", "sd")
112+
r.data$Clock_Mixture <- get_data("Clock_Mixture", "mean")
113+
r.data$Pred_Priority <- get_data("Priority", "sd")
114+
r.data$Pred_Priority <- get_data("Priority", "mean")
115+
116+
return(r.data)
117+
}
118+
119+
# Generate data for plots
120+
fit.data <- plot_data(data.slice, fit.samples)
121+
```
122+
```{r custom_plot_function}
123+
# Customize types plot
124+
mms_types_plot_pos <- function(data){
125+
subsets <- list(c("FGM.Bt"), c("DIS.N", "DIS.T"), c("Clock.Angle"),
126+
c("Bt_Position", "N_Position", "T_Position", "C_Position"),
127+
c("Avg_Pos"),
128+
c("Position"),
129+
c("Bt_Mixture", "N_Mixture", "T_Mixture", "Clock_Mixture"),
130+
c("Avg_Mix"),
131+
c("Mixture"),
132+
c("Priority", "Pred_Priority")
133+
)
134+
titles <- c("FGM.Bt", "DIS", "Clock Angle",
135+
"Positions", "Average Position", "Position",
136+
"Mixtures", "Average Mix", "Mixture", "Priority"
137+
)
138+
plotTitle = "Features Grouped by Type over Time with MP points Highlighted"
139+
140+
return(types_plot(data, subsets, titles, plotTitle))
141+
}
142+
```
143+
```{r plot_samples}
144+
fit.data$Highlight.Actual <- ifelse(fit.data$Priority >= 100, TRUE, FALSE)
145+
fit.data$Highlight.Predicted <- ifelse(fit.data$Pred_Priority >= 60, TRUE, FALSE)
146+
mms_types_plot_pos(fit.data[fit.data$Orbit==33,])
147+
```
148+
```{r}
149+
evaluate <- function(data){
150+
prediction <- data$predicted
151+
actual <- data$actual
152+
total = length(actual)
153+
actual_sum <- sum(actual)
154+
pred_sum <- sum(prediction)
155+
TP <- sum(ifelse(prediction & actual, 1, 0))
156+
TN <- sum(ifelse(!prediction & !actual, 1, 0))
157+
FN <- sum(ifelse(!prediction & actual, 1, 0))
158+
FP <- sum(ifelse(prediction & !actual, 1, 0))
159+
160+
c("TP" = TP, "TN" = TN, "FP" = FP, "FN" = FN,
161+
"actuals" = actual_sum, "predicted" = pred_sum,
162+
"accuracy" = (TP+TN)/total, "missclassification" = (FP+FN)/total,
163+
"precision" = TP/(TP+FP), "prevalence" = TP/total,
164+
"true.positive.rate" = TP/(TP+FN), "false.positive.rate" = FP/(FP+TN),
165+
"true.negative.rate" = TN/(FP+TN))
166+
}
167+
eval.method(fit.data$Highlight.Actual, fit.data$Highlight.Predicted, 0.5)
168+
evaluate_data_asFrame(fit.data, fit.data$Highlight.Predicted)
169+
evaluate(data.frame(actual = fit.data$Highlight.Actual, predicted = fit.data$Highlight.Predicted))
170+
```
171+
```{r}
172+
eval.method2 <- function(target, prediction, threshold){
173+
t.seg = segments(target)
174+
p.seg = segments(prediction)
175+
176+
t.count.t = 0
177+
t.count.f = 0
178+
p.count.t = 0
179+
p.count.f = 0
180+
181+
# target overlap count
182+
for(s in 1:dim(t.seg)[1]){
183+
# selected = TRUE overlap
184+
t.sum.t = sum(target[t.seg[s,1]:t.seg[s,2]], na.rm = TRUE)
185+
p.sum.t = sum(prediction[t.seg[s,1]:t.seg[s,2]], na.rm = TRUE)
186+
# selected - FALSE overlap
187+
t.sum.f = length(target) - t.sum.t
188+
p.sum.f = length(prediction) - p.sum.t
189+
if((p.sum.t / t.sum.t) > threshold) t.count.t = t.count.t + 1
190+
else if((p.sum.f / t.sum.f) > threshold) t.count.f = t.count.f + 1
191+
}
192+
193+
# prediction overlap
194+
for(s in 1:dim(p.seg)[1]){
195+
# selected = TRUE overlap
196+
t.sum.t = sum(target[p.seg[s,1]:p.seg[s,2]], na.rm = TRUE)
197+
p.sum.t = sum(prediction[p.seg[s,1]:p.seg[s,2]], na.rm = TRUE)
198+
# selected - FALSE overlap
199+
t.sum.f = length(target) - t.sum.t
200+
p.sum.f = length(prediction) - p.sum.t
201+
if((t.sum.t / p.sum.t) > threshold) p.count.t = p.count.t + 1
202+
else if((t.sum.f / p.sum.f) > threshold) p.count.f = p.count.f + 1
203+
}
204+
205+
c(Target_Ratio_Selected = t.count.t / dim(t.seg)[1], Target_Ratio_Unselected = t.count.f / dim(t.seg)[1], Prediction_Ratio_Selected = p.count.t / dim(p.seg)[1], Prediction_Ratio_Unselected = p.count.f / dim(p.seg)[1])
206+
}
207+
eval.method2(fit.data$Highlight.Actual, fit.data$Highlight.Predicted, 0.5)
208+
```
+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
---
2+
title: "Bayesian Mixture Model Training"
3+
subtitle: "Using FGM.Bz, DIS.N, DIS.T & Clock Angle"
4+
output: pdf_document
5+
---
6+
```{r page_options, include = FALSE}
7+
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message=FALSE,
8+
fig.asp = 0.88, fig.width = 3, fig.keep='all',
9+
fig.align = "center", error = FALSE)
10+
library(ggplot2)
11+
library(rstan)
12+
13+
#STAN settings
14+
options(mc.cores = parallel::detectCores())
15+
rstan_options(auto_write = TRUE)
16+
chains = 1
17+
iters = 100
18+
stan.control = list(max_treedepth = 20)
19+
20+
source("../source_files/CommonPlots.R")
21+
source("../source_files/GetDataFunctions.R")
22+
```
23+
```{r loadData_multipleOrbits_random, results='hide'}
24+
sample_orbits <- sample(c(0:37), 6)
25+
print(sample_orbits)
26+
data.slice <- load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[1])
27+
for(i in 2:length(sample_orbits)){
28+
data.slice <- rbind(data.slice, load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[i]))
29+
}
30+
```
31+
```{r compile_stan_model}
32+
fit.compiled <- stan_model("../stan/mp_training.stan")
33+
```
34+
```{r generate_data_to_sample}
35+
# Prior distribution knowledge
36+
# mu: MSH, MSP, std: MSH, MSP, theta
37+
Bt.mix = c(20, 60, 25, 10, 0.4852118)
38+
T.mix = c(5.9420885, 8.0473952, 0.6658014, 0.3218401, 0.8245107)
39+
N.mix = c(3.1198874, -0.2113956, 0.6748259, 0.5035918, 0.7746667)
40+
clock.mix = c(0, 0.05, -pi, pi)
41+
42+
# Process Data
43+
data.slice$Priority <- ifelse(data.slice$Priority < 100, 0, data.slice$Priority)
44+
data.slice$Priority[is.na(data.slice$Priority)] <- 0
45+
46+
# Assign data to list format for consumption
47+
sample.data <- list(numsteps=nrow(data.slice),
48+
Bt_mix=Bt.mix, N_mix=N.mix, T_mix=T.mix, Clock_mix=clock.mix,
49+
Bt=data.slice$FGM.Bt, By=data.slice$FGM.By, Bz=data.slice$FGM.Bz,
50+
T_perp=data.slice$DIS.T_perp, T_para=data.slice$DIS.T_para,
51+
N=data.slice$DIS.N, Priority=data.slice$Priority)
52+
```
53+
```{r sample_from_model}
54+
fit.samples <- sampling(fit.compiled, sample.data, iter = iters,
55+
chains=chains, control = stan.control)
56+
```
57+
```{r compile_plot_data}
58+
# Function to process samples for use in plots.
59+
plot_data <- function(data.slice, fit.samples){
60+
# --- Populate data from sampled parameters
61+
get_data <- function(name, fun){
62+
cols = colnames(as.matrix(fit.samples))
63+
if(paste(name, "[1]", sep="") %in% cols){
64+
as.vector(apply(as.array(fit.samples, par=c(name)), 3, fun))
65+
} else NULL
66+
}
67+
68+
# Generate a smoothed region of TRUE/FALSE data
69+
sum_window <- function(data, window, cutval, threshold){
70+
t <- ifelse(data > cutval, TRUE, FALSE)
71+
w <- round(window /2)
72+
l <- length(data)
73+
r <- rep(0, l)
74+
for(i in 1:l){
75+
range <- max(0, i-w):min(l, i+w)
76+
r[i] <- ifelse((sum(t[range]) / length(range)) > threshold, TRUE, FALSE)
77+
}
78+
r
79+
}
80+
81+
# Build new plot data
82+
r.data <- data.frame(Time = data.slice$Time, FGM.Bt = data.slice$FGM.Bt, Priority = data.slice$Priority)
83+
r.data$Bt_Mixture <- get_data("Bt_Mixture", "sd")
84+
r.data$Bt_Mixture <- get_data("Bt_Mixture", "mean")
85+
r.data$DIS.N <- log(data.slice$DIS.N)
86+
r.data$N_Mixture <- get_data("N_Mixture", "sd")
87+
r.data$N_Mixture <- get_data("N_Mixture", "mean")
88+
r.data$DIS.T <- log((data.slice$DIS.T_para + 2 * data.slice$DIS.T_perp) / 3)
89+
r.data$T_Mixture <- get_data("T_Mixture", "sd")
90+
r.data$T_Mixture <- get_data("T_Mixture", "mean")
91+
r.data$Clock.Angle <- atan2(data.slice$FGM.By, data.slice$FGM.Bz);
92+
r.data$Clock_Mixture <- get_data("Clock_Mixture", "sd")
93+
r.data$Clock_Mixture <- get_data("Clock_Mixture", "mean")
94+
r.data$Highlight.Actual <- ifelse(grepl("MP", data.slice$Comments), TRUE, FALSE)
95+
96+
return(r.data)
97+
}
98+
99+
# Generate data for plots
100+
fit.data <- plot_data(data.slice, fit.samples)
101+
```
102+
```{r custom_plot_function}
103+
# Customize types plot
104+
mms_types_plot_pos <- function(data){
105+
subsets <- list(c("FGM.Bt"), c("DIS.N", "DIS.T"), c("Clock.Angle"),
106+
c("Bt_Position", "N_Position", "T_Position", "C_Position"),
107+
c("Avg_Pos"),
108+
c("Position"),
109+
c("Bt_Mixture", "N_Mixture", "T_Mixture", "Clock_Mixture"),
110+
c("Avg_Mix"),
111+
c("Mixture")
112+
)
113+
titles <- c("FGM.Bt", "DIS", "Clock Angle",
114+
"Positions", "Average Position", "Position",
115+
"Mixtures", "Average Mix", "Mixture"
116+
)
117+
plotTitle = "Features Grouped by Type over Time with MP points Highlighted"
118+
119+
return(types_plot(data, subsets, titles, plotTitle))
120+
}
121+
```
122+
```{r plot_samples}
123+
mms_types_plot_pos(fit.data)
124+
```
125+
```{r}
126+
mixture_alpha = as.vector(apply(as.array(fit.samples, par=c("mixture_alpha")), 3, mean))
127+
mixture_sigma = as.vector(apply(as.array(fit.samples, par=c("mixture_sigma")), 3, mean))
128+
Bt_mix_sigma = as.vector(apply(as.array(fit.samples, par=c("Bt_mix_sigma")), 3, mean))
129+
N_mix_sigma = as.vector(apply(as.array(fit.samples, par=c("N_mix_sigma")), 3, mean))
130+
T_mix_sigma = as.vector(apply(as.array(fit.samples, par=c("T_mix_sigma")), 3, mean))
131+
Clock_mix_sigma = as.vector(apply(as.array(fit.samples, par=c("Clock_mix_sigma")), 3, mean))
132+
Bt_beta = as.vector(apply(as.array(fit.samples, par=c("Bt_beta")), 3, mean))
133+
N_beta = as.vector(apply(as.array(fit.samples, par=c("N_beta")), 3, mean))
134+
T_beta = as.vector(apply(as.array(fit.samples, par=c("T_beta")), 3, mean))
135+
Clock_beta = as.vector(apply(as.array(fit.samples, par=c("Clock_beta")), 3, mean))
136+
Clock_sigma = as.vector(apply(as.array(fit.samples, par=c("Clock_sigma")), 3, mean))
137+
mixture_alpha
138+
mixture_sigma
139+
Bt_mix_sigma
140+
N_mix_sigma
141+
T_mix_sigma
142+
Clock_mix_sigma
143+
Bt_beta
144+
N_beta
145+
T_beta
146+
Clock_beta
147+
Clock_sigma
148+
```

0 commit comments

Comments
 (0)