|
| 1 | +--- |
| 2 | +title: "Bayesian Mixture Model Testing" |
| 3 | +subtitle: "Using FGM.Bz, DIS.N, DIS.T & Clock Angle" |
| 4 | +output: pdf_document |
| 5 | +--- |
| 6 | +```{r page_options, include = FALSE} |
| 7 | +knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message=FALSE, |
| 8 | + fig.asp = 0.88, fig.width = 3, fig.keep='all', |
| 9 | + fig.align = "center", error = FALSE) |
| 10 | +library(ggplot2) |
| 11 | +library(rstan) |
| 12 | +
|
| 13 | +#STAN settings |
| 14 | +options(mc.cores = parallel::detectCores()) |
| 15 | +rstan_options(auto_write = TRUE) |
| 16 | +chains = 1 |
| 17 | +iters = 100 |
| 18 | +stan.control = list(max_treedepth = 20) |
| 19 | +
|
| 20 | +source("../source_files/CommonPlots.R") |
| 21 | +source("../source_files/GetDataFunctions.R") |
| 22 | +source("../source_files/EvaluationFunctions.R") |
| 23 | +``` |
| 24 | +```{r data_and_load_functions_multiple_orbits, results='hide'} |
| 25 | +sample_orbits <- sample(c(0:37), 6) |
| 26 | +print(sample_orbits) |
| 27 | +data.slice <- load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[1]) |
| 28 | +for(i in 2:length(sample_orbits)){ |
| 29 | + data.slice <- rbind(data.slice, load_orbit("http://data.rmdp.xyz/mms/data/larger/merged/merged_201701-03.csv", "merged.Rds", sample_orbits[i])) |
| 30 | +} |
| 31 | +``` |
| 32 | +```{r compile_stan_model} |
| 33 | +fit.compiled <- stan_model("../stan/mp_testing.stan") |
| 34 | +``` |
| 35 | +```{r generate_data_to_sample} |
| 36 | +# Prior distribution knowledge |
| 37 | +# mu: MSH, MSP, std: MSH, MSP, theta |
| 38 | +Bt.mix = c(20, 60, 25, 10, 0.4852118) |
| 39 | +T.mix = c(5.9420885, 8.0473952, 0.6658014, 0.3218401, 0.8245107) |
| 40 | +N.mix = c(3.1198874, -0.2113956, 0.6748259, 0.5035918, 0.7746667) |
| 41 | +clock.mix = c(0, 0.05, -pi, pi) |
| 42 | +
|
| 43 | +# Data from running merge_Mixtures.stan on test data: orbit 7. |
| 44 | +mixture_alpha = 4.997109 |
| 45 | +mixture_sigma = 4.319474 |
| 46 | +Bt_mix_sigma = 0.03317478 |
| 47 | +N_mix_sigma = 0.01227035 |
| 48 | +T_mix_sigma = 0.01244464 |
| 49 | +Clock_mix_sigma = 0.02266183 |
| 50 | +Bt_beta = -279.0574 |
| 51 | +N_beta = 29.35491 |
| 52 | +T_beta = 242.1864 |
| 53 | +Clock_beta = 0.7626921 |
| 54 | +Clock_sigma = 0.4208149 |
| 55 | +
|
| 56 | +# Process Data |
| 57 | +data.slice$Priority <- ifelse(data.slice$Priority < 100, 0, data.slice$Priority) |
| 58 | +data.slice$Priority[is.na(data.slice$Priority)] <- 0 |
| 59 | +
|
| 60 | +# Assign data to list format for consumption |
| 61 | +sample.data <- list(numsteps=nrow(data.slice), |
| 62 | + Bt_mix=Bt.mix, N_mix=N.mix, T_mix=T.mix, Clock_mix=clock.mix, |
| 63 | + Bt=data.slice$FGM.Bt, By=data.slice$FGM.By, Bz=data.slice$FGM.Bz, |
| 64 | + T_perp=data.slice$DIS.T_perp, T_para=data.slice$DIS.T_para, |
| 65 | + N=data.slice$DIS.N, Priority=data.slice$Priority, |
| 66 | + mixture_alpha=mixture_alpha, mixture_sigma=mixture_sigma, |
| 67 | + Bt_mix_sigma=Bt_mix_sigma, N_mix_sigma=N_mix_sigma, |
| 68 | + T_mix_sigma=T_mix_sigma, Clock_mix_sigma=Clock_mix_sigma, |
| 69 | + Bt_beta=Bt_beta, N_beta=N_beta, T_beta=T_beta, |
| 70 | + Clock_beta=Clock_beta, Clock_sigma=Clock_sigma) |
| 71 | +``` |
| 72 | +```{r sample_from_model} |
| 73 | +fit.samples <- sampling(fit.compiled, sample.data, iter = iters, |
| 74 | + chains=chains, control = stan.control) |
| 75 | +``` |
| 76 | +```{r compile_plot_data} |
| 77 | +# Function to process samples for use in plots. |
| 78 | +plot_data <- function(data.slice, fit.data){ |
| 79 | + # --- Populate data from sampled parameters |
| 80 | + get_data <- function(name, fun){ |
| 81 | + cols = colnames(as.matrix(fit.data)) |
| 82 | + if(paste(name, "[1]", sep="") %in% cols){ |
| 83 | + as.vector(apply(as.array(fit.data, par=c(name)), 3, fun)) |
| 84 | + } else NULL |
| 85 | + } |
| 86 | + |
| 87 | + # Generate a smoothed region of TRUE/FALSE data |
| 88 | + sum_window <- function(data, window, cutval){ |
| 89 | + t <- ifelse(data > cutval, TRUE, FALSE) |
| 90 | + w <- round(window /2) |
| 91 | + l <- length(data) |
| 92 | + r <- rep(0, l) |
| 93 | + for(i in 1:l){ |
| 94 | + range <- max(0, i-w):min(l, i+w) |
| 95 | + r[i] <- ifelse((sum(t[range]) / length(range)) > 0.5, TRUE, FALSE) |
| 96 | + } |
| 97 | + r |
| 98 | + } |
| 99 | + |
| 100 | + # Build new plot data |
| 101 | + r.data <- data.frame(Time = data.slice$Time, FGM.Bt = data.slice$FGM.Bt, Priority = data.slice$Priority, Selected = data.slice$Selected, Orbit=data.slice$Orbit) |
| 102 | + r.data$Bt_Mixture <- get_data("Bt_Mixture", "sd") |
| 103 | + r.data$Bt_Mixture <- get_data("Bt_Mixture", "mean") |
| 104 | + r.data$DIS.N <- log(data.slice$DIS.N) |
| 105 | + r.data$N_Mixture <- get_data("N_Mixture", "sd") |
| 106 | + r.data$N_Mixture <- get_data("N_Mixture", "mean") |
| 107 | + r.data$DIS.T <- log((data.slice$DIS.T_para + 2 * data.slice$DIS.T_perp) / 3) |
| 108 | + r.data$T_Mixture <- get_data("T_Mixture", "sd") |
| 109 | + r.data$T_Mixture <- get_data("T_Mixture", "mean") |
| 110 | + r.data$Clock.Angle <- atan2(data.slice$FGM.By, data.slice$FGM.Bz); |
| 111 | + r.data$Clock_Mixture <- get_data("Clock_Mixture", "sd") |
| 112 | + r.data$Clock_Mixture <- get_data("Clock_Mixture", "mean") |
| 113 | + r.data$Pred_Priority <- get_data("Priority", "sd") |
| 114 | + r.data$Pred_Priority <- get_data("Priority", "mean") |
| 115 | + |
| 116 | + return(r.data) |
| 117 | +} |
| 118 | +
|
| 119 | +# Generate data for plots |
| 120 | +fit.data <- plot_data(data.slice, fit.samples) |
| 121 | +``` |
| 122 | +```{r custom_plot_function} |
| 123 | +# Customize types plot |
| 124 | +mms_types_plot_pos <- function(data){ |
| 125 | + subsets <- list(c("FGM.Bt"), c("DIS.N", "DIS.T"), c("Clock.Angle"), |
| 126 | + c("Bt_Position", "N_Position", "T_Position", "C_Position"), |
| 127 | + c("Avg_Pos"), |
| 128 | + c("Position"), |
| 129 | + c("Bt_Mixture", "N_Mixture", "T_Mixture", "Clock_Mixture"), |
| 130 | + c("Avg_Mix"), |
| 131 | + c("Mixture"), |
| 132 | + c("Priority", "Pred_Priority") |
| 133 | + ) |
| 134 | + titles <- c("FGM.Bt", "DIS", "Clock Angle", |
| 135 | + "Positions", "Average Position", "Position", |
| 136 | + "Mixtures", "Average Mix", "Mixture", "Priority" |
| 137 | + ) |
| 138 | + plotTitle = "Features Grouped by Type over Time with MP points Highlighted" |
| 139 | + |
| 140 | + return(types_plot(data, subsets, titles, plotTitle)) |
| 141 | +} |
| 142 | +``` |
| 143 | +```{r plot_samples} |
| 144 | +fit.data$Highlight.Actual <- ifelse(fit.data$Priority >= 100, TRUE, FALSE) |
| 145 | +fit.data$Highlight.Predicted <- ifelse(fit.data$Pred_Priority >= 60, TRUE, FALSE) |
| 146 | +mms_types_plot_pos(fit.data[fit.data$Orbit==33,]) |
| 147 | +``` |
| 148 | +```{r} |
| 149 | +evaluate <- function(data){ |
| 150 | + prediction <- data$predicted |
| 151 | + actual <- data$actual |
| 152 | + total = length(actual) |
| 153 | + actual_sum <- sum(actual) |
| 154 | + pred_sum <- sum(prediction) |
| 155 | + TP <- sum(ifelse(prediction & actual, 1, 0)) |
| 156 | + TN <- sum(ifelse(!prediction & !actual, 1, 0)) |
| 157 | + FN <- sum(ifelse(!prediction & actual, 1, 0)) |
| 158 | + FP <- sum(ifelse(prediction & !actual, 1, 0)) |
| 159 | + |
| 160 | + c("TP" = TP, "TN" = TN, "FP" = FP, "FN" = FN, |
| 161 | + "actuals" = actual_sum, "predicted" = pred_sum, |
| 162 | + "accuracy" = (TP+TN)/total, "missclassification" = (FP+FN)/total, |
| 163 | + "precision" = TP/(TP+FP), "prevalence" = TP/total, |
| 164 | + "true.positive.rate" = TP/(TP+FN), "false.positive.rate" = FP/(FP+TN), |
| 165 | + "true.negative.rate" = TN/(FP+TN)) |
| 166 | +} |
| 167 | +eval.method(fit.data$Highlight.Actual, fit.data$Highlight.Predicted, 0.5) |
| 168 | +evaluate_data_asFrame(fit.data, fit.data$Highlight.Predicted) |
| 169 | +evaluate(data.frame(actual = fit.data$Highlight.Actual, predicted = fit.data$Highlight.Predicted)) |
| 170 | +``` |
| 171 | +```{r} |
| 172 | +eval.method2 <- function(target, prediction, threshold){ |
| 173 | + t.seg = segments(target) |
| 174 | + p.seg = segments(prediction) |
| 175 | + |
| 176 | + t.count.t = 0 |
| 177 | + t.count.f = 0 |
| 178 | + p.count.t = 0 |
| 179 | + p.count.f = 0 |
| 180 | + |
| 181 | + # target overlap count |
| 182 | + for(s in 1:dim(t.seg)[1]){ |
| 183 | + # selected = TRUE overlap |
| 184 | + t.sum.t = sum(target[t.seg[s,1]:t.seg[s,2]], na.rm = TRUE) |
| 185 | + p.sum.t = sum(prediction[t.seg[s,1]:t.seg[s,2]], na.rm = TRUE) |
| 186 | + # selected - FALSE overlap |
| 187 | + t.sum.f = length(target) - t.sum.t |
| 188 | + p.sum.f = length(prediction) - p.sum.t |
| 189 | + if((p.sum.t / t.sum.t) > threshold) t.count.t = t.count.t + 1 |
| 190 | + else if((p.sum.f / t.sum.f) > threshold) t.count.f = t.count.f + 1 |
| 191 | + } |
| 192 | + |
| 193 | + # prediction overlap |
| 194 | + for(s in 1:dim(p.seg)[1]){ |
| 195 | + # selected = TRUE overlap |
| 196 | + t.sum.t = sum(target[p.seg[s,1]:p.seg[s,2]], na.rm = TRUE) |
| 197 | + p.sum.t = sum(prediction[p.seg[s,1]:p.seg[s,2]], na.rm = TRUE) |
| 198 | + # selected - FALSE overlap |
| 199 | + t.sum.f = length(target) - t.sum.t |
| 200 | + p.sum.f = length(prediction) - p.sum.t |
| 201 | + if((t.sum.t / p.sum.t) > threshold) p.count.t = p.count.t + 1 |
| 202 | + else if((t.sum.f / p.sum.f) > threshold) p.count.f = p.count.f + 1 |
| 203 | + } |
| 204 | + |
| 205 | + c(Target_Ratio_Selected = t.count.t / dim(t.seg)[1], Target_Ratio_Unselected = t.count.f / dim(t.seg)[1], Prediction_Ratio_Selected = p.count.t / dim(p.seg)[1], Prediction_Ratio_Unselected = p.count.f / dim(p.seg)[1]) |
| 206 | +} |
| 207 | +eval.method2(fit.data$Highlight.Actual, fit.data$Highlight.Predicted, 0.5) |
| 208 | +``` |
0 commit comments