Marcio Diniz | Michael Luu
Cedars Sinai Medical Center
18 October, 2022
# Two x values are enough here: they only fix the plotting range [0, 20]
# over which the density function is drawn.
data_plot <- data.frame(x = c(0, 20))

# Normal(mean = 7, sd = 2) density curve, shaded over [0, 15], with vertical
# lines at the 2.5% and 97.5% quantiles.
ggplot(data_plot, aes(x = x)) +
  stat_function(fun = dnorm, args = list(mean = 7, sd = 2)) +
  geom_area(
    fill = "blue",
    stat = "function", fun = dnorm,
    args = list(mean = 7, sd = 2),
    xlim = c(0, 15),
    alpha = 0.5
  ) +
  labs(y = "Density", x = "Troponin I") +
  theme_bw(base_size = 20) +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(legend.position = "none") +
  geom_vline(xintercept = qnorm(c(0.025, 0.975), mean = 7, sd = 2))
Normal distribution
# Normal(mean = 10, sd = 1.5) density curve, shaded over [0, 15], with
# vertical lines at the 2.5% and 97.5% quantiles.
ggplot(data_plot, aes(x = x)) +
  stat_function(fun = dnorm, args = list(mean = 10, sd = 1.5)) +
  geom_area(
    fill = "blue",
    stat = "function", fun = dnorm,
    args = list(mean = 10, sd = 1.5),
    xlim = c(0, 15),
    alpha = 0.5
  ) +
  labs(y = "Density", x = "Troponin I") +
  theme_bw(base_size = 20) +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(legend.position = "none") +
  geom_vline(xintercept = qnorm(c(0.025, 0.975), mean = 10, sd = 1.5))
Normal distribution
# Normal(mean = 5, sd = 1) density curve, shaded over [0, 15], with vertical
# lines at the 2.5% and 97.5% quantiles.
ggplot(data_plot, aes(x = x)) +
  stat_function(fun = dnorm, args = list(mean = 5, sd = 1)) +
  geom_area(
    fill = "blue",
    stat = "function", fun = dnorm,
    args = list(mean = 5, sd = 1),
    xlim = c(0, 15),
    alpha = 0.5
  ) +
  labs(y = "Density", x = "Troponin I") +
  theme_bw(base_size = 20) +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(legend.position = "none") +
  geom_vline(xintercept = qnorm(c(0.025, 0.975), mean = 5, sd = 1))
Normal distribution
# Gamma(shape = 5, rate = 1) density curve, shaded over [0, 20], with
# vertical lines at the 2.5% and 97.5% quantiles.
ggplot(data_plot, aes(x = x)) +
  stat_function(fun = dgamma, args = list(shape = 5, rate = 1)) +
  geom_area(
    stat = "function", fun = dgamma,
    args = list(shape = 5, rate = 1),
    xlim = c(0, 20),
    fill = "red", alpha = 0.5
  ) +
  geom_vline(xintercept = qgamma(c(0.025, 0.975), shape = 5, rate = 1)) +
  theme_bw(base_size = 20) +
  labs(y = "Density", x = "Troponin I")
Gamma distribution
# Gumbel density curve, shaded over [0, 20], with vertical lines at the 2.5%
# and 97.5% quantiles. The two parameters (5 and 1) are passed positionally
# because dgumbel()/qgumbel() are not defined in this file.
# NOTE(review): confirm which package provides them and how it names its
# location/scale arguments before switching to named arguments.
ggplot(data_plot, aes(x = x)) +
  stat_function(fun = dgumbel, args = list(5, 1)) +
  geom_area(
    stat = "function", fun = dgumbel,
    args = list(5, 1),
    xlim = c(0, 20),
    fill = "yellow", alpha = 0.5
  ) +
  geom_vline(
    xintercept = c(qgumbel(0.025, 5, 1), qgumbel(0.975, 5, 1))
  ) +
  theme_bw(base_size = 20) +
  labs(y = "Density", x = "Troponin I")
Gumbel distribution
# Binomial likelihood of the success probability `p` given `x` successes in
# `size` trials. The binomial probability is divided by choose(size, x), so
# the combinatorial constant (which does not depend on `p`) is removed.
likelihood_binom <- function(p, x, size) {
  dbinom(x = x, size = size, prob = p) / choose(n = size, k = x)
}
# The end points of [0, 1] fix the range of p over which the likelihood
# function is drawn.
data_plot <- data.frame(p = c(0, 1))
#| fig-cap: "Observed sample: 1, 0, 1, 0"
# Likelihood curve for x = 2 successes in size = 4 trials.
ggplot(data_plot, aes(x = p)) +
  stat_function(
    fun = likelihood_binom,
    args = list(x = 2, size = 4)
  ) +
  labs(y = "Likelihood", x = "p") +
  theme_bw(base_size = 20) +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(legend.position = "none")
Observed sample: 1, 0, 1, 0
# Draw four independent samples of size 100 from a Gamma(shape, rate)
# distribution and show one histogram per sample, each labelled with its own
# sample mean.
shape <- 1
rate <- 0.2
set.seed(1257)
data_plot <- data.frame(
  a = rgamma(100, shape = shape, rate = rate),
  b = rgamma(100, shape = shape, rate = rate),
  c = rgamma(100, shape = shape, rate = rate),
  d = rgamma(100, shape = shape, rate = rate)
)

# Long format: one row per observation, `name` identifies the sample.
# `cols` is spelled out in full instead of relying on partial matching.
data_plot <- data_plot %>%
  pivot_longer(cols = a:d)

# Mean of each sample, one row per `name`.
tmp <- data_plot %>%
  group_by(name) %>%
  summarise(m = mean(value))

# Relabel each sample with its mean (two decimals). Levels and labels both
# come from `tmp`, so each label is paired with the right sample.
data_plot <- data_plot %>%
  mutate(
    name = factor(
      name,
      levels = tmp$name,
      labels = format(round(tmp$m, 2), nsmall = 2)
    )
  )

ggplot(data_plot, aes(x = value, y = after_stat(density))) +
  geom_histogram(bins = 20) +
  theme_bw() +
  labs(y = "Density", x = "Troponin I") +
  facet_wrap(
    ~ name,
    ncol = 2,
    labeller = label_bquote(bar(x) == .(as.character(name)))
  ) +
  theme(strip.text = element_text(size = 12))
Every point estimate is also a random variable
\[ \left[\bar{X} - z_{1 - \alpha/2}\frac{\sigma}{\sqrt{n}} ; \bar{X} + z_{1 - \alpha/2}\frac{\sigma}{\sqrt{n}} \right] \]
# Simulate 100 measurements per sex from normal distributions with unit SD
# and means 4 (Female) and 8 (Male), then stack them into one data frame.
set.seed(1234)
female <- data.frame(
  sex = "Female",
  measure = rnorm(n = 100, mean = 4, sd = 1)
)
male <- data.frame(
  sex = "Male",
  measure = rnorm(n = 100, mean = 8, sd = 1)
)
dataset <- rbind(female, male)
# Per-sex mean and standard deviation of `measure`.
data_plot <- dataset %>%
  select(sex, measure) %>%
  group_by(sex) %>%
  summarise(
    mean = mean(measure, na.rm = TRUE),
    sd = sd(measure, na.rm = TRUE)
  )

# Bar chart of the group means with error bars of +/- 1 SD.
ggplot(data_plot, aes(x = sex, y = mean, fill = sex)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.1
  ) +
  labs(x = "Sex", y = "Troponin I") +
  scale_fill_brewer("Sex", palette = "Set1") +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
Mean and SD
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of non-missing observations.
#
# x      Numeric vector.
# na.rm  Passed to sd(); if FALSE and `x` contains NA, the result is NA.
#
# The value is returned visibly (the function no longer ends in an
# assignment, which made the result invisible).
se <- function(x, na.rm = TRUE) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = na.rm) / sqrt(n_obs)
}
# Per-sex mean of `measure` and its standard error as computed by se().
data_plot <- dataset %>%
  select(sex, measure) %>%
  group_by(sex) %>%
  summarise(
    mean = mean(measure, na.rm = TRUE),
    se = se(measure, na.rm = TRUE)
  )

# Bar chart of the group means with error bars of +/- 1 SE.
ggplot(data_plot, aes(x = sex, y = mean, fill = sex)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.1
  ) +
  labs(x = "Sex", y = "Troponin I") +
  scale_fill_brewer("Sex", palette = "Set1") +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
Mean and SE
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of non-missing observations.
#
# x      Numeric vector.
# na.rm  Passed to sd(); if FALSE and `x` contains NA, the result is NA.
#
# The value is returned visibly (the function no longer ends in an
# assignment, which made the result invisible).
se <- function(x, na.rm = TRUE) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = na.rm) / sqrt(n_obs)
}
# Per-sex mean of `measure` and its standard error as computed by se().
data_plot <- dataset %>%
  select(sex, measure) %>%
  group_by(sex) %>%
  summarise(
    mean = mean(measure, na.rm = TRUE),
    se = se(measure, na.rm = TRUE)
  )

# Bar chart of the group means with error bars of +/- 2 SE.
ggplot(data_plot, aes(x = sex, y = mean, fill = sex)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - 2 * se, ymax = mean + 2 * se),
    width = 0.1
  ) +
  labs(x = "Sex", y = "Troponin I") +
  scale_fill_brewer("Sex", palette = "Set1") +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
# Simulate 100 measurements per sex from the same normal distribution
# (mean 4, SD 1) for both groups, then stack them into one data frame.
set.seed(2847)
female <- data.frame(
  sex = "Female",
  measure = rnorm(n = 100, mean = 4, sd = 1)
)
male <- data.frame(
  sex = "Male",
  measure = rnorm(n = 100, mean = 4, sd = 1)
)
dataset <- rbind(female, male)
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of non-missing observations.
#
# x      Numeric vector.
# na.rm  Passed to sd(); if FALSE and `x` contains NA, the result is NA.
#
# The value is returned visibly (the function no longer ends in an
# assignment, which made the result invisible).
se <- function(x, na.rm = TRUE) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = na.rm) / sqrt(n_obs)
}
# Per-sex mean of `measure` and its standard error as computed by se().
data_plot <- dataset %>%
  select(sex, measure) %>%
  group_by(sex) %>%
  summarise(
    mean = mean(measure, na.rm = TRUE),
    se = se(measure, na.rm = TRUE)
  )

# Bar chart of the group means with error bars of +/- 2 SE.
ggplot(data_plot, aes(x = sex, y = mean, fill = sex)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - 2 * se, ymax = mean + 2 * se),
    width = 0.1
  ) +
  labs(x = "Sex", y = "Troponin I") +
  scale_fill_brewer("Sex", palette = "Set1") +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
# Simulate 100 measurements per sex from normal distributions with unit SD
# and means 4 (Female) and 4.3 (Male), then stack them into one data frame.
set.seed(2847)
female <- data.frame(
  sex = "Female",
  measure = rnorm(n = 100, mean = 4, sd = 1)
)
male <- data.frame(
  sex = "Male",
  measure = rnorm(n = 100, mean = 4.3, sd = 1)
)
dataset <- rbind(female, male)
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of non-missing observations.
#
# x      Numeric vector.
# na.rm  Passed to sd(); if FALSE and `x` contains NA, the result is NA.
#
# The value is returned visibly (the function no longer ends in an
# assignment, which made the result invisible).
se <- function(x, na.rm = TRUE) {
  n_obs <- sum(!is.na(x))
  sd(x, na.rm = na.rm) / sqrt(n_obs)
}
# Per-sex mean of `measure` and its standard error as computed by se().
data_plot <- dataset %>%
  select(sex, measure) %>%
  group_by(sex) %>%
  summarise(
    mean = mean(measure, na.rm = TRUE),
    se = se(measure, na.rm = TRUE)
  )

# Bar chart of the group means with error bars of +/- 2 SE.
ggplot(data_plot, aes(x = sex, y = mean, fill = sex)) +
  geom_col(position = position_dodge()) +
  geom_errorbar(
    aes(ymin = mean - 2 * se, ymax = mean + 2 * se),
    width = 0.1
  ) +
  labs(x = "Sex", y = "Troponin I") +
  scale_fill_brewer("Sex", palette = "Set1") +
  theme_bw(base_size = 20) +
  theme(legend.position = "none")
\[ \left[\bar{X}_M - z_{1 - \alpha/2}\sqrt{\frac{\sigma_M^2}{n_M}} ; \bar{X}_M + z_{1 - \alpha/2}\sqrt{\frac{\sigma_M^2}{n_M}} \right] \]
\[ \left[\bar{X}_F - z_{1 - \alpha/2}\sqrt{\frac{\sigma_F^2}{n_F}} ; \bar{X}_F + z_{1 - \alpha/2}\sqrt{\frac{\sigma_F^2}{n_F}} \right] \]
where \(Z \sim N(0, 1)\).
\[ \left[\bar{X}_M - \bar{X}_F - z_{1 - \alpha/2}\sqrt{\frac{\sigma_M^2}{n_M} + \frac{\sigma_F^2}{n_F}} ; \bar{X}_M - \bar{X}_F + z_{1 - \alpha/2}\sqrt{\frac{\sigma_M^2}{n_M} + \frac{\sigma_F^2}{n_F}} \right] \]
where \(Z \sim N(0, 1)\).
\[ \left[\bar{X}_M - t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_M^2}{n_M}} ; \bar{X}_M + t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_M^2}{n_M}} \right] \]
\[ \left[\bar{X}_F - t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_F^2}{n_F}} ; \bar{X}_F + t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_F^2}{n_F}} \right] \]
\[ \left[\bar{X}_M - \bar{X}_F - t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_M^2}{n_M} + \frac{S_F^2}{n_F}} ; \bar{X}_M - \bar{X}_F + t_{1 - \alpha/2, n - 1}\sqrt{\frac{S_M^2}{n_M} + \frac{S_F^2}{n_F}} \right] \]
where \(T \sim t_{n - 1}\), i.e., Student's t distribution with \(n - 1\) degrees of freedom.