Marcio Diniz | Michael Luu
Cedars Sinai Medical Center
11 October, 2022
dice
1 2 3 4 5 6
161 153 188 149 157 192
It is hard to say, let’s calculate the proportions:
Are the proportions equal to 1/6, i.e., 0.166…?
dice
1 2 3 4 5 6
16745 16806 16453 16757 16714 16525
It is hard to say, let’s calculate the proportions:
dice
1 2 3 4 5 6
0.16745 0.16806 0.16453 0.16757 0.16714 0.16525
Are the proportions equal to 1/6, i.e., 0.166…?
dice
1 2 3 4 5 6
1666872 1668473 1667449 1665194 1667349 1664663
It is hard to say, let’s calculate the proportions:
dice
1 2 3 4 5 6
0.1666872 0.1668473 0.1667449 0.1665194 0.1667349 0.1664663
Are the proportions equal to 1/6, i.e., 0.166…?
Experiment: Roll a die;
\(X \sim U\{1, \ldots, 6\} \rightarrow a = 1, b = 6\)
\(E(X) = 3.5\)
\(Var(X) = 2.916667 \rightarrow SD(X) = 1.707825\)
In a sample size of 10,
# Discrete uniform pmf on 1..n with n = 6: every value has mass 1/n.
n <- 6
data_plot <- data.frame(x = seq_len(n), y = rep(1 / n, times = n))
ggplot(data = data_plot, mapping = aes(x = x, y = y)) +
  geom_col() +
  # One tick per support point; probability axis fixed to [0, 1].
  scale_x_continuous(breaks = seq_len(n)) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Discrete uniform pmf on 1..n with n = 10: every value has mass 1/n.
n <- 10
data_plot <- data.frame(x = seq_len(n), y = rep(1 / n, times = n))
ggplot(data = data_plot, mapping = aes(x = x, y = y)) +
  geom_col() +
  # One tick per support point; probability axis fixed to [0, 1].
  scale_x_continuous(breaks = seq_len(n)) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Bernoulli pmf, i.e. Binomial(size = 1, p) evaluated at x = 0 and x = 1.
size <- 1
p <- 0.2
data_plot <- data.frame(
  x = 0:1,
  y = dbinom(0:1, size, p)
)
ggplot(data = data_plot, mapping = aes(x = x, y = y)) +
  geom_col() +
  scale_x_continuous(breaks = 0:1) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Bernoulli pmf, i.e. Binomial(size = 1, p) evaluated at x = 0 and x = 1.
size <- 1
p <- 0.8
data_plot <- data.frame(
  x = 0:1,
  y = dbinom(0:1, size, p)
)
ggplot(data = data_plot, mapping = aes(x = x, y = y)) +
  geom_col() +
  scale_x_continuous(breaks = 0:1) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Binomial(size, p) pmf plotted over its full support 0..n (here n == size).
n <- 10
size <- 10
p <- 0.2
# The support starts at 0: `seq(1:n)` silently dropped P(X = 0), and
# `seq(0:n)` evaluates to 1..(n + 1), putting the axis breaks off by one.
data_plot <- data.frame(x = 0:n, y = dbinom(0:n, size, p))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Binomial(size, p) pmf plotted over its full support 0..n (here n == size).
n <- 10
size <- 10
p <- 0.8
# The support starts at 0: `seq(1:n)` silently dropped P(X = 0), and
# `seq(0:n)` evaluates to 1..(n + 1), putting the axis breaks off by one.
data_plot <- data.frame(x = 0:n, y = dbinom(0:n, size, p))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
Experiment: Observe the number of mutations in a genome with about \(10^4\) nucleotides, given a mutation rate of \(9\times10^{-4}\) per nucleotide per replication cycle.
\(X \in \{0, 1, 2, 3, \ldots\}\);
\(X \sim Po(9) \rightarrow \lambda = 9\) ;
\(E(X) = \lambda = 9\);
\(Var(X) = \lambda = 9 \rightarrow SD(X) = 3\);
In a sample of 10 cell-culture dishes,
# Poisson(lambda) pmf shown for x = 0..n (the support is unbounded above;
# n only truncates the display).
n <- 20
lambda <- 1
# Start at 0: `seq(1:n)` omitted P(X = 0) = exp(-lambda), and `seq(0:n)`
# evaluates to 1..(n + 1), so the breaks were shifted by one.
data_plot <- data.frame(x = 0:n, y = dpois(0:n, lambda))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Poisson(lambda) pmf shown for x = 0..n (the support is unbounded above;
# n only truncates the display).
n <- 20
lambda <- 9
# Start at 0: `seq(1:n)` omitted P(X = 0) = exp(-lambda), and `seq(0:n)`
# evaluates to 1..(n + 1), so the breaks were shifted by one.
data_plot <- data.frame(x = 0:n, y = dpois(0:n, lambda))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Negative binomial pmf in the (size, mu) parameterisation, shown for x = 0..n.
n <- 20
mu <- 2
sigma2 <- 9
# Solve sigma2 = mu + mu^2 / size for the dispersion parameter.
size <- mu^2 / (sigma2 - mu)
# Start at 0: `seq(1:n)` omitted P(X = 0), and `seq(0:n)` evaluates to
# 1..(n + 1), so the breaks were shifted by one.
data_plot <- data.frame(
  x = 0:n,
  y = dnbinom(0:n, size = size, mu = mu)
)
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Negative binomial pmf in the (size, mu) parameterisation, shown for x = 0..n.
n <- 20
mu <- 5
sigma2 <- 9
# Solve sigma2 = mu + mu^2 / size for the dispersion parameter.
size <- mu^2 / (sigma2 - mu)
# Start at 0: `seq(1:n)` omitted P(X = 0), and `seq(0:n)` evaluates to
# 1..(n + 1), so the breaks were shifted by one.
data_plot <- data.frame(
  x = 0:n,
  y = dnbinom(0:n, size = size, mu = mu)
)
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = 0:n) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Hypergeometric pmf: k = 10 draws from m = 300 "successes" and n = 700
# "failures" (dhyper's own argument names), shown for x = 0..20.
n <- 20
# Start at 0 so P(X = 0) is displayed; the original grid 1:20 dropped it and
# left `n` unused. Values above k have probability 0.
s <- 0:n
data_plot <- data.frame(x = s, y = dhyper(s, k = 10, m = 300, n = 700))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, n, by = 2)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
# Hypergeometric pmf: k = 50 draws from m = 300 "successes" and n = 700
# "failures" (dhyper's own argument names), shown for x = 0..26.
n <- 20 # kept from the original script; not used by this plot
# Start the grid at 0, the lower end of the support (the original used 1:26).
s <- 0:26
data_plot <- data.frame(x = s, y = dhyper(s, k = 50, m = 300, n = 700))
ggplot(data_plot, aes(x = x, y = y)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 26, by = 2)) +
  labs(x = "X", y = "P(X = x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12)
  )
\(U(0, 1) \rightarrow a = 0, b = 1\);
\(E(X) = 0.5\);
\(Var(X) = 0.08333 \rightarrow SD(X) = 0.289\)
In a sample of 10 Uniform distributed variables,
# Two continuous uniform densities drawn over [-10, 10]. The colour keys "a"
# and "b" are sorted alphabetically by the scale, so "a" (U(-6, 1)) takes the
# first label and "b" (U(0, 5)) the second.
ggplot(data = data.frame(x = c(-10, 10)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dunif,
    n = 1001,
    args = list(min = 0, max = 5)
  ) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dunif,
    args = list(min = -6, max = 1)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("U(-6, 1)", "U(0, 5)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Load the emergency data set and draw a frequency histogram of creatinine
# using 0.5-wide bins on [0, 12].
dataset <- read_csv(file = "data/emergency.csv")
data_plot <- dataset
ggplot(data = data_plot, mapping = aes(x = creatinine)) +
  geom_histogram(breaks = seq(0, 12, by = 0.5)) +
  labs(x = "Creatinine", y = "Frequency") +
  theme_bw(base_size = 25) +
  theme(legend.position = "none")
# Same histogram on the density scale: bar height is the computed `density`
# statistic rather than the raw count.
ggplot(
  data = data_plot,
  mapping = aes(x = creatinine, y = after_stat(density))
) +
  geom_histogram(breaks = seq(0, 12, by = 0.5)) +
  labs(x = "Creatinine", y = "Density") +
  theme_bw(base_size = 25) +
  theme(legend.position = "none")
# Frequency table of creatinine in 0.5-wide bins: count, relative frequency,
# interval width and density (= relative frequency / width) per bin, followed
# by a "Total" row. Numeric columns are formatted as strings for display.
tab <- data_plot %>%
  mutate(creatinine_bin = cut(creatinine, seq(0, 12, 0.5))) %>%
  group_by(creatinine_bin) %>%
  summarize(frequency = n()) %>%
  mutate(
    relative_frequency = frequency / sum(frequency),
    interval_width = 0.5,
    density = relative_frequency / interval_width
  ) %>%
  mutate(
    relative_frequency = format(relative_frequency, digits = 3, nsmall = 3),
    density = format(density, digits = 3, nsmall = 3),
    interval_width = as.character(interval_width)
  ) %>%
  # Derive the total from the bin counts. The previous hard-coded 145 did not
  # match the data: the relative frequencies are computed against
  # sum(frequency), so the Total row must show that same number.
  add_row(
    creatinine_bin = "Total",
    frequency = sum(.$frequency),
    relative_frequency = "1",
    interval_width = "",
    density = ""
  )
# Render the frequency table with bold column headers and a footnote on the
# Density column explaining how it relates to relative frequency.
gt(tab) %>%
  cols_label(
    creatinine_bin = md("**Creatinine Bin**"),
    frequency = md("**Frequency**"),
    relative_frequency = md("**Relative Frequency**"),
    interval_width = md("**Interval Width**"),
    density = md("**Density**")
  ) %>%
  tab_footnote(
    # "\t" inside an R string is a TAB, so "$\times$" was emitted as
    # "$<TAB>imes$". Use the multiplication sign (U+00D7) directly.
    footnote = "Relative Frequency = Density \u00d7 Interval Width",
    locations = cells_column_labels(
      columns = density
    )
  )
Creatinine Bin | Frequency | Relative Frequency | Interval Width | Density1 |
---|---|---|---|---|
(0,0.5] | 2 | 0.01342 | 0.5 | 0.0268 |
(0.5,1] | 50 | 0.33557 | 0.5 | 0.6711 |
(1,1.5] | 40 | 0.26846 | 0.5 | 0.5369 |
(1.5,2] | 11 | 0.07383 | 0.5 | 0.1477 |
(2,2.5] | 17 | 0.11409 | 0.5 | 0.2282 |
(2.5,3] | 11 | 0.07383 | 0.5 | 0.1477 |
(3,3.5] | 4 | 0.02685 | 0.5 | 0.0537 |
(3.5,4] | 2 | 0.01342 | 0.5 | 0.0268 |
(4,4.5] | 2 | 0.01342 | 0.5 | 0.0268 |
(4.5,5] | 4 | 0.02685 | 0.5 | 0.0537 |
(5,5.5] | 2 | 0.01342 | 0.5 | 0.0268 |
(5.5,6] | 1 | 0.00671 | 0.5 | 0.0134 |
(6,6.5] | 2 | 0.01342 | 0.5 | 0.0268 |
(11,11.5] | 1 | 0.00671 | 0.5 | 0.0134 |
Total | 149 | 1 | ||
1 Relative Frequency = Density × Interval Width |
# Flag observations whose creatinine falls in the (3, 3.5] bin, then build
# (without printing) the density histogram and store it in `gp`.
# NOTE(review): `indicator` is not mapped to any aesthetic in this block, so
# the manual fill scale has nothing to act on here - presumably a fill mapping
# is meant to highlight the flagged bin; confirm.
data_plot <- mutate(
  dataset,
  indicator = (creatinine > 3 & creatinine <= 3.5)
)
gp <- ggplot(
  data = data_plot,
  mapping = aes(x = creatinine, y = after_stat(density))
) +
  geom_histogram(breaks = seq(0, 12, 0.5)) +
  scale_x_continuous(breaks = seq(0, 12, 0.5)) +
  scale_fill_manual(values = c("grey", gg_color_hue(2)[2])) +
  labs(x = "Creatinine", y = "Density") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.position = "none"
  )
# Flag observations with creatinine <= 3.5, then build (without printing) the
# density histogram and store it in `gp`.
# NOTE(review): `indicator` is not mapped to any aesthetic in this block, so
# the manual fill scale has nothing to act on here - presumably a fill mapping
# is meant to highlight the flagged bins; confirm.
data_plot <- mutate(
  dataset,
  indicator = (creatinine <= 3.5)
)
gp <- ggplot(
  data = data_plot,
  mapping = aes(x = creatinine, y = after_stat(density))
) +
  geom_histogram(breaks = seq(0, 12, 0.5)) +
  scale_x_continuous(breaks = seq(0, 12, 0.5)) +
  scale_fill_manual(values = c("grey", gg_color_hue(2)[2])) +
  labs(x = "Creatinine", y = "Density") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.position = "none"
  )
# Normal densities with the same spread and different means, over [-6, 6].
# Colour keys "a", "b" map to the labels in alphabetical order.
ggplot(data = data.frame(x = c(-6, 6)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dnorm,
    args = list(mean = 0, sd = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dnorm,
    args = list(mean = 3, sd = 1)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("N(0, 1)", "N(3, 1)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Normal densities with the same mean and different spread, over [-6, 6].
# Legend labels use the N(mean, variance) convention, matching the plot in
# this file that pairs sd = sqrt(2) with "N(0, 2)"; sd = 2 is therefore
# labelled "N(0, 4)" (the original "N(0, 2)" mixed sd and variance).
ggplot(data = data.frame(x = c(-6, 6)), aes(x)) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1), aes(colour = "a")
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 2), aes(colour = "b")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  ) +
  scale_colour_discrete("Distribution", labels = c("N(0, 1)", "N(0, 4)"))
# Normal densities with the same spread and different means, over [-6, 6].
# Colour keys "a", "b" map to the labels in alphabetical order.
ggplot(data = data.frame(x = c(-6, 6)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dnorm,
    args = list(mean = 0, sd = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dnorm,
    args = list(mean = 3, sd = 1)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("N(0, 1)", "N(3, 1)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Normal densities with the same mean and different spread, over [-6, 6].
# Legend labels use the N(mean, variance) convention, matching the plot in
# this file that pairs sd = sqrt(2) with "N(0, 2)"; sd = 4 is therefore
# labelled "N(0, 16)" (the original "N(0, 4)" mixed sd and variance).
ggplot(data = data.frame(x = c(-6, 6)), aes(x)) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1), aes(colour = "a")
  ) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 4), aes(colour = "b")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  ) +
  scale_colour_discrete("Distribution", labels = c("N(0, 1)", "N(0, 16)"))
# Three normal densities over [-6, 6]: means -1 and 1 with sd 1, and mean 0
# with sd sqrt(2) (variance 2, labelled "N(0, 2)").
ggplot(data = data.frame(x = c(-6, 6)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dnorm,
    args = list(mean = -1, sd = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dnorm,
    args = list(mean = 1, sd = 1)
  ) +
  stat_function(
    mapping = aes(colour = "c"),
    fun = dnorm,
    args = list(mean = 0, sd = sqrt(2))
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("N(-1, 1)", "N(1, 1)", "N(0, 2)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Standard normal density (solid) against Student-t densities (dashed) with
# increasing degrees of freedom, over [-6, 6].
# The discrete colour scale assigns labels to the keys "a".."e" in alphabetical
# order, so the keys must follow the label order: df = 50 is "d" and df = 100
# is "e". Previously they were swapped, so t(50) and t(100) were mislabelled.
ggplot(data = data.frame(x = c(-6, 6)), aes(x)) +
  stat_function(
    fun = dnorm, args = list(mean = 0, sd = 1),
    aes(colour = "a"), size = 1
  ) +
  stat_function(
    fun = dt, args = list(df = 5),
    aes(colour = "b"), size = 1, linetype = "dashed"
  ) +
  stat_function(
    fun = dt, args = list(df = 20),
    aes(colour = "c"), size = 1, linetype = "dashed"
  ) +
  stat_function(
    fun = dt, args = list(df = 50),
    aes(colour = "d"), size = 1, linetype = "dashed"
  ) +
  stat_function(
    fun = dt, args = list(df = 100),
    aes(colour = "e"), size = 1, linetype = "dashed"
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  ) +
  scale_colour_discrete(
    "Distribution",
    labels = c("N(0, 1)", "t(5)", "t(20)", "t(50)", "t(100)")
  )
where \(\delta = \frac{\lambda}{\sqrt{1 + \lambda^2}}\)
# Skew-normal densities from dSN1 with mu = 0, sigma = 1 and varying nu, over
# [-6, 6]. The nu = 0 curve is drawn solid and labelled "N(0, 1)"; the others
# are dashed. Colour keys "a".."e" map to the labels in alphabetical order.
ggplot(data = data.frame(x = c(-6, 6)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dSN1,
    args = list(mu = 0, sigma = 1, nu = 0),
    size = 1
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dSN1,
    args = list(mu = 0, sigma = 1, nu = 1),
    size = 1,
    linetype = "dashed"
  ) +
  stat_function(
    mapping = aes(colour = "c"),
    fun = dSN1,
    args = list(mu = 0, sigma = 1, nu = -1),
    size = 1,
    linetype = "dashed"
  ) +
  stat_function(
    mapping = aes(colour = "d"),
    fun = dSN1,
    args = list(mu = 0, sigma = 1, nu = 3),
    size = 1,
    linetype = "dashed"
  ) +
  stat_function(
    mapping = aes(colour = "e"),
    fun = dSN1,
    args = list(mu = 0, sigma = 1, nu = -3),
    size = 1,
    linetype = "dashed"
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c(
      "N(0, 1)", "SN(0, 1, 1)", "SN(0, 1, -1)", "SN(0, 1, 3)", "SN(0, 1, -3)"
    )
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Log-normal densities with sdlog = 1 and meanlog 0 vs 3, over [0, 50].
ggplot(data = data.frame(x = c(0, 50)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dlnorm,
    args = list(meanlog = 0, sdlog = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dlnorm,
    args = list(meanlog = 3, sdlog = 1)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("LN(0, 1)", "LN(3, 1)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Log-normal densities with meanlog = 0 and sdlog 1 vs 2, over [0, 30].
# NOTE(review): the label "LN(0, 2)" pairs with sdlog = 2; confirm whether the
# second label parameter is meant to be sdlog or its square.
ggplot(data = data.frame(x = c(0, 30)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dlnorm,
    args = list(meanlog = 0, sdlog = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dlnorm,
    args = list(meanlog = 0, sdlog = 2)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("LN(0, 1)", "LN(0, 2)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# Chi-squared densities with 1, 5 and 20 degrees of freedom, over [0, 50].
ggplot(data = data.frame(x = c(0, 50)), mapping = aes(x)) +
  stat_function(
    mapping = aes(colour = "a"),
    fun = dchisq,
    args = list(df = 1)
  ) +
  stat_function(
    mapping = aes(colour = "b"),
    fun = dchisq,
    args = list(df = 5)
  ) +
  stat_function(
    mapping = aes(colour = "c"),
    fun = dchisq,
    args = list(df = 20)
  ) +
  scale_colour_discrete(
    name = "Distribution",
    labels = c("Chi-Squared(1)", "Chi-Squared(5)", "Chi-Squared(20)")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  )
# F densities for (df1, df2) = (1, 1), (1, 5) and (5, 1), over [0, 20].
# The legend labels now state the degrees of freedom actually passed to df();
# the original labels read "F(1, 40)" and "F(40, 1)" while the curves used 5.
ggplot(data = data.frame(x = c(0, 20)), aes(x)) +
  stat_function(
    fun = df, args = list(df1 = 1, df2 = 1), aes(colour = "a")
  ) +
  stat_function(
    fun = df, args = list(df1 = 1, df2 = 5), aes(colour = "b")
  ) +
  stat_function(
    fun = df, args = list(df1 = 5, df2 = 1), aes(colour = "c")
  ) +
  labs(x = "X", y = "f(x)") +
  theme_bw() +
  theme(
    text = element_text(size = 20),
    legend.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.position = "bottom"
  ) +
  scale_colour_discrete(
    "Distribution",
    labels = c("F(1, 1)", "F(1, 5)", "F(5, 1)")
  )