Variáveis obtidas por transformações.
Alguns exemplos:
\[ \begin{align} Z \sim {\rm N}(0,1) & \longrightarrow V = Z^2 \sim \chi^2(1) \\ U \sim \chi^2(m) \mbox{ e } V \sim \chi^2(n) & \longrightarrow F = \frac{U/m}{V/n} \sim {\rm F}(m, n)\\ Z \sim {\rm N}(0,1) \mbox{ e } V \sim \chi^2(n) & \longrightarrow T = \frac{Z}{\sqrt{V/n}} \sim {\rm t}(n) \\ U,V \stackrel{ind}{\sim} {\rm U}(0,1) \\ & \longrightarrow Z_1 = \sqrt{-2 \log(U)} \; \cos(2 \pi V)\\ & \longrightarrow Z_2 = \sqrt{-2 \log(U)} \; \sin(2 \pi V)\\ U \sim {\rm G}(a, \lambda) \mbox{ e } V \sim {\rm G}(b, \lambda) \mbox{ (U, V ind.)} &\longrightarrow \frac{U}{U+V} \sim {\rm B}(a,b) \end{align} \]
## [1] 2.978533
## [1] 5.846565
## [1] 5.84598
Simule:
\[ \begin{align}
Y_1 &\sim N(175, 4) \\
Y_2 &\sim N(165, 4) \\
Y_3 &= Y_1 + Y_2 \\
Y_4 &= \begin{cases} Y_1 \mbox{ c/ prob } 0.3 \\ Y_2 \mbox{ c/ prob } 0.7 \end{cases}
\end{align}\]
# Draw N values from each of two normal populations (standard deviation 4)
# centred at 175 and 165, then combine them by sum (y3) and by mean (y3a).
N <- 1000
y1 <- rnorm(n = N, mean = 175, sd = 4)
y2 <- rnorm(n = N, mean = 165, sd = 4)
y3 <- y1 + y2
y3a <- y3 / 2

# Two side-by-side panels: each one shows a combined sample in red with the
# densities of the two original samples overlaid in the default colour.
par(mfrow = c(1, 2))
invisible(lapply(list(y3, y3a), function(combined) {
  plot(density(combined), col = 2, main = "")
  lines(density(y1))
  lines(density(y2))
}))
Colocando todas em um mesmo gráfico.
# Two-component mixture: each observation is taken from y1 when its uniform
# draw falls below 0.3 and from y2 otherwise.
u <- runif(N)
y4 <- ifelse(u >= 0.3, y2, y1)

# Single panel with the averaged sample (red), the two original samples
# (default colour) and the mixture (blue).
plot(density(y3a), col = 2, main = "", xlim = c(145, 195))
invisible(lapply(list(y1, y2), function(original) lines(density(original))))
lines(density(y4), col = 4)
legend(
  "topright",
  legend = expression(Y[1], Y[2], Y[3], Y[4]),
  lty = 1,
  col = c(1, 1, 2, 4),
  cex = 0.8
)
\[ \begin{align} X_1 &\sim G(3, 2) \\ X_2 &\sim G(3, 2) \\ X_3 &= X_1 + X_2\\ X_4 &= \begin{cases} X_1 \mbox{ c/ prob } 0,5 \\ X_2 \mbox{ c/ prob }0,5 \end{cases} \end{align}\]
# Simulate two gamma samples, their sum (x3) and a 50/50 mixture (x4), then
# compare the histograms of the sum and the mixture side by side.
N <- 1000
x1 <- rgamma(N, shape = 3, rate = 2)
# NOTE(review): the formula preceding this code states X2 ~ G(3, 2), but
# rate 5 is used here -- confirm which one is intended.
x2 <- rgamma(N, shape = 3, rate = 5)
x3 <- x1 + x2
# One uniform draw per observation. It must use N, the sample size defined
# above, so that u lines up element by element with x1 and x2.
u <- runif(N)
# Arithmetic alternative to the ifelse() call below:
#   k <- as.integer(u < 0.5)
#   x4 <- k * x1 + (1 - k) * x2
x4 <- ifelse(u < 0.5, x1, x2)
par(mfcol = c(1, 2))
# Density-scale histograms on common axes so the two shapes are comparable.
hist(x3, probability = TRUE, xlim = c(0, 5), ylim = c(0, 1))
hist(x4, probability = TRUE, xlim = c(0, 5), ylim = c(0, 1))
\[ \begin{align} F_X(x) &= \sum_{j=1}^{5} w_j F_{X_j}(x) \\ X_j &\sim {\rm G}({\rm shape}=3, {\rm rate} = 1/j), \; j = 1, \ldots, 5 \\ w_j &= j/15 \end{align} \]
Programação evitando for()
#' Density of a finite mixture of gamma distributions with shape 3
#'
#' Every term of the weighted component densities is summed into a single
#' number, so the function is meant to be called with one point at a time.
#'
#' @param x Point at which the mixture density is evaluated.
#' @param lambda Numeric vector with the rate of each gamma component.
#' @param theta Numeric vector of mixing weights, one per component.
#' @return A single number: the sum of the weighted component densities.
fmix <- function(x, lambda, theta) {
  component_density <- dgamma(x, shape = 3, rate = lambda)
  sum(theta * component_density)
}
# Evaluate and plot the density of a five-component gamma mixture (thick
# line) together with the densities of its individual components.
p <- c(0.1, 0.2, 0.2, 0.3, 0.2)  # mixing weights
lambda <- c(1, 1.5, 2, 2.5, 3)   # rate of each gamma component
x <- seq(0, 8, length.out = 200)
# fmix() collapses its result to one number, so it is mapped over the grid
# one point at a time; vapply() does this on a plain vector without for().
y <- vapply(x, fmix, numeric(1), lambda = lambda, theta = p)
plot(x, y, type = "l", ylim = c(0, 0.85), lwd = 3, ylab = "Densidade")
# dgamma() is vectorised over x, so each component curve is drawn directly;
# `y` is not reassigned and keeps holding the mixture density afterwards.
for (j in seq_along(lambda)) {
  lines(x, dgamma(x, shape = 3, rate = lambda[j]))
}
\[ \begin{align} Y \mid \lambda &\sim P(\lambda) \\ \lambda &\sim {\rm G}(4, 3) \end{align} \]
# Poisson-gamma mixture: draw lambda from a gamma distribution and then y
# from a Poisson distribution with that lambda, one pair per observation.
# NOTE(review): the formula preceding this code states G(4, 3), while
# shape 7 and rate 8 are used here -- confirm which one is intended.
sh <- 7
rate <- 8
N <- 1000
lambda <- rgamma(N, shape = sh, rate = rate)
y <- rpois(N, lambda)

# Observed relative frequencies, with the Poisson probabilities at the
# sample mean drawn in red, shifted slightly right so both stay visible.
plot(prop.table(table(y)), type = "h")
support <- 0:max(y)
lines(support + 0.1, dpois(support, lambda = mean(y)), type = "h", col = 2)

# Empirical proportions against negative binomial probabilities with
# size = sh and prob = rate / (1 + rate). The standard error is computed
# from the unrounded probabilities; rounding happens only for display.
mix <- tabulate(y + 1) / N
negbin <- dnbinom(support, size = sh, prob = rate / (1 + rate))
se <- sqrt(negbin * (1 - negbin) / N)
round(rbind(mix, negbin, se), 3)
## [,1] [,2] [,3] [,4] [,5] [,6]
## mix 0.445 0.326 0.160 0.054 0.010 0.005
## negbin 0.438 0.341 0.152 0.051 0.014 0.003
## se 0.016 0.015 0.011 0.007 0.004 0.002