Simulação de variáveis

Simulando variáveis obtidas a partir de operações com outras variáveis

Variáveis obtidas por transformações.
Alguns exemplos:

\[ \begin{align} Z \sim {\rm N}(0,1) & \longrightarrow V = Z^2 \sim \chi^2(1) \\ U \sim \chi^2(m) \mbox{ e } V \sim \chi^2(n) & \longrightarrow F = \frac{U/m}{V/n} \sim {\rm F}(m, n)\\ Z \sim {\rm N}(0,1) \mbox{ e } V \sim \chi^2(n) & \longrightarrow T = \frac{Z}{\sqrt{V/n}} \sim {\rm t}(n) \\ U,V \stackrel{ind}{\sim} {\rm U}(0,1) \\ & \longrightarrow Z_1 = \sqrt{-2 \log(U)} \; \cos(2 \pi V)\\ & \longrightarrow Z_2 = \sqrt{-2 \log(U)} \; \sin(2 \pi V)\\ U \sim {\rm G}(a, \lambda) \mbox{ e } V \sim {\rm G}(b, \lambda) \mbox{ (} U, V \mbox{ ind.)} &\longrightarrow \frac{U}{U+V} \sim {\rm B}(a,b) \end{align} \]

Exemplo: qui-quadrado como soma de quadrados de normais

# Chi-square(nu) draws built as the row-wise sum of nu squared N(0, 1) variates.
n <- 10000
nu <- 3
X <- matrix(rnorm(n * nu)^2, nrow = n, ncol = nu)
y <- rowSums(X)
mean(y)                # theoretical mean: E[y] = nu

## [1] 2.978533

var(y)                 # theoretical variance: V[y] = 2 * nu

## [1] 5.846565

mean(y^2) - mean(y)^2  # moment form of the variance (divides by n, not n - 1)

## [1] 5.84598

# Compare the simulated sample y with the theoretical chi-square(nu) law:
# the left panel overlays the kernel density estimate with the exact density,
# the right panel is a QQ-plot against the theoretical quantiles.
par(mfrow = c(1, 2), mar = c(3, 3, 0, 0), mgp = c(2, 1, 0))
plot(density(y), main = "")
# TRUE rather than T: T is an ordinary variable and can be reassigned.
curve(dchisq(x, df = nu), from = 0, to = 25, col = 4, add = TRUE)

qTEO <- qchisq(ppoints(n), nu)
qqplot(qTEO, y)
abline(0, 1)  # identity line: points on it mean the quantiles agree

Convoluções e misturas

Simule:
\[ \begin{align} Y_1 &\sim N(175, 4) \\ Y_2 &\sim N(165, 4) \\ Y_3 &= Y_1 + Y_2 \\ Y_4 &= \begin{cases} Y_1 \mbox{ c/ prob } 0.3 \\ Y_2 \mbox{ c/ prob } 0.7 \end{cases} \end{align}\]

# Convolution: draw Y1 and Y2, then form their sum (y3) and their average (y3a).
N <- 1000
y1 <- rnorm(N, mean = 175, sd = 4)
y2 <- rnorm(N, mean = 165, sd = 4)
y3 <- y1 + y2
y3a <- y3 / 2

# One panel per derived variable (red), each with the densities of the two
# original samples drawn on top.
par(mfrow = c(1, 2))
invisible(lapply(list(y3, y3a), function(derived) {
  plot(density(derived), col = 2, main = "")
  lines(density(y1))
  lines(density(y2))
}))

Colocando todas em um mesmo gráfico.

# Two-component mixture: each observation is taken from y1 when its uniform
# draw is below 0.3 and from y2 otherwise.
u <- runif(N)
y4 <- ifelse(u < 0.3, y1, y2)

plot(density(y3a), col = 2, main = "", xlim = c(145, 195))
lines(density(y1))
lines(density(y2))
lines(density(y4), col = 4)
# The red curve is y3a = (y1 + y2) / 2, not the sum Y3, so it is labelled
# with the quantity that is actually drawn.
legend("topright",
       legend = c(expression(Y[1]), expression(Y[2]),
                  expression((Y[1] + Y[2]) / 2), expression(Y[4])),
       lty = 1, col = c(1, 1, 2, 4), cex = 0.8)

\[ \begin{align} X_1 &\sim G(3, 2) \\ X_2 &\sim G(3, 5) \\ X_3 &= X_1 + X_2\\ X_4 &= \begin{cases} X_1 \mbox{ c/ prob } 0{,}5 \\ X_2 \mbox{ c/ prob } 0{,}5 \end{cases} \end{align}\]

# Convolution versus 50/50 mixture of two gamma samples with shape 3 and
# rates 2 and 5.
N <- 1000
x1 <- rgamma(N, shape = 3, rate = 2)
x2 <- rgamma(N, shape = 3, rate = 5)
x3 <- x1 + x2                      # convolution

# Draw exactly N uniforms. Using a different sample size here would make
# ifelse() recycle x1 and x2 and return a vector that is not of length N.
u <- runif(N)
# Indicator-based alternative for the mixture:
#   k <- as.integer(u > 0.5); k * x1 + (1 - k) * x2
x4 <- ifelse(u < 0.5, x1, x2)      # mixture

# Side-by-side histograms on a common scale.
par(mfcol = c(1, 2))
hist(x3, freq = FALSE, xlim = c(0, 5), ylim = c(0, 1))
hist(x4, freq = FALSE, xlim = c(0, 5), ylim = c(0, 1))

par(mfcol = c(1, 1))               # back to a single-panel layout

Mistura de várias distribuições gama

\[ \begin{align} F_X(x) &= \sum_{j=1}^{5} w_j F_{X_j}(x) \\ X_j &\sim {\rm G}(\mbox{shape}=3, \mbox{rate} = 1/j), \; j = 1, \ldots, 5 \\ w_j &= j/15 \end{align} \]

Programação evitando for()

# Five-component gamma mixture sampled without an explicit loop: draw the
# component label of every observation first, then hand rgamma() the matching
# per-observation rate vector.
n <- 5000
k <- sample.int(5, size = n, replace = TRUE, prob = (1:5) / 15)
rate <- 1 / k
x <- rgamma(n, shape = 3, rate = rate)

plot(density(x), xlim = c(0, 40), ylim = c(0, 0.3),
     lwd = 3, xlab = "x", main = "")
# Thin reference curves: a fresh sample from each pure component.
for (i in seq_len(5)) {
  lines(density(rgamma(n, shape = 3, rate = 1 / i)))
}

Mistura de várias distribuições gama (2)

# Gamma mixture with explicit mixing weights p and component rates lambda:
# sample a component index per observation, look up its rate, then draw.
n <- 5000
p <- c(0.1, 0.2, 0.2, 0.3, 0.2)
lambda <- seq(1, 3, by = 0.5)
k <- sample(seq_along(p), size = n, replace = TRUE, prob = p)
rate <- lambda[k]
x <- rgamma(n, shape = 3, rate = rate)

Densidades da mistura

# Density of a finite mixture of Gamma(shape = 3, rate = lambda[j]) components.
#
# x      : numeric vector of evaluation points.
# lambda : numeric vector of component rates.
# theta  : numeric vector of mixing weights, one per entry of lambda.
#
# Returns a numeric vector with one mixture density per element of x. A
# scalar x yields the same single value as before; a vector x is evaluated
# point by point instead of being recycled against lambda and collapsed into
# one number.
fmix <- function(x, lambda, theta) {
    # Each weight must pair with exactly one rate; silent recycling would
    # produce a wrong density.
    stopifnot(length(lambda) == length(theta))
    vapply(
        x,
        function(xi) sum(dgamma(xi, shape = 3, rate = lambda) * theta),
        numeric(1)
    )
}

# Plot the mixture density (thick line) with the density of each gamma
# component drawn on top.
p <- c(0.1, 0.2, 0.2, 0.3, 0.2)
lambda <- c(1, 1.5, 2, 2.5, 3)

x <- seq(0, 8, length.out = 200)

# Evaluate the mixture one grid point at a time; vapply() does this without
# turning x into a 1-d array just so apply() can iterate over it.
y <- vapply(x, fmix, numeric(1), lambda = lambda, theta = p)

plot(x, y, type = "l", ylim = c(0, 0.85), lwd = 3, ylab = "Densidade")

# dgamma() is vectorized over x, so each component curve is a single call.
# The curves are drawn directly so that y keeps holding the mixture density.
for (j in seq_along(lambda)) {
  lines(x, dgamma(x, shape = 3, rate = lambda[j]))
}

Mistura contínua (Poisson-Gamma)

\[ \begin{align} Y \mid \lambda &\sim {\rm P}(\lambda) \\ \lambda &\sim {\rm G}(7, 8) \end{align} \]

# Continuous mixture: Poisson counts whose mean is itself Gamma(sh, rate)
# distributed. The simulated frequencies are compared with a Poisson of the
# same mean (plot) and with the negative binomial pmf (table).
sh <- 7
rate <- 8

N <- 1000
lambda <- rgamma(N, shape = sh, rate = rate)
y <- rpois(N, lambda)

# Observed support; used everywhere so the plot and the table always cover
# the same values regardless of the largest count drawn.
support <- 0:max(y)

plot(prop.table(table(y)), type = "h")
# Poisson pmf with the sample mean, nudged right so both sets of spikes show.
lines(support + 0.1, dpois(support, lambda = mean(y)), type = "h", col = 2)

mix <- tabulate(y + 1, nbins = length(support)) / N
# Keep full precision here: rounding is applied only when printing, so the
# standard errors are not computed from already-rounded probabilities.
negbin <- dnbinom(support, size = sh, prob = rate / (1 + rate))
se <- sqrt(negbin * (1 - negbin) / N)

round(rbind(mix, negbin, se), 3)

##         [,1]  [,2]  [,3]  [,4]  [,5]  [,6]
## mix    0.445 0.326 0.160 0.054 0.010 0.005
## negbin 0.438 0.341 0.152 0.051 0.014 0.003
## se     0.016 0.015 0.011 0.007 0.004 0.002