Multivariate Covariance Generalized Linear Models for the Analysis of Experimental Data

github.com/leg-ufpr/mcglm4aed

1 Univariate linear models

#-----------------------------------------------------------------------
# A univariate linear model.

rm(list = ls())

# Using `mtcars` as toy data.
# Model with numeric and categorical predictors.
m0 <- lm(qsec ~ mpg + factor(gear) + hp, data = mtcars)

# Vector with the estimated parameters.
b <- cbind(coef(m0))
b
##                       [,1]
## (Intercept)   20.805502199
## mpg           -0.009802228
## factor(gear)4 -0.098158200
## factor(gear)5 -1.673647461
## hp            -0.016780588
# Covariance matrix of the estimates.
V <- vcov(m0)
round(V, digits = 5)
##               (Intercept)      mpg factor(gear)4 factor(gear)5       hp
## (Intercept)       4.56465 -0.14111       0.01402       0.89307 -0.01248
## mpg              -0.14111  0.00506      -0.01333      -0.03327  0.00034
## factor(gear)4     0.01402 -0.01333       0.37630       0.15247  0.00061
## factor(gear)5     0.89307 -0.03327       0.15247       0.60114 -0.00256
## hp               -0.01248  0.00034       0.00061      -0.00256  0.00004
# Term labels.
tl <- c(if (attr(terms(m0), "intercept")) "(Intercept)",
        attr(terms(m0), "term.labels"))
tl
## [1] "(Intercept)"  "mpg"          "factor(gear)" "hp"
# Choleski of the Fisher information matrix (inverse of the covariance).
G <- chol(solve(V))
G
##               (Intercept)      mpg factor(gear)4 factor(gear)5         hp
## (Intercept)      4.765046 95.73275      1.786892     0.7445384  698.97267
## mpg              0.000000 28.26639      1.338267     0.1618315 -249.58465
## factor(gear)4    0.000000  0.00000      1.879010    -0.8232970  -81.38276
## factor(gear)5    0.000000  0.00000      0.000000     1.5130812   97.09873
## hp               0.000000  0.00000      0.000000     0.0000000  158.29395

1.1 Sequential F test

#--------------------------------------------
# Sequential F tests.

Rb <- G %*% b
Rb
##                    [,1]
## (Intercept)   85.050112
## mpg            3.508893
## factor(gear)4  2.559119
## factor(gear)5 -4.161738
## hp            -2.656266
sF <- tapply(Rb,
             INDEX = m0$assign,
             FUN = function(x) {
                 ndf <- length(x)
                 fval <- crossprod(x)/ndf
                 data.frame(nDF = ndf,
                            `F value` = fval,
                            check.names = FALSE)
             })
sF <- do.call(rbind, sF)
rownames(sF) <- tl
sF
##              nDF     F value
## (Intercept)    1 7233.521614
## mpg            1   12.312329
## factor(gear)   2   11.934579
## hp             1    7.055747
anova(m0)
## Analysis of Variance Table
## 
## Response: qsec
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## mpg           1 17.352 17.3523 12.3123 0.0015966 ** 
## factor(gear)  2 33.640 16.8199 11.9346 0.0001933 ***
## hp            1  9.944  9.9439  7.0557 0.0130995 *  
## Residuals    27 38.052  1.4093                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

1.2 Marginal F test

#--------------------------------------------
# Marginal F tests.

# Position with the same number are columns of the same term in the
# design matrix.
m0$assign
## [1] 0 1 2 2 3
# Split indices by terms
a <- split(x = seq_along(m0$assign),
           f = m0$assign)
names(a) <- tl

mF <- lapply(a,
             FUN = function(i) {
                 ndf <- length(i)
                 QR <- qr(t(solve(G))[, i, drop = FALSE])
                 x <- qr.qty(qr = QR, Rb)[1:ndf]
                 fval <- sum(x^2)/ndf
                 data.frame(nDF = ndf,
                            `F value` = fval,
                            check.names = FALSE)
             })
mF <- do.call(rbind, mF)
mF
##              nDF     F value
## (Intercept)    1 94.83059183
## mpg            1  0.01898563
## factor(gear)   2  2.48751136
## hp             1  7.05574695
drop1(m0, test = "F")
## Single term deletions
## 
## Model:
## qsec ~ mpg + factor(gear) + hp
##              Df Sum of Sq    RSS    AIC F value Pr(>F)  
## <none>                    38.052 15.543                 
## mpg           1    0.0268 38.079 13.566  0.0190 0.8914  
## factor(gear)  2    7.0115 45.064 16.955  2.4875 0.1020  
## hp            1    9.9439 47.996 20.972  7.0557 0.0131 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2 Multivariate linear models

#-----------------------------------------------------------------------
# A multivariate linear model.

rm(list = ls())

# Full model.
m_full <- lm(cbind(qsec, wt) ~ mpg + factor(gear) + hp,
             data = mtcars)

# Is this manova a sequential device of hypotheses tests?
anova(m_full)
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99629   3487.4      2     26 < 2.2e-16 ***
## mpg           1 0.86699     84.7      2     26 4.077e-12 ***
## factor(gear)  2 0.50649      4.6      4     54  0.002951 ** 
## hp            1 0.33357      6.5      2     26  0.005115 ** 
## Residuals    27                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# TODO: Calculate each Pillai for nested models in sequence.

# Error SSP.
S_full <- crossprod(residuals(m_full))
S_full
##           qsec       wt
## qsec 38.052183 6.364296
## wt    6.364296 5.916464
# Terms to be dropped out (reversed order).
# tm <- rev(attr(terms(m_full), "term.labels"))
tm <- attr(terms(m_full), "term.labels")
tm
## [1] "mpg"          "factor(gear)" "hp"

2.1 Sequential tests by refitting

#--------------------------------------------
# Sequential tests.

# Empty list to keep extra SPP and Pillai statistic.
drops <- replicate(length(tm),
                   list(S = NULL,
                        Pillai = numeric(0)),
                   simplify = FALSE)
names(drops) <- tm

# In the loop, the model m1 has one more term than the model m0.
m1 <- m_full

# ATTENTION: loop runs in the reversed order of the terms.
for (i in rev(tm)) {
    # Dropping one term out sequentially.
    m0 <- update(m1,
                 formula = as.formula(sprintf(". ~ . -%s", i)))
    # Extra SSP matrix (difference in the error SSP between models).
    drops[[i]]$S <- crossprod(residuals(m0)) -
        crossprod(residuals(m1))
    m1 <- m0
    # Pillai statistic.
    drops[[i]]$Pillai <-
        sum(diag(drops[[i]]$S %*% solve(drops[[i]]$S + S_full)))
}

cbind(Pillai = sapply(drops, "[[", "Pillai"))
##                 Pillai
## mpg          0.8669934
## factor(gear) 0.5064920
## hp           0.3335700
anova(m_full, test = "Pillai")
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99629   3487.4      2     26 < 2.2e-16 ***
## mpg           1 0.86699     84.7      2     26 4.077e-12 ***
## factor(gear)  2 0.50649      4.6      4     54  0.002951 ** 
## hp            1 0.33357      6.5      2     26  0.005115 ** 
## Residuals    27                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2 Sequential tests by QR

#-----------------------------------------------------------------------
# How get the same results without refitting the model?

# QR decomposition of the design matrix X.
QR <- m_full$qr
R <- qr.R(QR)
Q <- qr.Q(QR)

# Matrix of responses.
y <- as.matrix(m_full$model[1])
colnames(y) <- sub(x = colnames(y),
                   pattern = "^[^.]*\\.",
                   replacement = "")
dim(y)
## [1] 32  2
head(y)
##                    qsec    wt
## Mazda RX4         16.46 2.620
## Mazda RX4 Wag     17.02 2.875
## Datsun 710        18.61 2.320
## Hornet 4 Drive    19.44 3.215
## Hornet Sportabout 17.02 3.440
## Valiant           20.22 3.460
# Error SSP: y' [I - QQ'] y.
S_full
##           qsec       wt
## qsec 38.052183 6.364296
## wt    6.364296 5.916464
t(y) %*% (diag(nrow(y)) - Q %*% t(Q)) %*% y
##           qsec       wt
## qsec 38.052183 6.364296
## wt    6.364296 5.916464
# Model SSP: y' [QQ'] y.
crossprod(fitted(m_full))
##          qsec        wt
## qsec 10255.43 1821.7303
## wt    1821.73  354.9846
t(y) %*% (Q %*% t(Q)) %*% y
##          qsec        wt
## qsec 10255.43 1821.7303
## wt    1821.73  354.9846
# IMPORTANT: Columns of Q are ortogonal.
a <- split(x = seq_along(m_full$assign),
           f = m_full$assign)
tl <- c(if (attr(terms(m_full), "intercept")) "(Intercept)",
        attr(terms(m_full), "term.labels"))
names(a) <- tl

pil <- sapply(a,
              FUN = function(i) {
                  # Projection matrix.
                  QQl <- Q[, i] %*% t(Q[, i])
                  # Extra SSP.
                  S <- t(y) %*% QQl %*% y
                  # Pillai statistic.
                  sum(diag(S %*% solve(S + S_full)))
              })
cbind(Pillai = pil)
##                 Pillai
## (Intercept)  0.9962862
## mpg          0.8669934
## factor(gear) 0.5064920
## hp           0.3335700
anova(m_full)
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99629   3487.4      2     26 < 2.2e-16 ***
## mpg           1 0.86699     84.7      2     26 4.077e-12 ***
## factor(gear)  2 0.50649      4.6      4     54  0.002951 ** 
## hp            1 0.33357      6.5      2     26  0.005115 ** 
## Residuals    27                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.3 Marginal tests by Wald

#-----------------------------------------------------------------------
# How get the Extra SSP matrix using a Wald construction (Mahalanobis
# like distance)?

# Design matrix.
X <- model.matrix(m_full)
XlX <- crossprod(X) # X'X
XlXi <- solve(XlX)  # (X'X)^{-1}

# Estimated parameters.
B <- coef(m_full)
B
##                       qsec           wt
## (Intercept)   20.805502199  5.044044246
## mpg           -0.009802228 -0.103489708
## factor(gear)4 -0.098158200 -0.150339524
## factor(gear)5 -1.673647461 -0.771230797
## hp            -0.016780588  0.002926363
# L matricex.
# For `mpg`.
# L <- rbind(c(0, 1, 0, 0, 1))
# For `gear`.
# L <- rbind(c(0, 0, 1, 0, 0),
#            c(0, 0, 0, 1, 0))
# For `hp`.
# L <- rbind(c(0, 0, 0, 0, 1))

# Construction of th L matrix to test each model term.
L <- by(seq_along(m_full$assign),
        m_full$assign,
        FUN = function(i) {
            L <- matrix(0,
                        nrow = length(i),
                        ncol = length(m_full$assign))
            L[, i] <- diag(i * 0 + 1)
            return(L)
        })
names(L) <- tl
L
## : (Intercept)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    0    0    0    0
## -------------------------------------------------------- 
## : mpg
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    1    0    0    0
## -------------------------------------------------------- 
## : factor(gear)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    0    1    0    0
## [2,]    0    0    0    1    0
## -------------------------------------------------------- 
## : hp
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    0    0    0    1
# Marginal hypotheses test using Pillai statistic.
pil <- sapply(L,
              FUN = function(Li) {
                  LB <- Li %*% B
                  v <- solve(Li %*% XlXi %*% t(Li))
                  S_extr <- t(LB) %*% v %*% LB
                  sum(diag(S_extr %*% solve(S_extr + S_full)))
              })
cbind(Pillai = pil)
##                 Pillai
## (Intercept)  0.7857734
## mpg          0.3734653
## factor(gear) 0.2335691
## hp           0.3335700
car::Anova(m_full, type = 3)
## 
## Type III MANOVA Tests: Pillai test statistic
##              Df test stat approx F num Df den Df    Pr(>F)    
## (Intercept)   1   0.78577   47.683      2     26 2.001e-09 ***
## mpg           1   0.37347    7.749      2     26  0.002292 ** 
## factor(gear)  2   0.23357    1.785      4     54  0.145272    
## hp            1   0.33357    6.507      2     26  0.005115 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3 Session information

# devtools::session_info()
Sys.time()
## [1] "2017-07-26 19:38:15 BRT"
cbind(Sys.info())
##                [,1]                                          
## sysname        "Linux"                                       
## release        "4.4.0-87-generic"                            
## version        "#110-Ubuntu SMP Tue Jul 18 12:55:35 UTC 2017"
## nodename       "class"                                       
## machine        "x86_64"                                      
## login          "unknown"                                     
## user           "walmes"                                      
## effective_user "walmes"
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
## [1] rmarkdown_1.3
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.9        knitr_1.15.1       magrittr_1.5      
##  [4] splines_3.4.1      MASS_7.3-47        lattice_0.20-35   
##  [7] minqa_1.2.4        stringr_1.2.0      car_2.1-4         
## [10] tools_3.4.1        nnet_7.3-12        pbkrtest_0.4-6    
## [13] parallel_3.4.1     grid_3.4.1         nlme_3.1-131      
## [16] mgcv_1.8-17        quantreg_5.29      MatrixModels_0.4-1
## [19] htmltools_0.3.5    yaml_2.1.14        lme4_1.1-12       
## [22] rprojroot_1.2      digest_0.6.12      Matrix_1.2-10     
## [25] nloptr_1.0.4       evaluate_0.10      stringi_1.1.2     
## [28] compiler_3.4.1     methods_3.4.1      backports_1.0.5   
## [31] SparseM_1.74