Multivariate Covariance Generalized Linear Models for the Analysis of Experimental Data

github.com/leg-ufpr/mcglm4aed

1 Data description and objectives

Pests and diseases of foliage, phytotoxicity of chemicals, hail and some mechanical injuries are the main agents of defoliation in cotton. The purpose of this research was to study the effect of defoliation levels at different growth stages of cotton. The study was conducted in a greenhouse and the experimental design was completely randomized in factorial 5 defoliation levels x 5 growth stages. The response variables were: number of bolls produced (nb), number of bolls viable, total boll weight (wb), plant height (hei) and number of plant nodes (nn). The last two variables indicates the vegetative growth and the first three indicate the plant production (or results of the reproduction).

For more information, see Silva et al. (2012). Zeviani et al. (2014) used the Gamma-Count distribution to analyse the number of balls and found that it is a subdispersed count variable.

2 Analysis with MLM

#-----------------------------------------------------------------------
# Packages.

# rm(list = ls())
library(lattice)
library(latticeExtra)
library(gridExtra)
library(car)
library(candisc)
library(doBy)
library(multcomp)
library(mcglm)
library(Matrix)
# Loading the data.
url <- "http://www.leg.ufpr.br/~walmes/data/desfolha_algodao.txt"
cot <- read.table(url, header = TRUE, sep = "\t")

# Position: between and on the crop lines.
# Depth: soil layers.
str(cot)
## 'data.frame':    250 obs. of  9 variables:
##  $ fase    : Factor w/ 5 levels "botflor","capulho",..: 5 5 5 5 5 5 5 5 5 5 ...
##  $ desf    : int  0 0 0 0 0 25 25 25 25 25 ...
##  $ planta  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ bloco   : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ nestrrep: int  5 4 4 6 5 5 5 6 5 5 ...
##  $ ncapu   : int  5 4 4 5 5 5 5 5 5 5 ...
##  $ alt     : num  73 63 65.5 85 72 77 74 59 81 75 ...
##  $ nnos    : int  16 17 14 15 15 15 17 10 14 13 ...
##  $ pesocap : num  33.2 28.7 31.5 28.9 36.4 ...
# GS: growth stage.
# def: artificial defoliation.
# plant: plant inside plot.
# rept: treatment cellrepetition.
# nrs: number of reproductive structures (n).
# nb: number of produced balls.
# hei: plant height at hasvest.
# nn: number of plant nodes.
# wb: weight of the balls.
names(cot) <- c("GS", "def", "plant", "rept",
                "nrs", "nb", "hei", "nn", "wb")
cot <- transform(cot,
                 GS = factor(GS,
                             levels = unique(GS),
                             labels = 1:nlevels(GS)),
                 Def = factor(def))

# nrs and nb are very similar.
cot$nrs <- NULL
str(cot)
## 'data.frame':    250 obs. of  9 variables:
##  $ GS   : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ def  : int  0 0 0 0 0 25 25 25 25 25 ...
##  $ plant: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ rept : int  1 2 3 4 5 1 2 3 4 5 ...
##  $ nb   : int  5 4 4 5 5 5 5 5 5 5 ...
##  $ hei  : num  73 63 65.5 85 72 77 74 59 81 75 ...
##  $ nn   : int  16 17 14 15 15 15 17 10 14 13 ...
##  $ wb   : num  33.2 28.7 31.5 28.9 36.4 ...
##  $ Def  : Factor w/ 5 levels "0","25","50",..: 1 1 1 1 1 2 2 2 2 2 ...
#-----------------------------------------------------------------------
# Visualizing the data.

v <- c("wb", "hei", "nb", "nn")
cot <- aggregate(as.matrix(cot[, v]) ~ GS + Def + rept,
                 data = cot,
                 FUN = sum,
                 na.rm = TRUE)

combineLimits(
    useOuterStrips(
        xyplot(wb + hei + nb + nn ~ Def | GS,
               outer = TRUE,
               jitter.x = TRUE,
               as.table = TRUE,
               type = c("p", "smooth"),
               scales = list(y = "free"),
               data = cot)))

sp1 <- splom(~cot[v],
             groups = GS,
             data = cot,
             auto.key = list(title = "Growth stage",
                             cex.title = 1,
                             columns = 3),
             par.settings = list(superpose.symbol = list(pch = 4)),
             as.matrix = TRUE)

sp2 <- splom(~cot[v],
             groups = Def,
             data = cot,
             auto.key = list(title = "Artificial defoliation",
                             cex.title = 1,
                             columns = 3),
             as.matrix = TRUE)

# c("Depth" = sp1, "Position" = sp2, merge.legends = TRUE)
grid.arrange(sp1, sp2, ncol = 1)

#-----------------------------------------------------------------------
# Fitting the multivariate linear model.

m0 <- lm(cbind(wb, hei, nb, nn) ~ GS * Def,
         data = cot)

# Manova table.
anova(m0)
## Analysis of Variance Table
## 
##              Df  Pillai approx F num Df den Df    Pr(>F)    
## (Intercept)   1 0.99685   7682.6      4     97 < 2.2e-16 ***
## GS            4 0.75617      5.8     16    400 1.856e-11 ***
## Def           4 0.82726      6.5     16    400 4.070e-13 ***
## GS:Def       16 0.92777      1.9     64    400 0.0001388 ***
## Residuals   100                                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary.aov(m0)
##  Response wb :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## GS            4 1467.4  366.85 29.5034 3.342e-16 ***
## Def           4 2287.2  571.80 45.9866 < 2.2e-16 ***
## GS:Def       16  789.6   49.35  3.9689 9.375e-06 ***
## Residuals   100 1243.4   12.43                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response hei :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## GS            4 1180.8 295.190  6.2442 0.0001583 ***
## Def           4  922.2 230.555  4.8770 0.0012363 ** 
## GS:Def       16  893.3  55.833  1.1810 0.2962400    
## Residuals   100 4727.4  47.274                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nb :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## GS            4 24.448   6.112  7.2762 3.482e-05 ***
## Def           4 28.608   7.152  8.5143 5.920e-06 ***
## GS:Def       16 35.872   2.242  2.6690  0.001543 ** 
## Residuals   100 84.000   0.840                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response nn :
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## GS            4  52.688  13.172  5.2104 0.0007456 ***
## Def           4  66.048  16.512  6.5316 0.0001035 ***
## GS:Def       16  61.312   3.832  1.5158 0.1088437    
## Residuals   100 252.800   2.528                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract the raw residuals.
r <- residuals(m0)

# Checking the models assumptions on the residuals.
scatterplotMatrix(r,
                  gap = 0,
                  smooth = FALSE,
                  reg.line = FALSE,
                  ellipse = TRUE,
                  diagonal = "qqplot")

# Test on the interaction term. Preparing the hypothesis matrix.
a <- m0$assign
A <- diag(length(a))
A <- A[a == 3, ]

linearHypothesis(m0,
                 hypothesis.matrix = A,
                 test = "Pillai")
## 
## Sum of squares and products for the hypothesis:
##            wb      hei       nb        nn
## wb  789.59661 326.2598 138.3355 -88.03828
## hei 326.25982 893.3200  25.0200  -4.78000
## nb  138.33552  25.0200  35.8720 -20.63200
## nn  -88.03828  -4.7800 -20.6320  61.31200
## 
## Sum of squares and products for error:
##            wb       hei       nb        nn
## wb  1243.4139 -389.1649 119.4454 -178.9038
## hei -389.1649 4727.4000  47.0000  472.2000
## nb   119.4454   47.0000  84.0000  -12.8000
## nn  -178.9038  472.2000 -12.8000  252.8000
## 
## Multivariate Test: 
##        Df test stat approx F num Df den Df     Pr(>F)    
## Pillai 16 0.9277695  1.88741     64    400 0.00013882 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Anova(m0)
## 
## Type II MANOVA Tests: Pillai test statistic
##        Df test stat approx F num Df den Df    Pr(>F)    
## GS      4   0.75617   5.8278     16    400 1.856e-11 ***
## Def     4   0.82726   6.5185     16    400 4.070e-13 ***
## GS:Def 16   0.92777   1.8874     64    400 0.0001388 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Testing only for the `hei` and `nn` responses.
linearHypothesis(m0,
                 hypothesis.matrix = A,
                 test = "Hotelling-Lawley",
                 P = cbind(c(0, 1, 0, 0),
                           c(0, 0, 0, 1)))
## 
##  Response transformation matrix:
##     [,1] [,2]
## wb     0    0
## hei    1    0
## nb     0    0
## nn     0    1
## 
## Sum of squares and products for the hypothesis:
##        [,1]   [,2]
## [1,] 893.32 -4.780
## [2,]  -4.78 61.312
## 
## Sum of squares and products for error:
##        [,1]  [,2]
## [1,] 4727.4 472.2
## [2,]  472.2 252.8
## 
## Multivariate Test: 
##                  Df test stat approx F num Df den Df   Pr(>F)  
## Hotelling-Lawley 16 0.5351141 1.638787     32    196 0.022751 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(candisc)

# For the effect of Position.
cd <- candisc(m0, term = "GS:Def")

cd
## 
## Canonical Discriminant Analysis for GS:Def:
## 
##     CanRsq Eigenvalue Difference Percent Cumulative
## 1 0.445271   0.802683    0.51868 57.3260     57.326
## 2 0.221187   0.284005    0.51868 20.2831     77.609
## 3 0.195662   0.243259    0.51868 17.3730     94.982
## 4 0.065649   0.070262    0.51868  5.0179    100.000
## 
## Test of H0: The canonical correlations in the 
## current row and all that follow are zero
## 
##   LR test stat approx F numDF  denDF   Pr(> F)    
## 1      0.32469  2.14986    64 413.33 4.291e-06 ***
## 2      0.58530  1.38601    45 315.68   0.05917 .  
## 3      0.75153  1.17334    28 214.00   0.25942    
## 4      0.93435  0.58371    13 108.00   0.86253    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# summary(cd)

# Biplot.
plot(cd)

# Correlations between responses and canonical scores.
cd$structure
##           Can1       Can2       Can3        Can4
## wb  -0.9583554 -0.1069632 -0.1156343  0.52736069
## hei -0.2571794  0.4732892 -0.5083136 -0.66099469
## nb  -0.7729356 -0.2704224  0.3528778 -0.09984327
## nn   0.5207399 -0.3967256 -0.5112794 -0.59729981
combineLimits(
    useOuterStrips(
        xyplot(Can1 + Can2 + Can3 ~ Def | GS,
               outer = TRUE,
               jitter.x = TRUE,
               as.table = TRUE,
               type = c("p", "smooth"),
               scales = list(y = "free"),
               data = cd$scores)))

3 Modelling multivariate continuous and counting outcomes using the mcglm package

source("../review/functions.R")
# Linear predictors
f.wb <- wb ~ GS * Def
f.hei <- hei ~ GS * Def
f.nb <- nb ~ GS * Def
f.nn <- nn ~ GS * Def

# Matrix linear predictor
Z0 <- mc_id(cot)

# Fitting
fit1 <- mcglm(c(f.wb, f.hei, f.nb, f.nn), list(Z0,Z0,Z0,Z0),
              link = c("identity","identity","log","log"),
              variance = c("constant","constant",
                           "poisson_tweedie","poisson_tweedie"),
              control_algorithm = list(tunning = 0.8),
              data = cot)
## Automatic initial values selected.
# Dispersion structure
summary(fit1, print = "Dispersion")
## Dispersion:
##   Estimates Std.error  Z value
## 1  8.289426  1.209611 6.852971
## 
## Dispersion:
##   Estimates Std.error  Z value
## 1    31.516   4.96653 6.345678
## 
## Dispersion:
##    Estimates  Std.error   Z value
## 1 -0.8465823 0.02147031 -39.43038
## 
## Dispersion:
##    Estimates  Std.error   Z value
## 1 -0.8859486 0.02199154 -40.28587
## 
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 7
# Correlation between outcomes
summary(fit1, print = "Correlation")
## Correlation matrix:
##   Parameters   Estimates  Std.error    Z value
## 1      rho12 -0.16051462 0.08765881 -1.8311293
## 2      rho13  0.37256171 0.07900238  4.7158289
## 3      rho14 -0.31793636 0.08445952 -3.7643639
## 4      rho23  0.05375402 0.08923456  0.6023902
## 5      rho24  0.43861701 0.08161796  5.3740254
## 6      rho34 -0.08981478 0.08900223 -1.0091296
## 
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 7
# Multivariate Hotelling-Lawley test
manova.mcglm(fit1)
##     Effects Df Hotelling.Lawley Qui.square      p_value
## 1 Intercept  4           60.748   7593.476 0.000000e+00
## 2        GS 16            0.288     36.000 2.893376e-03
## 3       Def 16            0.529     66.079 4.800272e-08
## 4    GS:Def 64            1.682    210.294 1.759020e-17

4 References

Silva, A.M., Degrande, P.E., Suekane, R., Fernandes, M.G., Zeviani, W.M., 2012. Impacto de diferentes níveis de desfolha artificial nos estádios fenológicos do algodoeiro. Revista de Ciências Agrárias 35, 163–172.

Zeviani, W.M., Ribeiro, P.J., Bonat, W.H., Shimakura, S.E., Muniz, J.A., 2014. The Gamma-count distribution in the analysis of experimental underdispersed data. Journal of Applied Statistics 41, 1–11. doi:10.1080/02664763.2014.922168

5 Session information

# devtools::session_info()
Sys.time()
## [1] "2017-07-26 19:37:54 BRT"
cbind(Sys.info())
##                [,1]                                          
## sysname        "Linux"                                       
## release        "4.4.0-87-generic"                            
## version        "#110-Ubuntu SMP Tue Jul 18 12:55:35 UTC 2017"
## nodename       "class"                                       
## machine        "x86_64"                                      
## login          "unknown"                                     
## user           "walmes"                                      
## effective_user "walmes"
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.2 LTS
## 
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] methods   stats     graphics  grDevices utils     datasets  base     
## 
## other attached packages:
##  [1] Matrix_1.2-10       mcglm_0.4.0         multcomp_1.4-5     
##  [4] TH.data_1.0-8       MASS_7.3-47         survival_2.40-1    
##  [7] mvtnorm_1.0-5       doBy_4.5-15         candisc_0.7-2      
## [10] heplots_1.3-3       car_2.1-4           gridExtra_2.2.1    
## [13] latticeExtra_0.6-28 RColorBrewer_1.1-2  lattice_0.20-35    
## [16] knitr_1.15.1        rmarkdown_1.3      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.9        compiler_3.4.1     nloptr_1.0.4      
##  [4] tools_3.4.1        digest_0.6.12      lme4_1.1-12       
##  [7] evaluate_0.10      nlme_3.1-131       gtable_0.2.0      
## [10] mgcv_1.8-17        yaml_2.1.14        parallel_3.4.1    
## [13] SparseM_1.74       stringr_1.2.0      MatrixModels_0.4-1
## [16] rprojroot_1.2      grid_3.4.1         nnet_7.3-12       
## [19] minqa_1.2.4        magrittr_1.5       codetools_0.2-15  
## [22] backports_1.0.5    htmltools_0.3.5    splines_3.4.1     
## [25] assertthat_0.2.0   pbkrtest_0.4-6     quantreg_5.29     
## [28] sandwich_2.3-4     stringi_1.1.2      zoo_1.7-14