Modelos semiparamétricos
##=============================================================================
## Aplicação de modelos de regressão linear e
## não linear em ciências agrárias
##
## 09 à 11 de Dezembro de 2014 - Goiânia/GO
## Embrapa Arroz e Feijão
##
## Prof. Dr. Walmes M. Zeviani
## LEG/DEST/UFPR
##=============================================================================
##-----------------------------------------------------------------------------
## Definições da sessão.
pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "alr3",
"plyr", "reshape", "splines", "mgcv", "wzRfun")
sapply(pkg, require, character.only=TRUE)
## lattice latticeExtra gridExtra car alr3 plyr
## TRUE TRUE TRUE TRUE TRUE TRUE
## reshape splines mgcv wzRfun
## TRUE TRUE TRUE TRUE
trellis.device(color=FALSE)
##-----------------------------------------------------------------------------
## "~/Dropbox/Consultorias/Leonardo Seno/suino_pre_abate"
##
## Ver: http://cran.r-project.org/web/packages/lmeSplines/lmeSplines.pdf
##
## É possível fazer!
## http://stackoverflow.com/questions/24750002/slopes-for-lme-linear-b-spline-model
## http://www.ats.ucla.edu/stat/r/examples/alda/ch6.htm
##-----------------------------------------------------------------------------
## Leitura.
sui <- read.table("http://www.leg.ufpr.br/~walmes/data/preabate.txt",
header=TRUE, dec=",")
sui$trat <- factor(-1*sui$trat)
levels(sui$trat) <- c("Sem aspersão","Com aspersão")
str(sui)
## 'data.frame': 384 obs. of 5 variables:
## $ trat: Factor w/ 2 levels "Sem aspersão",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ asp : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hora: int 0 0 0 0 5 5 5 5 10 10 ...
## $ rep : int 1 2 3 4 1 2 3 4 1 2 ...
## $ temp: num 29 30.9 34.5 31.3 29.6 ...
xyplot(temp~hora, groups=trat, data=sui,
xlab="Minutos à partir das 07:00 da manhã",
ylab="Temperatura corporal (ºC)",
auto.key=TRUE)+
glayer(panel.smoother(..., span=0.4))

xyplot(temp~hora|trat, groups=asp, data=sui, type=c("p","smooth"))

## Efeito de animal?
xyplot(temp~hora|trat, groups=rep, data=sui, type=c("b"))

Regressão local
##-----------------------------------------------------------------------------
## Loess.
str(sui)
## 'data.frame': 384 obs. of 5 variables:
## $ trat: Factor w/ 2 levels "Sem aspersão",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ asp : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hora: int 0 0 0 0 5 5 5 5 10 10 ...
## $ rep : int 1 2 3 4 1 2 3 4 1 2 ...
## $ temp: num 29 30.9 34.5 31.3 29.6 ...
ca <- subset(sui, trat=="Com aspersão")
## m0 <- loess(temp~hora, data=ca)
m0 <- loess(temp~hora, data=ca, span=0.25)
## m0 <- loess(temp~hora, data=ca, span=0.25, degree=1)
## Medidas de ajuste.
summary(m0)
## Call:
## loess(formula = temp ~ hora, data = ca, span = 0.25)
##
## Number of Observations: 192
## Equivalent Number of Parameters: 11.87
## Residual Standard Error: 1.307
## Trace of smoother matrix: 13.12
##
## Control settings:
## normalize: TRUE
## span : 0.25
## degree : 2
## family : gaussian
## surface : interpolate cell = 0.2
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, qnorm(0.975)*c(lwr=-1,fit=0,upr=1), FUN="*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame': 200 obs. of 4 variables:
## $ hora: num 0 1.18 2.36 3.54 4.72 ...
## $ lwr : num 30.5 30.2 30 29.8 29.5 ...
## $ fit : num 31.5 31.2 30.8 30.5 30.2 ...
## $ upr : num 32.5 32.1 31.7 31.3 30.9 ...
p1 <- xyplot(temp~hora, data=ca,
ylab="Temperatura do corpo",
xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
ly=pred$lwr, uy=pred$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)

Splines
##-----------------------------------------------------------------------------
## Splines.
## m0 <- lm(temp~bs(hora), data=ca)
## m0 <- lm(temp~bs(hora, degree=13, df=1), data=ca)
m0 <- lm(temp~ns(hora, df=13), data=ca)
summary(m0)
##
## Call:
## lm(formula = temp ~ ns(hora, df = 13), data = ca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7022 -0.9033 -0.0045 0.7134 3.8193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.66367 0.55157 57.406 < 2e-16 ***
## ns(hora, df = 13)1 -2.27264 0.85325 -2.664 0.008442 **
## ns(hora, df = 13)2 -1.22015 0.97361 -1.253 0.211771
## ns(hora, df = 13)3 -3.37650 0.89413 -3.776 0.000217 ***
## ns(hora, df = 13)4 -1.89242 0.98766 -1.916 0.056959 .
## ns(hora, df = 13)5 1.24033 0.91510 1.355 0.177005
## ns(hora, df = 13)6 -2.62154 0.92759 -2.826 0.005249 **
## ns(hora, df = 13)7 -3.10635 0.96947 -3.204 0.001605 **
## ns(hora, df = 13)8 2.75124 0.92410 2.977 0.003314 **
## ns(hora, df = 13)9 0.08984 0.92098 0.098 0.922400
## ns(hora, df = 13)10 -4.65297 0.96546 -4.819 3.07e-06 ***
## ns(hora, df = 13)11 1.10806 0.80262 1.381 0.169149
## ns(hora, df = 13)12 -3.62265 1.42213 -2.547 0.011701 *
## ns(hora, df = 13)13 3.78342 0.66592 5.682 5.36e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.304 on 178 degrees of freedom
## Multiple R-squared: 0.5533, Adjusted R-squared: 0.5207
## F-statistic: 16.96 on 13 and 178 DF, p-value: < 2.2e-16
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, interval="confidence")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame': 200 obs. of 4 variables:
## $ hora: num 0 1.18 2.36 3.54 4.72 ...
## $ fit : num 31.7 31.3 31 30.6 30.3 ...
## $ lwr : num 30.6 30.3 30.1 29.8 29.6 ...
## $ upr : num 32.8 32.3 31.8 31.4 31 ...
p1 <- xyplot(temp~hora, data=ca,
ylab="Temperatura do corpo",
xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
ly=pred$lwr, uy=pred$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)

Modelos aditivos generalizados (gam)
##-----------------------------------------------------------------------------
## Ajustar gam.
m0 <- gam(temp~s(hora), data=ca)
summary(m0)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## temp ~ s(hora)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.2432 0.0951 318 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(hora) 8.92 8.998 22.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.511 Deviance explained = 53.3%
## GCV = 1.8311 Scale est. = 1.7365 n = 192
pred <- data.frame(hora=seq(min(ca$hora), max(ca$hora), l=200))
aux <- predict(m0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, c(lwr=-1,fit=0,upr=1)*qnorm(0.975), "*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame': 200 obs. of 4 variables:
## $ hora: num 0 1.18 2.36 3.54 4.72 ...
## $ lwr : num 30.5 30.3 30.1 29.8 29.6 ...
## $ fit : num 31.5 31.2 30.9 30.6 30.3 ...
## $ upr : num 32.5 32.1 31.7 31.4 31 ...
p1 <- xyplot(temp~hora, data=ca,
ylab="Temperatura do corpo",
xlab="Minutos a partir das 7h00")
p2 <- xyplot(fit~hora, data=pred, type="l",
ly=pred$lwr, uy=pred$upr, cty="bands",
prepanel=prepanel.cbH, panel=panel.cbH)
p1+as.layer(p2)

##-----------------------------------------------------------------------------
## Ajuste para os dois níveis de tratamento.
## Modelo sem interação trat com hora.
g1 <- gam(temp~trat+s(hora), data=sui)
## Modelo em que em cada trat a hora tem um efeito diferente.
g0 <- gam(temp~trat+s(hora, by=trat), data=sui)
## Para hipótese de interação nula.
anova(g0, g1, test="F")
## Analysis of Deviance Table
##
## Model 1: temp ~ trat + s(hora, by = trat)
## Model 2: temp ~ trat + s(hora)
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 368.77 562.50
## 2 373.20 824.09 -4.4341 -261.59 38.677 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Resumo do ajuste.
summary(g0)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## temp ~ trat + s(hora, by = trat)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.76667 0.08913 390.06 <2e-16 ***
## tratCom aspersão -4.52344 0.12605 -35.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(hora):tratSem aspersão 4.302 5.295 92.57 <2e-16 ***
## s(hora):tratCom aspersão 8.930 8.999 25.76 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.84 Deviance explained = 84.5%
## GCV = 1.5883 Scale est. = 1.5253 n = 384
pred <- expand.grid(hora=seq(min(sui$hora), max(sui$hora), l=100),
trat=levels(sui$trat))
aux <- predict(g0, newdata=pred, se=TRUE)
aux$me <- outer(aux$se.fit, c(lwr=-1,fit=0,upr=1)*qnorm(0.975), "*")
aux <- sweep(aux$me, MARGIN=1, STATS=aux$fit, FUN="+")
pred <- cbind(pred, aux)
str(pred)
## 'data.frame': 200 obs. of 5 variables:
## $ hora: num 0 2.37 4.75 7.12 9.49 ...
## $ trat: Factor w/ 2 levels "Sem aspersão",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ lwr : num 30.3 30.4 30.6 30.7 30.8 ...
## $ fit : num 31 31 31.1 31.2 31.3 ...
## $ upr : num 31.6 31.7 31.7 31.7 31.8 ...
xmn <- seq(0, 4*60, by=30)
xhr <- seq(as.POSIXct("07:00", format="%H:%M"),
as.POSIXct("11:00", format="%H:%M"), "30 min")
format(xhr, "%H:%M")
## [1] "07:00" "07:30" "08:00" "08:30" "09:00" "09:30" "10:00" "10:30" "11:00"
p1 <- xyplot(temp~hora, groups=trat, data=sui,
xlab="Minutos à partir das 07:00 da manhã",
ylab="Temperatura corporal (ºC)",
scales=list(x=list(at=xmn, labels=format(xhr, "%H:%M"))),
auto.key=list(columns=2),
panel=function(...){
panel.abline(v=(0:9)*30, lty=3, col="gray50")
panel.rect((0:3)*60, 22, ((0:3)+0.5)*60, 40,
col="gray55", border="transparent", alpha=0.1)
panel.xyplot(...)
})
p2 <- xyplot(fit~hora, groups=trat, data=pred, type="l",
prepanel=prepanel.cbH, cty="bands",
ly=pred$lwr, uy=pred$upr, alpha=0.1, fill=1,
panel.groups=panel.cbH,
panel=panel.superpose)
p1+as.layer(p2)

##-----------------------------------------------------------------------------
## Informações da sessão.
Sys.time()
## [1] "2014-12-11 21:00:38 BRST"
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=pt_BR.UTF-8
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines grid stats graphics grDevices utils datasets base
##
## other attached packages:
## [1] wzRfun_0.4 mgcv_1.8-3 nlme_3.1-118 reshape_0.8.5
## [5] plyr_1.8.1 alr3_2.0.5 car_2.0-22 gridExtra_0.9.1
## [9] latticeExtra_0.6-26 RColorBrewer_1.0-5 lattice_0.20-29 rmarkdown_0.3.3
## [13] knitr_1.8
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.4 doBy_4.5-12 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6
## [6] MASS_7.3-35 Matrix_1.1-4 methods_3.1.2 multcomp_1.3-7 mvtnorm_1.0-1
## [11] nnet_7.3-8 Rcpp_0.11.3 sandwich_2.3-2 stringr_0.6.2 survival_2.37-7
## [16] TH.data_1.0-5 tools_3.1.2 yaml_2.1.13 zoo_1.7-11