Modelos de Regressão e aplicações no ambiente R

13 a 17 de Abril de 2015 - Manaus - AM
Prof. Dr. Walmes M. Zeviani
Fundação Oswaldo Cruz - FIOCRUZ
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR

Métodos de seleção de variáveis

##=============================================================================
## Modelos de Regressão e aplicações no ambiente R
##
##   13 a 17 de Abril de 2015 - Manaus/AM
##   Fundação Oswaldo Cruz - FIOCRUZ
## 
##                                                  Prof. Dr. Walmes M. Zeviani
##                                                                LEG/DEST/UFPR
##=============================================================================

##-----------------------------------------------------------------------------
## Session setup.

## Packages used throughout the script.
pkg <- c("lattice", "latticeExtra", "gridExtra", "car", "alr3",
         "plyr", "reshape", "doBy", "multcomp", "asbio", "wzRfun")

## Attach every package. require() only returns FALSE (with a warning)
## when a package cannot be loaded, so the flags are kept and checked
## below. vapply() guarantees exactly one logical value per package.
loaded <- vapply(pkg, require, logical(1), character.only=TRUE)
loaded
##      lattice latticeExtra    gridExtra          car         alr3         plyr 
##         TRUE         TRUE         TRUE         TRUE         TRUE         TRUE 
##      reshape         doBy     multcomp        asbio       wzRfun 
##         TRUE         TRUE         TRUE         TRUE         TRUE

## Stop right away instead of failing later with an unrelated error.
if (!all(loaded)) {
    stop("Packages not available: ",
         paste(pkg[!loaded], collapse=", "), call.=FALSE)
}

## Initialise the lattice graphics device with the non-colour settings.
trellis.device(color=FALSE)

Stepwise

##-----------------------------------------------------------------------------
## Data.

## Tab-separated text file with a header row; character columns are kept
## as character when reading.
url <- "http://www.leg.ufpr.br/~walmes/data/areafoliarUva.txt"
uva <- read.table(file=url, sep="\t", header=TRUE,
                  stringsAsFactors=FALSE)

## Only the cultivar column is converted to a factor.
uva[["cult"]] <- factor(uva[["cult"]])
str(uva)
## 'data.frame':    300 obs. of  9 variables:
##  $ id  : chr  "malbec_1.jpg" "malbec_10.jpg" "malbec_11.jpg" "malbec_12.jpg" ...
##  $ cult: Factor w/ 3 levels "malbec","merlot",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area: num  100.8 85.8 119.5 137 84.7 ...
##  $ mc  : num  12 11.5 12.5 15.5 10 12 15.5 17.5 13.5 13.3 ...
##  $ nc  : num  7.5 9 8.5 10 7 8.5 11 13 10 9.5 ...
##  $ ml  : num  12.8 10.5 13 14.4 11 12 14 14 12 15 ...
##  $ nld : num  6.4 8.5 8.6 9 6.5 9 10 9.5 9 9.2 ...
##  $ nle : num  7.5 7 9 10 7 8.2 11 11 8.5 8.3 ...
##  $ cll : num  9.5 9.5 10.2 12 7.5 8.9 13.5 10.8 9.7 10.3 ...
##-----------------------------------------------------------------------------
## Lateral vein length: mean of the right and left sides (columns nld
## and nle).

## rowMeans() is the vectorised form of applying mean() to each row.
uva$nl <- with(uva, rowMeans(cbind(nld, nle)))

## The two original columns are dropped once the mean is stored in nl.
uva <- subset(uva, select=-c(nld, nle))
str(uva)
## 'data.frame':    300 obs. of  8 variables:
##  $ id  : chr  "malbec_1.jpg" "malbec_10.jpg" "malbec_11.jpg" "malbec_12.jpg" ...
##  $ cult: Factor w/ 3 levels "malbec","merlot",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area: num  100.8 85.8 119.5 137 84.7 ...
##  $ mc  : num  12 11.5 12.5 15.5 10 12 15.5 17.5 13.5 13.3 ...
##  $ nc  : num  7.5 9 8.5 10 7 8.5 11 13 10 9.5 ...
##  $ ml  : num  12.8 10.5 13 14.4 11 12 14 14 12 15 ...
##  $ cll : num  9.5 9.5 10.2 12 7.5 8.9 13.5 10.8 9.7 10.3 ...
##  $ nl  : num  6.95 7.75 8.8 9.5 6.75 ...
##-----------------------------------------------------------------------------
## Visual check.

## splom(uva[,3:8], groups=uva$cult)

## One data frame per cultivar level.
Uva <- split(x=uva, f=uva[["cult"]])
## str(Uva)

## Scatter plot matrix of the numeric variables for the first cultivar
## in the list.
splom(with(Uva[[1]], cbind(area, mc, nc, ml, nl, cll)))

## with(Uva[[2]], splom(cbind(area,mc,nc,ml,nl,cll)))
## with(Uva[[3]], splom(cbind(area,mc,nc,ml,nl,cll)))

##-----------------------------------------------------------------------------
## Malbec.

## Keep only the rows whose cultivar is "malbec"; which() discards
## missing comparisons, as subset() does.
mal <- uva[which(uva$cult == "malbec"), , drop=FALSE]

##-----------------------------------------------------------------------------
## Models.

## Additive effects only.
m0 <- lm(area ~ mc + ml + nc + nl + cll, data=mal)

## Diagnostics: residual plots of m0 on a 2 x 2 grid, then back to a
## single panel.
par(mfrow=c(2, 2))
plot(m0)
layout(1)

## Transformation: Box-Cox plot for m0 with a vertical reference line
## at 0.5.
MASS::boxcox(m0)
abline(v=0.5, col=2)

## Refit with the square root of the response and repeat the
## diagnostics.
m1 <- update(m0, sqrt(area) ~ .)
par(mfrow=c(2, 2))
plot(m1)
layout(1)

## Inference: coefficient table and fit statistics of m1.
summary(m1)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + ml + nc + nl + cll, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84138 -0.23439 -0.02851  0.14774  1.87394 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20824    0.16965   1.227    0.223    
## mc           0.27039    0.04612   5.862 6.73e-08 ***
## ml           0.35067    0.03092  11.340  < 2e-16 ***
## nc           0.02091    0.04581   0.456    0.649    
## nl           0.32234    0.07408   4.351 3.43e-05 ***
## cll         -0.03684    0.03926  -0.938    0.350    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3698 on 94 degrees of freedom
## Multiple R-squared:  0.9784, Adjusted R-squared:  0.9773 
## F-statistic: 853.3 on 5 and 94 DF,  p-value: < 2.2e-16
## Variance inflation factors of m1; in the output printed below the
## values for mc, nl and nc are above 10.
vif(m1)
##        mc        ml        nc        nl       cll 
## 17.848533  5.518162 10.653027 15.858220  8.085096
##-----------------------------------------------------------------------------
## Could a larger model be tried? With interactions and quadratic
## terms, for example?

## Full quadratic model: main effects, all two-way interactions and the
## square of each predictor.
m2 <- update(m1,
             . ~ (mc + nc + ml + nl + cll)^2 +
                 I(mc^2) + I(nc^2) + I(ml^2) + I(nl^2) + I(cll^2))

## Diagnostics.
par(mfrow=c(2, 2))
plot(m2)
layout(1)
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## High leverages because the model has many terms. Simplify the
## model.
summary(m2)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + 
##     I(nc^2) + I(ml^2) + I(nl^2) + I(cll^2) + mc:nc + mc:ml + 
##     mc:nl + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + ml:cll + 
##     nl:cll, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54580 -0.17053  0.01312  0.13645  0.56176 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.455695   0.310257   1.469  0.14587    
## mc           0.182184   0.264190   0.690  0.49247    
## nc           0.226102   0.348965   0.648  0.51891    
## ml           0.366351   0.148907   2.460  0.01607 *  
## nl           1.226574   0.378141   3.244  0.00173 ** 
## cll         -0.902841   0.201932  -4.471 2.57e-05 ***
## I(mc^2)      0.072435   0.039310   1.843  0.06913 .  
## I(nc^2)      0.001509   0.023232   0.065  0.94838    
## I(ml^2)      0.058711   0.018965   3.096  0.00272 ** 
## I(nl^2)     -0.009540   0.080695  -0.118  0.90619    
## I(cll^2)     0.032119   0.025333   1.268  0.20856    
## mc:nc       -0.041382   0.040880  -1.012  0.31450    
## mc:ml       -0.091415   0.037340  -2.448  0.01657 *  
## mc:nl        0.003009   0.109233   0.028  0.97809    
## mc:cll      -0.030896   0.053601  -0.576  0.56598    
## nc:ml        0.059610   0.048743   1.223  0.22499    
## nc:nl       -0.083565   0.119891  -0.697  0.48784    
## nc:cll       0.018013   0.059472   0.303  0.76277    
## ml:nl       -0.068842   0.064273  -1.071  0.28739    
## ml:cll      -0.028612   0.028890  -0.990  0.32502    
## nl:cll       0.094022   0.062817   1.497  0.13844    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2402 on 79 degrees of freedom
## Multiple R-squared:  0.9924, Adjusted R-squared:  0.9904 
## F-statistic: 513.1 on 20 and 79 DF,  p-value: < 2.2e-16
##-----------------------------------------------------------------------------
## Stepwise.

## AIC criterion (penalty k=2). The search starts from m2; the trace
## printed below only shows terms being dropped.
m3 <- step(m2, k=2)
## Start:  AIC=-266.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:nl + mc:cll + nc:ml + 
##     nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:nl     1   0.00004 4.5570 -268.85
## - I(nc^2)   1   0.00024 4.5572 -268.85
## - I(nl^2)   1   0.00081 4.5578 -268.83
## - nc:cll    1   0.00529 4.5623 -268.73
## - mc:cll    1   0.01917 4.5762 -268.43
## - nc:nl     1   0.02802 4.5850 -268.24
## - ml:cll    1   0.05658 4.6136 -267.62
## - mc:nc     1   0.05911 4.6161 -267.56
## - ml:nl     1   0.06618 4.6232 -267.41
## - nc:ml     1   0.08627 4.6433 -266.98
## <none>                  4.5570 -266.85
## - I(cll^2)  1   0.09273 4.6497 -266.84
## - nl:cll    1   0.12923 4.6862 -266.05
## - I(mc^2)   1   0.19587 4.7529 -264.64
## - mc:ml     1   0.34573 4.9027 -261.54
## - I(ml^2)   1   0.55284 5.1098 -257.40
## 
## Step:  AIC=-268.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + 
##     nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nc^2)   1   0.00022 4.5573 -270.85
## - I(nl^2)   1   0.00076 4.5578 -270.83
## - nc:cll    1   0.00528 4.5623 -270.73
## - mc:cll    1   0.01951 4.5766 -270.42
## - ml:cll    1   0.05870 4.6157 -269.57
## - nc:nl     1   0.06884 4.6259 -269.35
## - ml:nl     1   0.07461 4.6317 -269.23
## <none>                  4.5570 -268.85
## - I(cll^2)  1   0.09269 4.6497 -268.84
## - nc:ml     1   0.09674 4.6538 -268.75
## - mc:nc     1   0.10526 4.6623 -268.57
## - nl:cll    1   0.12941 4.6865 -268.05
## - mc:ml     1   0.38703 4.9441 -262.70
## - I(ml^2)   1   0.58958 5.1466 -258.68
## - I(mc^2)   1   0.60915 5.1662 -258.30
## 
## Step:  AIC=-270.84
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(nl^2) + 
##     I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + 
##     ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nl^2)   1   0.00087 4.5581 -272.82
## - nc:cll    1   0.00613 4.5634 -272.71
## - mc:cll    1   0.02110 4.5784 -272.38
## - ml:cll    1   0.05911 4.6164 -271.56
## - ml:nl     1   0.07439 4.6317 -271.23
## - nc:nl     1   0.07442 4.6317 -271.23
## <none>                  4.5573 -270.85
## - I(cll^2)  1   0.09443 4.6517 -270.79
## - mc:nc     1   0.11328 4.6706 -270.39
## - nl:cll    1   0.12983 4.6871 -270.04
## - nc:ml     1   0.13125 4.6885 -270.00
## - mc:ml     1   0.51202 5.0693 -262.20
## - I(ml^2)   1   0.58939 5.1467 -260.68
## - I(mc^2)   1   0.86389 5.4212 -255.49
## 
## Step:  AIC=-272.83
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + 
##     ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - nc:cll    1   0.00738 4.5655 -274.66
## - mc:cll    1   0.02100 4.5791 -274.37
## - ml:cll    1   0.05974 4.6179 -273.52
## <none>                  4.5581 -272.82
## - I(cll^2)  1   0.09907 4.6572 -272.68
## - mc:nc     1   0.12869 4.6868 -272.04
## - ml:nl     1   0.13044 4.6886 -272.00
## - nc:ml     1   0.13871 4.6969 -271.83
## - nc:nl     1   0.16108 4.7192 -271.35
## - nl:cll    1   0.17479 4.7329 -271.06
## - mc:ml     1   0.51177 5.0699 -264.19
## - I(ml^2)   1   0.61947 5.1776 -262.08
## - I(mc^2)   1   0.91497 5.4731 -256.53
## 
## Step:  AIC=-274.66
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + ml:nl + ml:cll + 
##     nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:cll    1   0.02169 4.5872 -276.19
## - ml:cll    1   0.07384 4.6394 -275.06
## <none>                  4.5655 -274.66
## - I(cll^2)  1   0.09969 4.6652 -274.50
## - mc:nc     1   0.12428 4.6898 -273.98
## - ml:nl     1   0.13367 4.6992 -273.78
## - nc:nl     1   0.15570 4.7212 -273.31
## - nl:cll    1   0.16929 4.7348 -273.02
## - nc:ml     1   0.39676 4.9623 -268.33
## - I(ml^2)   1   0.65650 5.2220 -263.23
## - mc:ml     1   1.14985 5.7154 -254.20
## - I(mc^2)   1   1.49542 6.0610 -248.33
## 
## Step:  AIC=-276.19
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - ml:cll    1   0.07030 4.6575 -276.67
## - I(cll^2)  1   0.07811 4.6653 -276.50
## <none>                  4.5872 -276.19
## - ml:nl     1   0.11425 4.7015 -275.73
## - mc:nc     1   0.14213 4.7294 -275.14
## - nl:cll    1   0.14783 4.7351 -275.02
## - nc:nl     1   0.14884 4.7361 -275.00
## - nc:ml     1   0.40464 4.9919 -269.74
## - I(ml^2)   1   0.64508 5.2323 -265.03
## - I(mc^2)   1   1.50089 6.0881 -249.88
## - mc:ml     1   1.51718 6.1044 -249.62
## 
## Step:  AIC=-276.67
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1   0.03019 4.6877 -278.02
## <none>                  4.6575 -276.67
## - ml:nl     1   0.09566 4.7532 -276.64
## - nl:cll    1   0.12936 4.7869 -275.93
## - nc:nl     1   0.13195 4.7895 -275.88
## - mc:nc     1   0.14474 4.8023 -275.61
## - nc:ml     1   0.34893 5.0065 -271.44
## - I(ml^2)   1   0.63604 5.2936 -265.87
## - I(mc^2)   1   1.54683 6.2044 -249.99
## - mc:ml     1   1.64835 6.3059 -248.37
## 
## Step:  AIC=-278.02
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 4.6877 -278.02
## - mc:nc    1   0.12074 4.8085 -277.48
## - ml:nl    1   0.16116 4.8489 -276.64
## - nc:nl    1   0.28518 4.9729 -274.12
## - nc:ml    1   0.50310 5.1908 -269.83
## - I(ml^2)  1   0.69800 5.3857 -266.14
## - I(mc^2)  1   1.52137 6.2091 -251.92
## - mc:ml    1   1.66571 6.3534 -249.62
## - nl:cll   1   1.71357 6.4013 -248.87
## Analysis of variance table of the model selected with k=2.
anova(m3)
## Analysis of Variance Table
## 
## Response: sqrt(area)
##           Df Sum Sq Mean Sq    F value    Pr(>F)    
## mc         1 547.86  547.86 10050.9224 < 2.2e-16 ***
## nc         1   0.06    0.06     1.0534  0.307599    
## ml         1  33.08   33.08   606.9500 < 2.2e-16 ***
## nl         1   2.48    2.48    45.4234 1.721e-09 ***
## cll        1   0.12    0.12     2.2097  0.140806    
## I(mc^2)    1   0.40    0.40     7.2614  0.008472 ** 
## I(ml^2)    1   1.73    1.73    31.7438 2.181e-07 ***
## mc:nc      1   0.59    0.59    10.8010  0.001470 ** 
## mc:ml      1   2.91    2.91    53.3713 1.313e-10 ***
## nc:ml      1   0.82    0.82    14.9803  0.000211 ***
## nc:nl      1   0.01    0.01     0.1729  0.678578    
## ml:nl      1   0.01    0.01     0.1131  0.737496    
## nl:cll     1   1.71    1.71    31.4370 2.448e-07 ***
## Residuals 86   4.69    0.05                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## BIC criterion: penalty k=log(n), with n the number of rows of mal.
## The result is stored under its own name because m4 is assigned again
## further down by step(m2, k=12); reusing m4 here would discard the
## BIC-selected model before it is ever used.
m4bic <- step(m2, k=log(nrow(mal)))
## Start:  AIC=-212.14
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:nl + mc:cll + nc:ml + 
##     nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:nl     1   0.00004 4.5570 -216.75
## - I(nc^2)   1   0.00024 4.5572 -216.74
## - I(nl^2)   1   0.00081 4.5578 -216.73
## - nc:cll    1   0.00529 4.5623 -216.63
## - mc:cll    1   0.01917 4.5762 -216.33
## - nc:nl     1   0.02802 4.5850 -216.13
## - ml:cll    1   0.05658 4.6136 -215.51
## - mc:nc     1   0.05911 4.6161 -215.46
## - ml:nl     1   0.06618 4.6232 -215.31
## - nc:ml     1   0.08627 4.6433 -214.87
## - I(cll^2)  1   0.09273 4.6497 -214.73
## - nl:cll    1   0.12923 4.6862 -213.95
## - I(mc^2)   1   0.19587 4.7529 -212.54
## <none>                  4.5570 -212.14
## - mc:ml     1   0.34573 4.9027 -209.43
## - I(ml^2)   1   0.55284 5.1098 -205.30
## 
## Step:  AIC=-216.75
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + 
##     nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nc^2)   1   0.00022 4.5573 -221.35
## - I(nl^2)   1   0.00076 4.5578 -221.34
## - nc:cll    1   0.00528 4.5623 -221.24
## - mc:cll    1   0.01951 4.5766 -220.92
## - ml:cll    1   0.05870 4.6157 -220.07
## - nc:nl     1   0.06884 4.6259 -219.85
## - ml:nl     1   0.07461 4.6317 -219.73
## - I(cll^2)  1   0.09269 4.6497 -219.34
## - nc:ml     1   0.09674 4.6538 -219.25
## - mc:nc     1   0.10526 4.6623 -219.07
## - nl:cll    1   0.12941 4.6865 -218.55
## <none>                  4.5570 -216.75
## - mc:ml     1   0.38703 4.9441 -213.20
## - I(ml^2)   1   0.58958 5.1466 -209.19
## - I(mc^2)   1   0.60915 5.1662 -208.81
## 
## Step:  AIC=-221.35
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(nl^2) + 
##     I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + 
##     ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nl^2)   1   0.00087 4.5581 -225.93
## - nc:cll    1   0.00613 4.5634 -225.82
## - mc:cll    1   0.02110 4.5784 -225.49
## - ml:cll    1   0.05911 4.6164 -224.66
## - ml:nl     1   0.07439 4.6317 -224.33
## - nc:nl     1   0.07442 4.6317 -224.33
## - I(cll^2)  1   0.09443 4.6517 -223.90
## - mc:nc     1   0.11328 4.6706 -223.50
## - nl:cll    1   0.12983 4.6871 -223.14
## - nc:ml     1   0.13125 4.6885 -223.11
## <none>                  4.5573 -221.35
## - mc:ml     1   0.51202 5.0693 -215.30
## - I(ml^2)   1   0.58939 5.1467 -213.79
## - I(mc^2)   1   0.86389 5.4212 -208.59
## 
## Step:  AIC=-225.93
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + 
##     ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - nc:cll    1   0.00738 4.5655 -230.38
## - mc:cll    1   0.02100 4.5791 -230.08
## - ml:cll    1   0.05974 4.6179 -229.24
## - I(cll^2)  1   0.09907 4.6572 -228.39
## - mc:nc     1   0.12869 4.6868 -227.75
## - ml:nl     1   0.13044 4.6886 -227.72
## - nc:ml     1   0.13871 4.6969 -227.54
## - nc:nl     1   0.16108 4.7192 -227.06
## - nl:cll    1   0.17479 4.7329 -226.77
## <none>                  4.5581 -225.93
## - mc:ml     1   0.51177 5.0699 -219.90
## - I(ml^2)   1   0.61947 5.1776 -217.79
## - I(mc^2)   1   0.91497 5.4731 -212.24
## 
## Step:  AIC=-230.38
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + ml:nl + ml:cll + 
##     nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:cll    1   0.02169 4.5872 -234.51
## - ml:cll    1   0.07384 4.6394 -233.38
## - I(cll^2)  1   0.09969 4.6652 -232.82
## - mc:nc     1   0.12428 4.6898 -232.29
## - ml:nl     1   0.13367 4.6992 -232.09
## - nc:nl     1   0.15570 4.7212 -231.63
## - nl:cll    1   0.16929 4.7348 -231.34
## <none>                  4.5655 -230.38
## - nc:ml     1   0.39676 4.9623 -226.65
## - I(ml^2)   1   0.65650 5.2220 -221.55
## - mc:ml     1   1.14985 5.7154 -212.52
## - I(mc^2)   1   1.49542 6.0610 -206.65
## 
## Step:  AIC=-234.51
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - ml:cll    1   0.07030 4.6575 -237.59
## - I(cll^2)  1   0.07811 4.6653 -237.42
## - ml:nl     1   0.11425 4.7015 -236.65
## - mc:nc     1   0.14213 4.7294 -236.06
## - nl:cll    1   0.14783 4.7351 -235.94
## - nc:nl     1   0.14884 4.7361 -235.92
## <none>                  4.5872 -234.51
## - nc:ml     1   0.40464 4.9919 -230.66
## - I(ml^2)   1   0.64508 5.2323 -225.95
## - I(mc^2)   1   1.50089 6.0881 -210.81
## - mc:ml     1   1.51718 6.1044 -210.54
## 
## Step:  AIC=-237.59
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1   0.03019 4.6877 -241.55
## - ml:nl     1   0.09566 4.7532 -240.16
## - nl:cll    1   0.12936 4.7869 -239.46
## - nc:nl     1   0.13195 4.7895 -239.40
## - mc:nc     1   0.14474 4.8023 -239.14
## <none>                  4.6575 -237.59
## - nc:ml     1   0.34893 5.0065 -234.97
## - I(ml^2)   1   0.63604 5.2936 -229.40
## - I(mc^2)   1   1.54683 6.2044 -213.52
## - mc:ml     1   1.64835 6.3059 -211.90
## 
## Step:  AIC=-241.55
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - mc:nc    1   0.12074 4.8085 -243.61
## - ml:nl    1   0.16116 4.8489 -242.78
## <none>                 4.6877 -241.55
## - nc:nl    1   0.28518 4.9729 -240.25
## - nc:ml    1   0.50310 5.1908 -235.96
## - I(ml^2)  1   0.69800 5.3857 -232.28
## - I(mc^2)  1   1.52137 6.2091 -218.05
## - mc:ml    1   1.66571 6.3534 -215.75
## - nl:cll   1   1.71357 6.4013 -215.00
## 
## Step:  AIC=-243.61
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - ml:nl    1   0.08114 4.8896 -246.54
## <none>                 4.8085 -243.61
## - nc:ml    1   0.46374 5.2722 -239.01
## - I(ml^2)  1   0.63334 5.4418 -235.84
## - nc:nl    1   0.90355 5.7120 -231.00
## - nl:cll   1   1.88232 6.6908 -215.18
## - mc:ml    1   1.95782 6.7663 -214.06
## - I(mc^2)  1   2.32931 7.1378 -208.72
## 
## Step:  AIC=-246.54
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 4.8896 -246.54
## - nc:ml    1    0.6152 5.5048 -239.30
## - I(ml^2)  1    1.0318 5.9214 -232.00
## - nc:nl    1    1.5783 6.4679 -223.18
## - nl:cll   1    1.8375 6.7271 -219.25
## - mc:ml    1    3.8024 8.6920 -193.62
## - I(mc^2)  1    4.3803 9.2699 -187.18
## Stronger penalty (k=12). This assignment defines the m4 examined by
## anova(m4), summary(m4) and plot(m4) below, replacing any m4 created
## earlier in the script.
m4 <- step(m2, k=12)
## Start:  AIC=-56.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:nl + mc:cll + nc:ml + 
##     nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:nl     1   0.00004 4.5570 -68.850
## - I(nc^2)   1   0.00024 4.5572 -68.845
## - I(nl^2)   1   0.00081 4.5578 -68.833
## - nc:cll    1   0.00529 4.5623 -68.734
## - mc:cll    1   0.01917 4.5762 -68.431
## - nc:nl     1   0.02802 4.5850 -68.237
## - ml:cll    1   0.05658 4.6136 -67.617
## - mc:nc     1   0.05911 4.6161 -67.562
## - ml:nl     1   0.06618 4.6232 -67.409
## - nc:ml     1   0.08627 4.6433 -66.975
## - I(cll^2)  1   0.09273 4.6497 -66.836
## - nl:cll    1   0.12923 4.6862 -66.054
## - I(mc^2)   1   0.19587 4.7529 -64.642
## - mc:ml     1   0.34573 4.9027 -61.538
## - I(ml^2)   1   0.55284 5.1098 -57.400
## <none>                  4.5570 -56.850
## 
## Step:  AIC=-68.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + 
##     nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nc^2)   1   0.00022 4.5573 -80.845
## - I(nl^2)   1   0.00076 4.5578 -80.833
## - nc:cll    1   0.00528 4.5623 -80.734
## - mc:cll    1   0.01951 4.5766 -80.422
## - ml:cll    1   0.05870 4.6157 -79.570
## - nc:nl     1   0.06884 4.6259 -79.350
## - ml:nl     1   0.07461 4.6317 -79.225
## - I(cll^2)  1   0.09269 4.6497 -78.836
## - nc:ml     1   0.09674 4.6538 -78.749
## - mc:nc     1   0.10526 4.6623 -78.566
## - nl:cll    1   0.12941 4.6865 -78.049
## - mc:ml     1   0.38703 4.9441 -72.698
## <none>                  4.5570 -68.850
## - I(ml^2)   1   0.58958 5.1466 -68.683
## - I(mc^2)   1   0.60915 5.1662 -68.303
## 
## Step:  AIC=-80.84
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(nl^2) + 
##     I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + 
##     ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nl^2)   1   0.00087 4.5581 -92.825
## - nc:cll    1   0.00613 4.5634 -92.710
## - mc:cll    1   0.02110 4.5784 -92.383
## - ml:cll    1   0.05911 4.6164 -91.556
## - ml:nl     1   0.07439 4.6317 -91.225
## - nc:nl     1   0.07442 4.6317 -91.225
## - I(cll^2)  1   0.09443 4.6517 -90.794
## - mc:nc     1   0.11328 4.6706 -90.389
## - nl:cll    1   0.12983 4.6871 -90.036
## - nc:ml     1   0.13125 4.6885 -90.005
## - mc:ml     1   0.51202 5.0693 -82.197
## <none>                  4.5573 -80.845
## - I(ml^2)   1   0.58939 5.1467 -80.682
## - I(mc^2)   1   0.86389 5.4212 -75.486
## 
## Step:  AIC=-92.83
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + 
##     ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS      AIC
## - nc:cll    1   0.00738 4.5655 -104.664
## - mc:cll    1   0.02100 4.5791 -104.366
## - ml:cll    1   0.05974 4.6179 -103.523
## - I(cll^2)  1   0.09907 4.6572 -102.675
## - mc:nc     1   0.12869 4.6868 -102.041
## - ml:nl     1   0.13044 4.6886 -102.004
## - nc:ml     1   0.13871 4.6969 -101.828
## - nc:nl     1   0.16108 4.7192 -101.352
## - nl:cll    1   0.17479 4.7329 -101.062
## - mc:ml     1   0.51177 5.0699  -94.185
## <none>                  4.5581  -92.825
## - I(ml^2)   1   0.61947 5.1776  -92.083
## - I(mc^2)   1   0.91497 5.4731  -86.532
## 
## Step:  AIC=-104.66
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + ml:nl + ml:cll + 
##     nl:cll
## 
##            Df Sum of Sq    RSS      AIC
## - mc:cll    1   0.02169 4.5872 -116.190
## - ml:cll    1   0.07384 4.6394 -115.059
## - I(cll^2)  1   0.09969 4.6652 -114.504
## - mc:nc     1   0.12428 4.6898 -113.978
## - ml:nl     1   0.13367 4.6992 -113.778
## - nc:nl     1   0.15570 4.7212 -113.310
## - nl:cll    1   0.16929 4.7348 -113.023
## - nc:ml     1   0.39676 4.9623 -108.330
## <none>                  4.5655 -104.664
## - I(ml^2)   1   0.65650 5.2220 -103.228
## - mc:ml     1   1.14985 5.7154  -94.201
## - I(mc^2)   1   1.49542 6.0610  -88.330
## 
## Step:  AIC=-116.19
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS      AIC
## - ml:cll    1   0.07030 4.6575 -126.669
## - I(cll^2)  1   0.07811 4.6653 -126.501
## - ml:nl     1   0.11425 4.7015 -125.729
## - mc:nc     1   0.14213 4.7294 -125.138
## - nl:cll    1   0.14783 4.7351 -125.018
## - nc:nl     1   0.14884 4.7361 -124.996
## - nc:ml     1   0.40464 4.9919 -119.736
## <none>                  4.5872 -116.190
## - I(ml^2)   1   0.64508 5.2323 -115.032
## - I(mc^2)   1   1.50089 6.0881  -99.883
## - mc:ml     1   1.51718 6.1044  -99.616
## 
## Step:  AIC=-126.67
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1   0.03019 4.6877 -138.02
## - ml:nl     1   0.09566 4.7532 -136.64
## - nl:cll    1   0.12936 4.7869 -135.93
## - nc:nl     1   0.13195 4.7895 -135.88
## - mc:nc     1   0.14474 4.8023 -135.61
## - nc:ml     1   0.34893 5.0065 -131.44
## <none>                  4.6575 -126.67
## - I(ml^2)   1   0.63604 5.2936 -125.87
## - I(mc^2)   1   1.54683 6.2044 -109.99
## - mc:ml     1   1.64835 6.3059 -108.37
## 
## Step:  AIC=-138.02
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - mc:nc    1   0.12074 4.8085 -147.48
## - ml:nl    1   0.16116 4.8489 -146.64
## - nc:nl    1   0.28518 4.9729 -144.12
## - nc:ml    1   0.50310 5.1908 -139.83
## <none>                 4.6877 -138.02
## - I(ml^2)  1   0.69800 5.3857 -136.14
## - I(mc^2)  1   1.52137 6.2091 -121.92
## - mc:ml    1   1.66571 6.3534 -119.62
## - nl:cll   1   1.71357 6.4013 -118.87
## 
## Step:  AIC=-147.48
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - ml:nl    1   0.08114 4.8896 -157.81
## - nc:ml    1   0.46374 5.2722 -150.27
## <none>                 4.8085 -147.48
## - I(ml^2)  1   0.63334 5.4418 -147.11
## - nc:nl    1   0.90355 5.7120 -142.26
## - nl:cll   1   1.88232 6.6908 -126.44
## - mc:ml    1   1.95782 6.7663 -125.32
## - I(mc^2)  1   2.32931 7.1378 -119.98
## 
## Step:  AIC=-157.81
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - nc:ml    1    0.6152 5.5048 -157.96
## <none>                 4.8896 -157.81
## - I(ml^2)  1    1.0318 5.9214 -150.66
## - nc:nl    1    1.5783 6.4679 -141.83
## - nl:cll   1    1.8375 6.7271 -137.90
## - mc:ml    1    3.8024 8.6920 -112.28
## - I(mc^2)  1    4.3803 9.2699 -105.84
## 
## Step:  AIC=-157.95
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 5.5048 -157.96
## - nc:nl    1    1.0120 6.5168 -153.08
## - I(ml^2)  1    1.3041 6.8089 -148.69
## - nl:cll   1    1.7846 7.2894 -141.88
## - I(mc^2)  1    3.8417 9.3465 -117.02
## - mc:ml    1    4.2401 9.7449 -112.84
## Analysis of variance table of m4 (the model selected with k=12).
anova(m4)
## Analysis of Variance Table
## 
## Response: sqrt(area)
##           Df Sum Sq Mean Sq   F value    Pr(>F)    
## mc         1 547.86  547.86 8857.5924 < 2.2e-16 ***
## nc         1   0.06    0.06    0.9283   0.33790    
## ml         1  33.08   33.08  534.8878 < 2.2e-16 ***
## nl         1   2.48    2.48   40.0304 9.822e-09 ***
## cll        1   0.12    0.12    1.9473   0.16635    
## I(mc^2)    1   0.40    0.40    6.3993   0.01318 *  
## I(ml^2)    1   1.73    1.73   27.9749 8.717e-07 ***
## mc:ml      1   3.43    3.43   55.5113 5.737e-11 ***
## nc:nl      1   0.01    0.01    0.1368   0.71239    
## nl:cll     1   1.78    1.78   28.8528 6.183e-07 ***
## Residuals 89   5.50    0.06                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Coefficient estimates and fit statistics of m4.
summary(m4)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + 
##     I(ml^2) + mc:ml + nc:nl + nl:cll, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55490 -0.19004  0.04119  0.16520  0.61451 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.798594   0.285393   2.798 0.006298 ** 
## mc          -0.109223   0.200735  -0.544 0.587723    
## nc           0.961161   0.229840   4.182 6.76e-05 ***
## ml           0.642210   0.105488   6.088 2.84e-08 ***
## nl           0.308865   0.155011   1.993 0.049377 *  
## cll         -0.863242   0.165664  -5.211 1.21e-06 ***
## I(mc^2)      0.052710   0.006688   7.881 7.61e-12 ***
## I(ml^2)      0.036515   0.007952   4.592 1.44e-05 ***
## mc:ml       -0.090003   0.010870  -8.280 1.16e-12 ***
## nc:nl       -0.103543   0.025598  -4.045 0.000111 ***
## nl:cll       0.098658   0.018367   5.371 6.18e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2487 on 89 degrees of freedom
## Multiple R-squared:  0.9908, Adjusted R-squared:  0.9897 
## F-statistic: 955.4 on 10 and 89 DF,  p-value: < 2.2e-16
## Diagnostic plots of m4 on a 2 x 2 grid; layout(1) returns to a
## single panel afterwards.
par(mfrow=c(2, 2))
plot(m4)
layout(1)

## k larger than log(nrow(mal)). The result is only printed, not
## stored.
step(m2, k=6)
## Start:  AIC=-182.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:nl + mc:cll + nc:ml + 
##     nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:nl     1   0.00004 4.5570 -188.85
## - I(nc^2)   1   0.00024 4.5572 -188.84
## - I(nl^2)   1   0.00081 4.5578 -188.83
## - nc:cll    1   0.00529 4.5623 -188.73
## - mc:cll    1   0.01917 4.5762 -188.43
## - nc:nl     1   0.02802 4.5850 -188.24
## - ml:cll    1   0.05658 4.6136 -187.62
## - mc:nc     1   0.05911 4.6161 -187.56
## - ml:nl     1   0.06618 4.6232 -187.41
## - nc:ml     1   0.08627 4.6433 -186.97
## - I(cll^2)  1   0.09273 4.6497 -186.84
## - nl:cll    1   0.12923 4.6862 -186.05
## - I(mc^2)   1   0.19587 4.7529 -184.64
## <none>                  4.5570 -182.85
## - mc:ml     1   0.34573 4.9027 -181.54
## - I(ml^2)   1   0.55284 5.1098 -177.40
## 
## Step:  AIC=-188.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + 
##     nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nc^2)   1   0.00022 4.5573 -194.84
## - I(nl^2)   1   0.00076 4.5578 -194.83
## - nc:cll    1   0.00528 4.5623 -194.73
## - mc:cll    1   0.01951 4.5766 -194.42
## - ml:cll    1   0.05870 4.6157 -193.57
## - nc:nl     1   0.06884 4.6259 -193.35
## - ml:nl     1   0.07461 4.6317 -193.22
## - I(cll^2)  1   0.09269 4.6497 -192.84
## - nc:ml     1   0.09674 4.6538 -192.75
## - mc:nc     1   0.10526 4.6623 -192.57
## - nl:cll    1   0.12941 4.6865 -192.05
## <none>                  4.5570 -188.85
## - mc:ml     1   0.38703 4.9441 -186.70
## - I(ml^2)   1   0.58958 5.1466 -182.68
## - I(mc^2)   1   0.60915 5.1662 -182.30
## 
## Step:  AIC=-194.84
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(nl^2) + 
##     I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + 
##     ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nl^2)   1   0.00087 4.5581 -200.82
## - nc:cll    1   0.00613 4.5634 -200.71
## - mc:cll    1   0.02110 4.5784 -200.38
## - ml:cll    1   0.05911 4.6164 -199.56
## - ml:nl     1   0.07439 4.6317 -199.22
## - nc:nl     1   0.07442 4.6317 -199.22
## - I(cll^2)  1   0.09443 4.6517 -198.79
## - mc:nc     1   0.11328 4.6706 -198.39
## - nl:cll    1   0.12983 4.6871 -198.04
## - nc:ml     1   0.13125 4.6885 -198.00
## <none>                  4.5573 -194.84
## - mc:ml     1   0.51202 5.0693 -190.20
## - I(ml^2)   1   0.58939 5.1467 -188.68
## - I(mc^2)   1   0.86389 5.4212 -183.49
## 
## Step:  AIC=-200.83
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + 
##     ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - nc:cll    1   0.00738 4.5655 -206.66
## - mc:cll    1   0.02100 4.5791 -206.37
## - ml:cll    1   0.05974 4.6179 -205.52
## - I(cll^2)  1   0.09907 4.6572 -204.68
## - mc:nc     1   0.12869 4.6868 -204.04
## - ml:nl     1   0.13044 4.6886 -204.00
## - nc:ml     1   0.13871 4.6969 -203.83
## - nc:nl     1   0.16108 4.7192 -203.35
## - nl:cll    1   0.17479 4.7329 -203.06
## <none>                  4.5581 -200.82
## - mc:ml     1   0.51177 5.0699 -196.19
## - I(ml^2)   1   0.61947 5.1776 -194.08
## - I(mc^2)   1   0.91497 5.4731 -188.53
## 
## Step:  AIC=-206.66
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + ml:nl + ml:cll + 
##     nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:cll    1   0.02169 4.5872 -212.19
## - ml:cll    1   0.07384 4.6394 -211.06
## - I(cll^2)  1   0.09969 4.6652 -210.50
## - mc:nc     1   0.12428 4.6898 -209.98
## - ml:nl     1   0.13367 4.6992 -209.78
## - nc:nl     1   0.15570 4.7212 -209.31
## - nl:cll    1   0.16929 4.7348 -209.02
## <none>                  4.5655 -206.66
## - nc:ml     1   0.39676 4.9623 -204.33
## - I(ml^2)   1   0.65650 5.2220 -199.23
## - mc:ml     1   1.14985 5.7154 -190.20
## - I(mc^2)   1   1.49542 6.0610 -184.33
## 
## Step:  AIC=-212.19
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - ml:cll    1   0.07030 4.6575 -216.67
## - I(cll^2)  1   0.07811 4.6653 -216.50
## - ml:nl     1   0.11425 4.7015 -215.73
## - mc:nc     1   0.14213 4.7294 -215.14
## - nl:cll    1   0.14783 4.7351 -215.02
## - nc:nl     1   0.14884 4.7361 -215.00
## <none>                  4.5872 -212.19
## - nc:ml     1   0.40464 4.9919 -209.74
## - I(ml^2)   1   0.64508 5.2323 -205.03
## - I(mc^2)   1   1.50089 6.0881 -189.88
## - mc:ml     1   1.51718 6.1044 -189.62
## 
## Step:  AIC=-216.67
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1   0.03019 4.6877 -222.02
## - ml:nl     1   0.09566 4.7532 -220.64
## - nl:cll    1   0.12936 4.7869 -219.93
## - nc:nl     1   0.13195 4.7895 -219.88
## - mc:nc     1   0.14474 4.8023 -219.61
## <none>                  4.6575 -216.67
## - nc:ml     1   0.34893 5.0065 -215.44
## - I(ml^2)   1   0.63604 5.2936 -209.87
## - I(mc^2)   1   1.54683 6.2044 -193.99
## - mc:ml     1   1.64835 6.3059 -192.37
## 
## Step:  AIC=-222.02
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - mc:nc    1   0.12074 4.8085 -225.48
## - ml:nl    1   0.16116 4.8489 -224.64
## - nc:nl    1   0.28518 4.9729 -222.12
## <none>                 4.6877 -222.02
## - nc:ml    1   0.50310 5.1908 -217.83
## - I(ml^2)  1   0.69800 5.3857 -214.14
## - I(mc^2)  1   1.52137 6.2091 -199.92
## - mc:ml    1   1.66571 6.3534 -197.62
## - nl:cll   1   1.71357 6.4013 -196.87
## 
## Step:  AIC=-225.48
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## - ml:nl    1   0.08114 4.8896 -229.81
## <none>                 4.8085 -225.48
## - nc:ml    1   0.46374 5.2722 -222.27
## - I(ml^2)  1   0.63334 5.4418 -219.11
## - nc:nl    1   0.90355 5.7120 -214.26
## - nl:cll   1   1.88232 6.6908 -198.44
## - mc:ml    1   1.95782 6.7663 -197.32
## - I(mc^2)  1   2.32931 7.1378 -191.98
## 
## Step:  AIC=-229.81
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:ml + 
##     nc:ml + nc:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 4.8896 -229.81
## - nc:ml    1    0.6152 5.5048 -223.96
## - I(ml^2)  1    1.0318 5.9214 -216.66
## - nc:nl    1    1.5783 6.4679 -207.83
## - nl:cll   1    1.8375 6.7271 -203.90
## - mc:ml    1    3.8024 8.6920 -178.28
## - I(mc^2)  1    4.3803 9.2699 -171.84
## 
## Call:
## lm(formula = sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + 
##     I(ml^2) + mc:ml + nc:ml + nc:nl + nl:cll, data = mal)
## 
## Coefficients:
## (Intercept)           mc           nc           ml           nl          cll  
##     0.69573      0.32544      0.23507      0.47770      0.72972     -0.86188  
##     I(mc^2)      I(ml^2)        mc:ml        nc:ml        nc:nl       nl:cll  
##     0.05808      0.03283     -0.12890      0.07700     -0.14639      0.10014
##-----------------------------------------------------------------------------
## What is the best model when starting from the two-way interactions?

## Main effects plus every pairwise interaction (no quadratic terms),
## then step() with the penalty k = log(n).
m5 <- update(m2, . ~ (mc + nc + ml + nl + cll)^2)
m5 <- step(m5, k = log(nrow(mal)))
## Start:  AIC=-210.98
## sqrt(area) ~ mc + nc + ml + nl + cll + mc:nc + mc:ml + mc:nl + 
##     mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##          Df Sum of Sq    RSS     AIC
## - mc:cll  1   0.00040 5.8042 -215.58
## - nc:cll  1   0.00223 5.8061 -215.55
## - ml:nl   1   0.00504 5.8089 -215.50
## - nl:cll  1   0.02681 5.8306 -215.13
## <none>                5.8038 -210.98
## - ml:cll  1   0.28764 6.0915 -210.75
## - mc:nc   1   0.37047 6.1743 -209.40
## - nc:ml   1   0.56275 6.3666 -206.33
## - mc:nl   1   0.74116 6.5450 -203.57
## - nc:nl   1   0.96897 6.7728 -200.15
## - mc:ml   1   1.99222 7.7960 -186.08
## 
## Step:  AIC=-215.58
## sqrt(area) ~ mc + nc + ml + nl + cll + mc:nc + mc:ml + mc:nl + 
##     nc:ml + nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##          Df Sum of Sq    RSS     AIC
## - nc:cll  1    0.0047 5.8089 -220.10
## - ml:nl   1    0.0082 5.8124 -220.04
## - nl:cll  1    0.0285 5.8328 -219.69
## <none>                5.8042 -215.58
## - ml:cll  1    0.3384 6.1426 -214.52
## - mc:nc   1    0.3859 6.1901 -213.75
## - nc:ml   1    0.7306 6.5348 -208.33
## - mc:nl   1    1.3568 7.1611 -199.18
## - nc:nl   1    1.9281 7.7324 -191.50
## - mc:ml   1    3.2034 9.0076 -176.24
## 
## Step:  AIC=-220.11
## sqrt(area) ~ mc + nc + ml + nl + cll + mc:nc + mc:ml + mc:nl + 
##     nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##          Df Sum of Sq    RSS     AIC
## - ml:nl   1    0.0046 5.8136 -224.63
## - nl:cll  1    0.0974 5.9063 -223.05
## <none>                5.8089 -220.10
## - ml:cll  1    0.3441 6.1531 -218.96
## - mc:nc   1    0.5055 6.3144 -216.37
## - nc:ml   1    0.8787 6.6876 -210.62
## - mc:nl   1    1.4031 7.2120 -203.07
## - nc:nl   1    1.9274 7.7364 -196.06
## - mc:ml   1    3.2269 9.0358 -180.53
## 
## Step:  AIC=-224.63
## sqrt(area) ~ mc + nc + ml + nl + cll + mc:nc + mc:ml + mc:nl + 
##     nc:ml + nc:nl + ml:cll + nl:cll
## 
##          Df Sum of Sq    RSS     AIC
## - nl:cll  1    0.0937 5.9073 -227.64
## <none>                5.8136 -224.63
## - ml:cll  1    0.4493 6.2628 -221.79
## - nc:ml   1    0.8893 6.7028 -215.00
## - mc:nc   1    0.8930 6.7066 -214.95
## - mc:nl   1    1.7463 7.5598 -202.97
## - nc:nl   1    1.9757 7.7892 -199.98
## - mc:ml   1    3.6427 9.4563 -180.59
## 
## Step:  AIC=-227.64
## sqrt(area) ~ mc + nc + ml + nl + cll + mc:nc + mc:ml + mc:nl + 
##     nc:ml + nc:nl + ml:cll
## 
##          Df Sum of Sq    RSS     AIC
## <none>                5.9073 -227.64
## - mc:nc   1    0.8025 6.7098 -219.50
## - nc:ml   1    0.8069 6.7142 -219.44
## - ml:cll  1    1.3442 7.2515 -211.74
## - mc:nl   1    1.8283 7.7355 -205.28
## - nc:nl   1    2.0702 7.9775 -202.20
## - mc:ml   1    3.8203 9.7276 -182.36
## Main effects plus pure quadratic terms (no interactions), then step()
## with the same penalty k = log(n) used for m5.
m6 <- update(
    m2,
    . ~ mc + nc + ml + nl + cll +
        I(mc^2) + I(nc^2) + I(ml^2) + I(nl^2) + I(cll^2)
)
m6 <- step(m6, k = log(nrow(mal)))
## Start:  AIC=-183.95
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2)
## 
##            Df Sum of Sq     RSS     AIC
## - mc        1   0.00060  9.5755 -188.54
## - I(mc^2)   1   0.13388  9.7088 -187.16
## - I(ml^2)   1   0.13549  9.7104 -187.15
## - nc        1   0.28772  9.8626 -185.59
## - I(nc^2)   1   0.34887  9.9238 -184.97
## - cll       1   0.36986  9.9448 -184.76
## - I(cll^2)  1   0.38788  9.9628 -184.58
## <none>                   9.5749 -183.94
## - ml        1   0.79353 10.3685 -180.59
## - I(nl^2)   1   0.86751 10.4424 -179.88
## - nl        1   1.40576 10.9807 -174.85
## 
## Step:  AIC=-188.54
## sqrt(area) ~ nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2)
## 
##            Df Sum of Sq     RSS     AIC
## - I(ml^2)   1    0.1825  9.7580 -191.26
## - cll       1    0.3701  9.9456 -189.36
## - I(cll^2)  1    0.3885  9.9640 -189.17
## <none>                   9.5755 -188.54
## - nc        1    0.5803 10.1559 -187.26
## - I(nc^2)   1    0.6956 10.2712 -186.14
## - I(nl^2)   1    0.8669 10.4425 -184.48
## - ml        1    1.1470 10.7225 -181.84
## - nl        1    1.4063 10.9818 -179.45
## - I(mc^2)   1    5.2718 14.8474 -149.29
## 
## Step:  AIC=-191.26
## sqrt(area) ~ nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(nl^2) + 
##     I(cll^2)
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1    0.4178 10.176 -191.67
## - cll       1    0.4248 10.183 -191.61
## <none>                   9.758 -191.26
## - nc        1    0.6649 10.423 -189.28
## - I(nc^2)   1    0.7614 10.520 -188.35
## - I(nl^2)   1    2.7963 12.554 -170.67
## - nl        1    4.1070 13.865 -160.74
## - I(mc^2)   1    5.6184 15.376 -150.39
## - ml        1    9.2839 19.042 -129.01
## 
## Step:  AIC=-191.67
## sqrt(area) ~ nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(nl^2)
## 
##           Df Sum of Sq    RSS     AIC
## - cll      1    0.0075 10.183 -196.21
## <none>                 10.176 -191.67
## - nc       1    1.4250 11.601 -183.17
## - I(nc^2)  1    1.5190 11.695 -182.37
## - I(nl^2)  1    2.4805 12.656 -174.47
## - nl       1    3.9786 14.155 -163.28
## - I(mc^2)  1    6.7216 16.898 -145.56
## - ml       1   10.5656 20.741 -125.07
## 
## Step:  AIC=-196.21
## sqrt(area) ~ nc + ml + nl + I(mc^2) + I(nc^2) + I(nl^2)
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 10.183 -196.21
## - nc       1    1.5769 11.760 -186.41
## - I(nc^2)  1    1.6390 11.822 -185.89
## - I(nl^2)  1    2.6823 12.866 -177.43
## - nl       1    4.1370 14.320 -166.72
## - I(mc^2)  1    6.7175 16.901 -150.15
## - ml       1   11.5384 21.722 -125.06
## Coefficient table of the main-effects-only model m1.
summary(m1)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + ml + nc + nl + cll, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84138 -0.23439 -0.02851  0.14774  1.87394 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.20824    0.16965   1.227    0.223    
## mc           0.27039    0.04612   5.862 6.73e-08 ***
## ml           0.35067    0.03092  11.340  < 2e-16 ***
## nc           0.02091    0.04581   0.456    0.649    
## nl           0.32234    0.07408   4.351 3.43e-05 ***
## cll         -0.03684    0.03926  -0.938    0.350    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3698 on 94 degrees of freedom
## Multiple R-squared:  0.9784, Adjusted R-squared:  0.9773 
## F-statistic: 853.3 on 5 and 94 DF,  p-value: < 2.2e-16
## Reduced main-effects model: only mc, ml and nl are kept from m1.
m7 <- update(m1, . ~ mc + ml + nl)

## summary(m4)$adj.r.squared
## summary(m5)$adj.r.squared
## summary(m6)$adj.r.squared
## summary(m7)$adj.r.squared

## Coefficients of the selected model m4.
coef(m4)
## (Intercept)          mc          nc          ml          nl         cll     I(mc^2) 
##  0.79859412 -0.10922320  0.96116145  0.64221019  0.30886492 -0.86324198  0.05270993 
##     I(ml^2)       mc:ml       nc:nl      nl:cll 
##  0.03651507 -0.09000315 -0.10354332  0.09865800
## Coefficients of the reduced main-effects model m7, for comparison with m4.
coef(m7)
## (Intercept)          mc          ml          nl 
##   0.2024610   0.2725136   0.3434172   0.3077210

Forward e backward

##-----------------------------------------------------------------------------
## Forward and backward selection.

## Forward search: starts at m7 and may add terms up to those of m2.
## No k is supplied, so step() uses its default penalty.
m8 <- step(
    m7,
    scope = list(lower = formula(m7), upper = formula(m2)),
    direction = "forward"
)
## Start:  AIC=-196.11
## sqrt(area) ~ mc + ml + nl
## 
##            Df Sum of Sq    RSS     AIC
## + I(mc^2)   1   0.33288 12.655 -196.71
## <none>                  12.988 -196.11
## + I(nc^2)   1   0.16166 12.826 -195.37
## + mc:ml     1   0.15677 12.831 -195.33
## + ml:nl     1   0.10368 12.884 -194.91
## + cll       1   0.10219 12.886 -194.90
## + I(ml^2)   1   0.07337 12.915 -194.68
## + mc:nl     1   0.03604 12.952 -194.39
## + nc        1   0.01025 12.978 -194.19
## + I(cll^2)  1   0.00729 12.981 -194.17
## + I(nl^2)   1   0.00014 12.988 -194.11
## 
## Step:  AIC=-196.71
## sqrt(area) ~ mc + ml + nl + I(mc^2)
## 
##            Df Sum of Sq     RSS     AIC
## + mc:ml     1    4.1590  8.4963 -234.55
## + ml:nl     1    2.8171  9.8381 -219.89
## + mc:nl     1    2.0807 10.5746 -212.67
## + I(ml^2)   1    1.7636 10.8917 -209.72
## + I(nl^2)   1    1.4127 11.2425 -206.55
## <none>                  12.6553 -196.71
## + cll       1    0.1569 12.4983 -195.96
## + I(cll^2)  1    0.1285 12.5267 -195.73
## + I(nc^2)   1    0.0537 12.6015 -195.13
## + nc        1    0.0114 12.6438 -194.80
## 
## Step:  AIC=-234.55
## sqrt(area) ~ mc + ml + nl + I(mc^2) + mc:ml
## 
##            Df Sum of Sq    RSS     AIC
## + I(ml^2)   1   1.12437 7.3719 -246.75
## + ml:nl     1   0.43171 8.0646 -237.77
## + I(nc^2)   1   0.21154 8.2847 -235.08
## + nc        1   0.18111 8.3152 -234.71
## + I(cll^2)  1   0.17472 8.3216 -234.63
## <none>                  8.4963 -234.55
## + cll       1   0.06598 8.4303 -233.33
## + mc:nl     1   0.01145 8.4848 -232.69
## + I(nl^2)   1   0.00698 8.4893 -232.64
## 
## Step:  AIC=-246.75
## sqrt(area) ~ mc + ml + nl + I(mc^2) + I(ml^2) + mc:ml
## 
##            Df Sum of Sq    RSS     AIC
## <none>                  7.3719 -246.75
## + I(cll^2)  1  0.138611 7.2333 -246.65
## + I(nc^2)   1  0.087238 7.2847 -245.94
## + nc        1  0.061903 7.3100 -245.59
## + cll       1  0.021427 7.3505 -245.04
## + I(nl^2)   1  0.002835 7.3691 -244.79
## + ml:nl     1  0.001160 7.3707 -244.76
## + mc:nl     1  0.000044 7.3719 -244.75
## Backward search: starts at m2 and drops terms, but never below the
## terms of m7 (the lower scope). Default penalty, as for m8.
m9 <- step(
    m2,
    scope = list(lower = formula(m7), upper = formula(m2)),
    direction = "backward"
)
## Start:  AIC=-266.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:nl + mc:cll + nc:ml + 
##     nc:nl + nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:nl     1   0.00004 4.5570 -268.85
## - I(nc^2)   1   0.00024 4.5572 -268.85
## - I(nl^2)   1   0.00081 4.5578 -268.83
## - nc:cll    1   0.00529 4.5623 -268.73
## - mc:cll    1   0.01917 4.5762 -268.43
## - nc:nl     1   0.02802 4.5850 -268.24
## - ml:cll    1   0.05658 4.6136 -267.62
## - mc:nc     1   0.05911 4.6161 -267.56
## - ml:nl     1   0.06618 4.6232 -267.41
## - nc:ml     1   0.08627 4.6433 -266.98
## <none>                  4.5570 -266.85
## - I(cll^2)  1   0.09273 4.6497 -266.84
## - nl:cll    1   0.12923 4.6862 -266.05
## - I(mc^2)   1   0.19587 4.7529 -264.64
## - mc:ml     1   0.34573 4.9027 -261.54
## - I(ml^2)   1   0.55284 5.1098 -257.40
## 
## Step:  AIC=-268.85
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(nc^2) + I(ml^2) + 
##     I(nl^2) + I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + 
##     nc:cll + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nc^2)   1   0.00022 4.5573 -270.85
## - I(nl^2)   1   0.00076 4.5578 -270.83
## - nc:cll    1   0.00528 4.5623 -270.73
## - mc:cll    1   0.01951 4.5766 -270.42
## - ml:cll    1   0.05870 4.6157 -269.57
## - nc:nl     1   0.06884 4.6259 -269.35
## - ml:nl     1   0.07461 4.6317 -269.23
## <none>                  4.5570 -268.85
## - I(cll^2)  1   0.09269 4.6497 -268.84
## - nc:ml     1   0.09674 4.6538 -268.75
## - mc:nc     1   0.10526 4.6623 -268.57
## - nl:cll    1   0.12941 4.6865 -268.05
## - mc:ml     1   0.38703 4.9441 -262.70
## - I(ml^2)   1   0.58958 5.1466 -258.68
## - I(mc^2)   1   0.60915 5.1662 -258.30
## 
## Step:  AIC=-270.84
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(nl^2) + 
##     I(cll^2) + mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + 
##     ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(nl^2)   1   0.00087 4.5581 -272.82
## - nc:cll    1   0.00613 4.5634 -272.71
## - mc:cll    1   0.02110 4.5784 -272.38
## - ml:cll    1   0.05911 4.6164 -271.56
## - ml:nl     1   0.07439 4.6317 -271.23
## - nc:nl     1   0.07442 4.6317 -271.23
## <none>                  4.5573 -270.85
## - I(cll^2)  1   0.09443 4.6517 -270.79
## - mc:nc     1   0.11328 4.6706 -270.39
## - nl:cll    1   0.12983 4.6871 -270.04
## - nc:ml     1   0.13125 4.6885 -270.00
## - mc:ml     1   0.51202 5.0693 -262.20
## - I(ml^2)   1   0.58939 5.1467 -260.68
## - I(mc^2)   1   0.86389 5.4212 -255.49
## 
## Step:  AIC=-272.83
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + nc:cll + ml:nl + 
##     ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - nc:cll    1   0.00738 4.5655 -274.66
## - mc:cll    1   0.02100 4.5791 -274.37
## - ml:cll    1   0.05974 4.6179 -273.52
## <none>                  4.5581 -272.82
## - I(cll^2)  1   0.09907 4.6572 -272.68
## - mc:nc     1   0.12869 4.6868 -272.04
## - ml:nl     1   0.13044 4.6886 -272.00
## - nc:ml     1   0.13871 4.6969 -271.83
## - nc:nl     1   0.16108 4.7192 -271.35
## - nl:cll    1   0.17479 4.7329 -271.06
## - mc:ml     1   0.51177 5.0699 -264.19
## - I(ml^2)   1   0.61947 5.1776 -262.08
## - I(mc^2)   1   0.91497 5.4731 -256.53
## 
## Step:  AIC=-274.66
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + mc:cll + nc:ml + nc:nl + ml:nl + ml:cll + 
##     nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - mc:cll    1   0.02169 4.5872 -276.19
## - ml:cll    1   0.07384 4.6394 -275.06
## <none>                  4.5655 -274.66
## - I(cll^2)  1   0.09969 4.6652 -274.50
## - mc:nc     1   0.12428 4.6898 -273.98
## - ml:nl     1   0.13367 4.6992 -273.78
## - nc:nl     1   0.15570 4.7212 -273.31
## - nl:cll    1   0.16929 4.7348 -273.02
## - nc:ml     1   0.39676 4.9623 -268.33
## - I(ml^2)   1   0.65650 5.2220 -263.23
## - mc:ml     1   1.14985 5.7154 -254.20
## - I(mc^2)   1   1.49542 6.0610 -248.33
## 
## Step:  AIC=-276.19
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + ml:cll + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - ml:cll    1   0.07030 4.6575 -276.67
## - I(cll^2)  1   0.07811 4.6653 -276.50
## <none>                  4.5872 -276.19
## - ml:nl     1   0.11425 4.7015 -275.73
## - mc:nc     1   0.14213 4.7294 -275.14
## - nl:cll    1   0.14783 4.7351 -275.02
## - nc:nl     1   0.14884 4.7361 -275.00
## - nc:ml     1   0.40464 4.9919 -269.74
## - I(ml^2)   1   0.64508 5.2323 -265.03
## - I(mc^2)   1   1.50089 6.0881 -249.88
## - mc:ml     1   1.51718 6.1044 -249.62
## 
## Step:  AIC=-276.67
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + I(cll^2) + 
##     mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##            Df Sum of Sq    RSS     AIC
## - I(cll^2)  1   0.03019 4.6877 -278.02
## <none>                  4.6575 -276.67
## - ml:nl     1   0.09566 4.7532 -276.64
## - nl:cll    1   0.12936 4.7869 -275.93
## - nc:nl     1   0.13195 4.7895 -275.88
## - mc:nc     1   0.14474 4.8023 -275.61
## - nc:ml     1   0.34893 5.0065 -271.44
## - I(ml^2)   1   0.63604 5.2936 -265.87
## - I(mc^2)   1   1.54683 6.2044 -249.99
## - mc:ml     1   1.64835 6.3059 -248.37
## 
## Step:  AIC=-278.02
## sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
## 
##           Df Sum of Sq    RSS     AIC
## <none>                 4.6877 -278.02
## - mc:nc    1   0.12074 4.8085 -277.48
## - ml:nl    1   0.16116 4.8489 -276.64
## - nc:nl    1   0.28518 4.9729 -274.12
## - nc:ml    1   0.50310 5.1908 -269.83
## - I(ml^2)  1   0.69800 5.3857 -266.14
## - I(mc^2)  1   1.52137 6.2091 -251.92
## - mc:ml    1   1.66571 6.3534 -249.62
## - nl:cll   1   1.71357 6.4013 -248.87
## F test of the forward-selected model (m8) against the backward-selected
## model (m9); every term of m8 also appears in m9.
anova(m8, m9)
## Analysis of Variance Table
## 
## Model 1: sqrt(area) ~ mc + ml + nl + I(mc^2) + I(ml^2) + mc:ml
## Model 2: sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + I(ml^2) + mc:nc + 
##     mc:ml + nc:ml + nc:nl + ml:nl + nl:cll
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     93 7.3719                                  
## 2     86 4.6877  7    2.6842 7.0348 1.251e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Coefficient table and fit statistics of the forward-selected model.
summary(m8)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + ml + nl + I(mc^2) + I(ml^2) + 
##     mc:ml, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84155 -0.16950  0.04039  0.16181  0.75831 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.379139   0.292470   1.296  0.19807    
## mc          -0.004508   0.123166  -0.037  0.97088    
## ml           0.540192   0.112127   4.818 5.63e-06 ***
## nl           0.370740   0.054532   6.799 9.99e-10 ***
## I(mc^2)      0.042726   0.005229   8.170 1.49e-12 ***
## I(ml^2)      0.032688   0.008679   3.766  0.00029 ***
## mc:ml       -0.073988   0.011103  -6.664 1.86e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2815 on 93 degrees of freedom
## Multiple R-squared:  0.9876, Adjusted R-squared:  0.9868 
## F-statistic:  1239 on 6 and 93 DF,  p-value: < 2.2e-16
## Coefficient table and fit statistics of the backward-selected model.
summary(m9)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + 
##     I(ml^2) + mc:nc + mc:ml + nc:ml + nc:nl + ml:nl + nl:cll, 
##     data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56377 -0.16721 -0.00008  0.15014  0.54414 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.54902    0.28324   1.938 0.055864 .  
## mc           0.18649    0.24126   0.773 0.441641    
## nc           0.26781    0.31486   0.851 0.397377    
## ml           0.42136    0.13027   3.235 0.001730 ** 
## nl           1.07455    0.33956   3.165 0.002148 ** 
## cll         -0.89663    0.16679  -5.376 6.45e-07 ***
## I(mc^2)      0.06696    0.01267   5.283 9.46e-07 ***
## I(ml^2)      0.04963    0.01387   3.578 0.000571 ***
## mc:nc       -0.03509    0.02358  -1.488 0.140319    
## mc:ml       -0.10865    0.01965  -5.528 3.41e-07 ***
## nc:ml        0.07250    0.02386   3.038 0.003153 ** 
## nc:nl       -0.09098    0.03977  -2.287 0.024632 *  
## ml:nl       -0.07131    0.04147  -1.719 0.089125 .  
## nl:cll       0.10381    0.01851   5.607 2.45e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2335 on 86 degrees of freedom
## Multiple R-squared:  0.9921, Adjusted R-squared:  0.991 
## F-statistic: 835.1 on 13 and 86 DF,  p-value: < 2.2e-16

Comparando modelos

##-----------------------------------------------------------------------------
## Medidas de ajuste.

## Collect fit and complexity measures for a fitted model.
##
## Arguments:
##   x  fitted model object; every model passed in this script is an lm fit.
##
## Value:
##   Named numeric vector with, in this order: logLik, AIC, BIC, PRESS,
##   RMSE (residual standard error), R2, R2adj, npar (number of
##   coefficients), dfres (residual degrees of freedom) and nobs (number of
##   fitted values).
##
## press() is not defined in this file; presumably it comes from the asbio
## package loaded at the top of the script.
measures <- function(x){
    ## summary() is computed once and reused for the three values read
    ## from it, instead of being recomputed for each of them.
    s <- summary(x)
    L <- list(logLik=as.numeric(logLik(x)),  # drop the logLik attributes
              AIC=AIC(x),
              BIC=BIC(x),
              PRESS=press(x),
              RMSE=s$sigma,
              R2=s$r.squared,
              R2adj=s$adj.r.squared,
              npar=length(coef(x)),
              dfres=df.residual(x),
              nobs=length(fitted(x)))
    unlist(L)
}

## One row of fit measures per candidate model, rounded to 3 decimals.
modl <- list(m4 = m4, m5 = m5, m6 = m6, m7 = m7, m8 = m8, m9 = m9)
round(do.call(rbind, lapply(modl, measures)), digits = 3)
##     logLik    AIC     BIC  PRESS  RMSE    R2 R2adj npar dfres nobs
## m4   3.084 17.833  49.095  8.131 0.249 0.991 0.990   11    89  100
## m5  -0.444 26.889  60.756  9.425 0.259 0.990 0.989   12    88  100
## m6 -27.673 71.346  92.187 19.104 0.331 0.983 0.982    7    93  100
## m7 -39.837 89.674 102.700 16.323 0.368 0.978 0.978    4    96  100
## m8 -11.519 39.038  59.880  9.003 0.282 0.988 0.987    7    93  100
## m9  11.117  7.765  46.843  6.379 0.233 0.992 0.991   14    86  100
## This is the difference between statistical significance and practical
## significance.

##-----------------------------------------------------------------------------
## Final model.

## Sequential ANOVA table: terms are tested in the order they enter m4.
anova(m4)
## Analysis of Variance Table
## 
## Response: sqrt(area)
##           Df Sum Sq Mean Sq   F value    Pr(>F)    
## mc         1 547.86  547.86 8857.5924 < 2.2e-16 ***
## nc         1   0.06    0.06    0.9283   0.33790    
## ml         1  33.08   33.08  534.8878 < 2.2e-16 ***
## nl         1   2.48    2.48   40.0304 9.822e-09 ***
## cll        1   0.12    0.12    1.9473   0.16635    
## I(mc^2)    1   0.40    0.40    6.3993   0.01318 *  
## I(ml^2)    1   1.73    1.73   27.9749 8.717e-07 ***
## mc:ml      1   3.43    3.43   55.5113 5.737e-11 ***
## nc:nl      1   0.01    0.01    0.1368   0.71239    
## nl:cll     1   1.78    1.78   28.8528 6.183e-07 ***
## Residuals 89   5.50    0.06                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Coefficient estimates and overall fit statistics of the final model m4.
summary(m4)
## 
## Call:
## lm(formula = sqrt(area) ~ mc + nc + ml + nl + cll + I(mc^2) + 
##     I(ml^2) + mc:ml + nc:nl + nl:cll, data = mal)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55490 -0.19004  0.04119  0.16520  0.61451 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.798594   0.285393   2.798 0.006298 ** 
## mc          -0.109223   0.200735  -0.544 0.587723    
## nc           0.961161   0.229840   4.182 6.76e-05 ***
## ml           0.642210   0.105488   6.088 2.84e-08 ***
## nl           0.308865   0.155011   1.993 0.049377 *  
## cll         -0.863242   0.165664  -5.211 1.21e-06 ***
## I(mc^2)      0.052710   0.006688   7.881 7.61e-12 ***
## I(ml^2)      0.036515   0.007952   4.592 1.44e-05 ***
## mc:ml       -0.090003   0.010870  -8.280 1.16e-12 ***
## nc:nl       -0.103543   0.025598  -4.045 0.000111 ***
## nl:cll       0.098658   0.018367   5.371 6.18e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2487 on 89 degrees of freedom
## Multiple R-squared:  0.9908, Adjusted R-squared:  0.9897 
## F-statistic: 955.4 on 10 and 89 DF,  p-value: < 2.2e-16
## Observed versus fitted values on the sqrt(area) scale: default symbol
## for m4, crosses (pch = 3) for m7, plus the identity line and a grid.
plot(sqrt(mal$area) ~ fitted(m4), asp = 1)
points(sqrt(mal$area) ~ fitted(m7), pch = 3)
abline(a = 0, b = 1)
grid()

## identify(y=sqrt(mal$area), x=fitted(m7))

##-----------------------------------------------------------------------------
## Influence measures.

## Influence diagnostics for m7; summary() lists the flagged observations.
im <- influence.measures(m7)
summary(im)
## Potentially influential observations of
##   lm(formula = sqrt(area) ~ mc + ml + nl, data = mal) :
## 
##    dfb.1_ dfb.mc  dfb.ml  dfb.nl dffit   cov.r   cook.d  hat    
## 36  0.05   0.23    0.13   -0.28   0.29    1.13_*  0.02    0.10  
## 58  0.61   3.03_* -2.57_* -0.92   4.01_*  0.22_*  2.58_*  0.23_*
## 92 -0.19  -0.07    0.09    0.05  -0.20    1.16_*  0.01    0.11  
## 94 -0.10  -0.03    0.06    0.01  -0.11    1.18_*  0.00    0.12  
## 96  0.13   0.00   -0.06    0.00   0.13    1.21_*  0.00    0.14_*
## 98 -0.22  -0.45    0.48    0.14   0.70_*  0.81_*  0.11    0.06
## im <- influence.measures(m4)
## summary(im)

## Refit m7 leaving out rows 58 and 98 of mal, two of the observations
## listed in the influence summary of m7.
## NOTE(review): rows are dropped by position; this presumably matches the
## row labels printed by summary(im) -- confirm against how mal was built.
## Commented-out alternatives:
## m10 <- update(m7, data=mal[-c(58),])
## m10 <- update(m7, data=mal[-c(58,98,96),])
m10 <- update(m7, data = mal[-c(58, 98), ])

## Same table of fit measures, now with m10 appended. m10 is fitted to two
## fewer observations than the other models.
modl <- list(
    m4 = m4, m5 = m5, m6 = m6, m7 = m7,
    m8 = m8, m9 = m9, m10 = m10
)
round(do.call(rbind, lapply(modl, measures)), digits = 3)
##      logLik    AIC     BIC  PRESS  RMSE    R2 R2adj npar dfres nobs
## m4    3.084 17.833  49.095  8.131 0.249 0.991 0.990   11    89  100
## m5   -0.444 26.889  60.756  9.425 0.259 0.990 0.989   12    88  100
## m6  -27.673 71.346  92.187 19.104 0.331 0.983 0.982    7    93  100
## m7  -39.837 89.674 102.700 16.323 0.368 0.978 0.978    4    96  100
## m8  -11.519 39.038  59.880  9.003 0.282 0.988 0.987    7    93  100
## m9   11.117  7.765  46.843  6.379 0.233 0.992 0.991   14    86  100
## m10 -13.964 37.927  50.852  8.416 0.285 0.987 0.986    4    94   98
##-----------------------------------------------------------------------------
## Prediction using the model.

## Coefficients of m10 (m7 refitted without rows 58 and 98 of mal).
coef(m10)
## (Intercept)          mc          ml          nl 
##   0.1523111   0.2009084   0.3917068   0.3499388
## Predicted area for one new set of measurements. The model is fitted to
## sqrt(area), so the prediction is squared to return to the area scale.
## newdata is given as a data frame, the type predict.lm() documents for it.
predict(m10,
        newdata=data.frame(
            mc=13,
            nl=8.3,
            ml=12.2))^2
##        1 
## 109.1489

Praticar

##-----------------------------------------------------------------------------
## Wine quality data.

url <- "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Example12.14.txt"
vin <- read.table(url, header = TRUE, sep = "\t")
str(vin)

## Scatter plot matrix of all columns.
splom(vin)

## Starting model: Quality on all other columns and their two-way
## interactions.
m0 <- lm(Quality ~ (.)^2, data = vin)
summary(m0)

##-----------------------------------------------------------------------------
## Major League Baseball 2005 Season.
## Model Wins as a function of the remaining variables.

url <- "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Ex12.104.txt"
da <- read.table(url, header = TRUE, sep = "\t")
str(da)

##-----------------------------------------------------------------------------
## Transient point of an electronic inverter.

url <- "http://www.leg.ufpr.br/~walmes/data/MontgomeryASPE5th/Ex12.98.txt"
da <- read.table(url, header = TRUE, sep = "\t")
str(da)

## Dados do ipea.

##-----------------------------------------------------------------------------

## The URL is assembled from two literal pieces with paste0() so that it can
## be kept on short source lines without relying on strwrap(): strwrap()
## wraps according to getOption("width"), and on a wide console it would put
## both pieces on one line separated by a space, yielding an invalid URL.
url <- paste0("http://www.leg.ufpr.br/~walmes/data/",
              "business_economics_dataset/EXAMPLES/EXECSAL.DAT")

## Read the EXECSAL file with read.table() defaults and show the first rows.
## The columns are referred to as V1..V12 in the description below.
da <- read.table(url)
head(da)

## V1 : int, line
## V2 : num, salary
## V3 : int, experience (years)
## V4 : int, education (years)
## V5 : int, bonus eligibility (0=no, 1=yes)
## V6 : int, number of employees supervised
## V7 : int, corporate assets
## V8 : int, board member (0=no, 1=yes)
## V9 : int, age
## V10: int, company profits
## V11: int, international responsibility (0=no, 1=yes)
## V12: int, company's total sales

##-----------------------------------------------------------------------------

## Both URLs are assembled with paste0() from two literal pieces rather than
## with strwrap(), whose wrapping depends on getOption("width") and can
## leave a space inside the URL on a wide console.
## NOTE(review): the first value of url is overwritten by the second before
## anything reads it; presumably each data set is meant to be loaded in turn.
url <- paste0("http://www.leg.ufpr.br/~walmes/data/",
              "business_economics_dataset/EXERCICES/BESTINS.DAT")

url <- paste0("http://www.leg.ufpr.br/~walmes/data/",
              "business_economics_dataset/EXERCICES/CALIRAIN.DAT")

## Average annual precipitation, y (inches)
## Altitude, x1 (feet)
## Latitude, x2 (degree)
## Distance from coast, x3 (miles)

##-----------------------------------------------------------------------------

##-----------------------------------------------------------------------------
## Session information.

## Date and time at which the script was run.
Sys.time()
## [1] "2015-04-16 17:11:00 BRT"
## R version, platform, locale and the versions of attached/loaded packages.
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: i686-pc-linux-gnu (32-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8       
##  [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
## [10] LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] tcltk     methods   splines   grid      stats     graphics  grDevices utils    
##  [9] datasets  base     
## 
## other attached packages:
##  [1] wzRfun_0.4          asbio_1.1-1         multcomp_1.3-7      TH.data_1.0-5      
##  [5] mvtnorm_1.0-1       doBy_4.5-12         survival_2.37-7     reshape_0.8.5      
##  [9] plyr_1.8.1          alr3_2.0.5          car_2.0-22          gridExtra_0.9.1    
## [13] latticeExtra_0.6-26 RColorBrewer_1.0-5  lattice_0.20-29     rmarkdown_0.3.3    
## [17] knitr_1.8          
## 
## loaded via a namespace (and not attached):
##  [1] deSolve_1.11         digest_0.6.4         evaluate_0.5.5       formatR_1.0         
##  [5] htmltools_0.2.6      MASS_7.3-37          Matrix_1.1-5         multcompView_0.1-5  
##  [9] nnet_7.3-8           pixmap_0.4-11        plotrix_3.5-10       Rcpp_0.11.3         
## [13] sandwich_2.3-2       scatterplot3d_0.3-35 stringr_0.6.2        tkrplot_0.0-23      
## [17] tools_3.1.2          yaml_2.1.13          zoo_1.7-11