CE 001 - Bioestatística

Universidade Federal do Paraná
Prof. Dr. Walmes M. Zeviani
Curso de Ciências Biológicas - 2015/1
Lab. de Estatística e Geoinformação - LEG
Departamento de Estatística - UFPR

Análise descritiva dos dados do questionário

##-----------------------------------------------------------------------------
## Pacotes usados na sessão.

## Attach the packages with library(): it stops with an error when a package
## is not installed, whereas require() only warns and returns FALSE, which
## postpones the failure to the first call that needs the package.
library(lattice)
library(EnvStats)
## 
## Attaching package: 'EnvStats'
## 
## The following object is masked from 'package:stats':
## 
##     predict.lm
## library() fails immediately if fBasics is not installed; require() would
## only warn and return FALSE.
library(fBasics)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:EnvStats':
## 
##     boxcox
## 
## Loading required package: methods
## Loading required package: timeDate
## 
## Attaching package: 'timeDate'
## 
## The following objects are masked from 'package:EnvStats':
## 
##     kurtosis, skewness
## 
## Loading required package: timeSeries
## 
## Attaching package: 'fBasics'
## 
## The following object is masked from 'package:base':
## 
##     norm
##-----------------------------------------------------------------------------
## Reading the data.
## The file is tab-separated with a header row.  Column classes are given by
## position: columns 1 and 9 are character, columns 3, 15 and 16 are integer,
## and all the remaining columns are factors.

da <- read.table(file = "quest.txt",
                 header = TRUE,
                 sep = "\t",
                 stringsAsFactors = FALSE,
                 colClasses = c("character", "factor", "integer",
                                rep("factor", 5),
                                "character",
                                rep("factor", 5),
                                rep("integer", 2)))
str(da)
##-----------------------------------------------------------------------------
## Sex.

## One-way table of counts, printed as proportions.
tb <- xtabs(~ sexo, data = da)
prop.table(tb)
## sexo
##  Feminino Masculino 
##    0.7027    0.2973

## Pie chart and bar chart of the counts, side by side (1 row, 2 panels).
par(mfrow = c(1, 2))
pie(x = tb)
barplot(height = tb, ylab = "Frequência absoluta")

plot of chunk unnamed-chunk-5

## Go back to a single-panel device layout after the two-panel figure above.
layout(1)

##-----------------------------------------------------------------------------
## Origin.

## Counts per level of `origem` and the corresponding proportions.
tb <- xtabs(~ origem, data = da)
tp <- prop.table(tb)
tp
## origem
## Curitiba ou região metropolitana                  De outro Estado 
##                           0.5405                           0.3514 
##            Do interior do Paraná 
##                           0.1081

pie(x = tb)

plot of chunk unnamed-chunk-5

## Horizontal bar chart of the counts with lattice.  Base-graphics
## alternative, kept for reference:
## barplot(tb, ylab="Frequência absoluta", horiz=TRUE)
barchart(tb,
         xlab = "Frequência absoluta")

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Number of siblings.

## Counts per value of `irmaos` and the corresponding proportions.
tb <- xtabs(~ irmaos, data = da)
tp <- prop.table(tb)
tp
## irmaos
##       0       1       2       3 
## 0.16216 0.48649 0.27027 0.08108

barplot(height = tb, xlab = "Número de irmãos", ylab = "Frequência absoluta")

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## For the remaining variables: one pie chart of the level counts per column.

## Column `torcida`.
pie(table(da$torcida))

plot of chunk unnamed-chunk-5

## Column `cnh`.
pie(table(da$cnh))

plot of chunk unnamed-chunk-5

## Column `mora`.
pie(table(da$mora))

plot of chunk unnamed-chunk-5

## Column `transporte`.
pie(table(da$transporte))

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Two-way frequency tables.

## Counts of `sexo` (rows) by `pet` (columns).
xt <- xtabs(~ sexo + pet, data = da)
xt
##            pet
## sexo        Não Sim
##   Feminino    8  18
##   Masculino   6   5

## Same counts with row and column totals.
addmargins(xt)
##            pet
## sexo        Não Sim Sum
##   Feminino    8  18  26
##   Masculino   6   5  11
##   Sum        14  23  37

## Proportions relative to the grand total, with marginal proportions.
addmargins(prop.table(xt))
##            pet
## sexo           Não    Sim    Sum
##   Feminino  0.2162 0.4865 0.7027
##   Masculino 0.1622 0.1351 0.2973
##   Sum       0.3784 0.6216 1.0000

mosaicplot(xt)

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------

## Counts of `cnh` (rows) by `sexo` (columns).
xt <- xtabs(~ cnh + sexo, data = da)
xt
##      sexo
## cnh   Feminino Masculino
##   A          1         1
##   AB         3         3
##   B         10         3
##   Não       12         4

## Same counts with row and column totals.
addmargins(xt)
##      sexo
## cnh   Feminino Masculino Sum
##   A          1         1   2
##   AB         3         3   6
##   B         10         3  13
##   Não       12         4  16
##   Sum       26        11  37

## Proportions relative to the grand total, with marginal proportions.
addmargins(prop.table(xt))
##      sexo
## cnh   Feminino Masculino     Sum
##   A    0.02703   0.02703 0.05405
##   AB   0.08108   0.08108 0.16216
##   B    0.27027   0.08108 0.35135
##   Não  0.32432   0.10811 0.43243
##   Sum  0.70270   0.29730 1.00000

mosaicplot(xt)

plot of chunk unnamed-chunk-5

## Collapse `cnh` into two levels: "S" for the categories A, AB and B, and "N"
## for "Não".  The mapping is written by level name instead of by position, so
## it does not depend on the order (or number) of levels that the data happen
## to produce.
da$cnhs <- da$cnh
levels(da$cnhs) <- list(S = c("A", "AB", "B"), N = "Não")
## A level missing from the mapping would be turned into NA silently; stop
## instead if the recode created any NA that was not already in `cnh`.
stopifnot(identical(is.na(da$cnhs), is.na(da$cnh)))

## Counts of the collapsed variable (rows) by `sexo` (columns).
xt <- xtabs(~ cnhs + sexo, data = da)
xt
##     sexo
## cnhs Feminino Masculino
##    S       14         7
##    N       12         4

## Same counts with row and column totals.
addmargins(xt)
##      sexo
## cnhs  Feminino Masculino Sum
##   S         14         7  21
##   N         12         4  16
##   Sum       26        11  37

## Proportions relative to the grand total, with marginal proportions.
addmargins(prop.table(xt))
##      sexo
## cnhs  Feminino Masculino    Sum
##   S     0.3784    0.1892 0.5676
##   N     0.3243    0.1081 0.4324
##   Sum   0.7027    0.2973 1.0000

mosaicplot(xt)

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Histograms.
## Note: the default plot titles are built from the call expression
## (e.g. `da$alt`), so the arguments are left exactly as written.

## Histogram of `alt`, with the individual observations marked on the x axis.
hist(da$alt, xlab="Altura (cm)")
rug(da$alt)

plot of chunk unnamed-chunk-5

## Histogram of `peso`, with the individual observations marked on the x axis.
hist(da$peso, xlab="Peso (kg)")
rug(da$peso)

plot of chunk unnamed-chunk-5

## Empirical cumulative distribution function of `alt`.
plot(ecdf(da$alt))

plot of chunk unnamed-chunk-5

## Empirical cumulative distribution function of `peso`.
plot(ecdf(da$peso))

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Frequency table.

## Bin `alt` into right-closed classes (a, b] of width 10, from 150 to 200,
## and count the observations in each class.
x <- cut(da$alt, breaks = seq(from = 150, to = 200, by = 10))
fq <- xtabs(~ x)

## f: absolute frequency, Fq: cumulative absolute frequency,
## fr: relative frequency, Fr: cumulative relative frequency.
cbind(f = fq,
      Fq = cumsum(fq),
      fr = prop.table(fq),
      Fr = cumsum(prop.table(fq)))
##            f Fq      fr     Fr
## (150,160]  4  4 0.10811 0.1081
## (160,170] 17 21 0.45946 0.5676
## (170,180] 11 32 0.29730 0.8649
## (180,190]  3 35 0.08108 0.9459
## (190,200]  2 37 0.05405 1.0000
##-----------------------------------------------------------------------------
## Kernel density.
## Each line draws a kernel density estimate of `alt` and marks the
## observations with rug().  The first two calls compare kernels using the
## default bandwidth; the last three vary the smoothing through `width`.
## NOTE(review): `width` is density()'s S-compatibility argument, from which
## the bandwidth is derived (see ?density) -- it is not the same as `bw`.

## Rectangular kernel, default bandwidth.
plot(density(da$alt, kernel="rectangular")); rug(da$alt)

plot of chunk unnamed-chunk-5

## Gaussian kernel, default bandwidth.
plot(density(da$alt, kernel="gaussian")); rug(da$alt)

plot of chunk unnamed-chunk-5

## Gaussian kernel, width = 2.
plot(density(da$alt, kernel="gaussian", width=2)); rug(da$alt)

plot of chunk unnamed-chunk-5

## Gaussian kernel, width = 10.
plot(density(da$alt, kernel="gaussian", width=10)); rug(da$alt)

plot of chunk unnamed-chunk-5

## Gaussian kernel, width = 20.
plot(density(da$alt, kernel="gaussian", width=20)); rug(da$alt)

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Scatter plots.

## `peso` against `alt` with base graphics.
plot(peso ~ alt,
     data = da,
     xlab = "Altura (cm)",
     ylab = "Peso (kg)")

plot of chunk unnamed-chunk-5

## The same scatter plot drawn with lattice.
xyplot(peso ~ alt,
       data = da,
       xlab = "Altura (cm)",
       ylab = "Peso (kg)")

plot of chunk unnamed-chunk-5

## Scatter plot with one plotting symbol/colour per level of `sexo`.
## auto.key adds the legend; without it the groups cannot be told apart.
xyplot(peso ~ alt, data = da, groups = sexo,
       auto.key = TRUE,
       xlab = "Altura (cm)", ylab = "Peso (kg)")

plot of chunk unnamed-chunk-5

## Scatter plot conditioned on `sexo`: one panel per level.
xyplot(peso ~ alt | sexo,
       data = da,
       ## Disabled option, kept for reference:
       ## type=c("p","smooth"),
       xlab = "Altura (cm)",
       ylab = "Peso (kg)")

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------

## Box plots by `sexo`.

## `alt` by `sexo` with base graphics.
boxplot(alt~sexo, data=da)

plot of chunk unnamed-chunk-5

## Same comparison with lattice; pch="|" marks the median with a bar.
bwplot(alt~sexo, data=da, pch="|")

plot of chunk unnamed-chunk-5

## `peso` by `sexo` with lattice.
bwplot(peso~sexo, data=da, pch="|")

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Cleaning the hand-length data.
## Each answer in `da$mao` is free text with two lengths separated by ";".
## As the printout shows, some answers use "," as the decimal mark, some have
## a space after the separator, and one answer ("1.05") has a single value.
## NOTE(review): judging by the axis labels used further down, the two values
## are presumably the thumb-to-little-finger and index-to-little-finger
## distances -- confirm against the questionnaire.
## NOTE(review): a couple of answers (e.g. "11.5;17.5", "11.2;16.5") look as
## if the two values were given in the opposite order -- confirm.

da$mao
##  [1] "16; 12"     "18.9;11.6"  "14.0; 10.3" "23; 13.5"   "17;12"      "18.5;12.5" 
##  [7] "11.5;17.5"  "18;12"      "15;9.5"     "21; 13"     "18.5;12.2"  "11.2;16.5" 
## [13] "14;9"       "16.4; 12.9" "15.3; 10.5" "16;9"       "15.5;12"    "17; 11.5"  
## [19] "17.5;12"    "21.3;16.3"  "14.8;10"    "15.5;10.0"  "15.7;10.4"  "15,8; 10,5"
## [25] "19;12.1"    "19.5;11.95" "1.05"       "16.4;11"    "23.7;14.9"  "20;13.3"   
## [31] "22.4;15.4"  "21,1; 13"   "18,5; 13,5" "17.5;12.5"  "15.4;10,5"  "20.3;12.4" 
## [37] "23.2;16.2"
s <- strsplit(da$mao, split = ";")
## Only two columns are kept below, so refuse answers with more than two
## values instead of dropping the extra ones silently.
stopifnot(all(vapply(s, length, integer(1)) <= 2L))
## Convert every answer to exactly two numbers using base R only (no plyr):
## "," is replaced by "." before conversion, and indexing with [1:2] pads an
## answer that has a single value with NA.  The result is a 2 x n matrix.
s <- vapply(s,
            function(x) {
                as.numeric(gsub(",", ".", x))[1:2]
            },
            numeric(2))
s <- data.frame(polmin = s[1, ], indmin = s[2, ])
str(s)
## 'data.frame':    37 obs. of  2 variables:
##  $ polmin: num  16 18.9 14 23 17 18.5 11.5 18 15 21 ...
##  $ indmin: num  12 11.6 10.3 13.5 12 12.5 17.5 12 9.5 13 ...
## Append the two parsed columns to the main data frame.
da <- cbind(da, s)

##-----------------------------------------------------------------------------

## Histograms of the two parsed hand measurements.  The default titles are
## built from the call expression, so the arguments are left as written.
hist(da$polmin)

plot of chunk unnamed-chunk-5

hist(da$indmin)

plot of chunk unnamed-chunk-5

## Scatter plot of `polmin` (y axis) against `indmin` (x axis).  In a lattice
## formula y ~ x the left-hand side goes on the vertical axis, so the
## "Polegar" label belongs to ylab and the "Indicador" label to xlab.
xyplot(polmin ~ indmin, data = da,
       xlab = "Indicador - mínimo (cm)", ylab = "Polegar - mínimo (cm)")

plot of chunk unnamed-chunk-5

## Golden ratio: (1+sqrt(5))/2.
## `r` is the ratio polmin/indmin for each row; it is NA wherever either
## measurement is NA.
da$r <- da$polmin/da$indmin

## Histogram of the ratio with the observations marked on the x axis.
hist(da$r); rug(da$r)

plot of chunk unnamed-chunk-5

## Empirical cumulative distribution function of the ratio.
plot(ecdf(da$r)); rug(da$r)

plot of chunk unnamed-chunk-5

## Box plot of the ratio.
boxplot(da$r)

plot of chunk unnamed-chunk-5

##-----------------------------------------------------------------------------
## Descriptive measures.
## basicStats() (fBasics) prints a column of summary statistics for the
## ratio; the column header in the output is derived from the expression
## passed as `x`.

basicStats(x=da$r)
##              X..da.r
## nobs        37.00000
## NAs          1.00000
## Minimum      0.65714
## Maximum      1.77778
## 1. Quartile  1.41250
## 3. Quartile  1.57242
## Mean         1.45226
## Median       1.48545
## Sum         52.28125
## SE Mean      0.03732
## LCL Mean     1.37649
## UCL Mean     1.52803
## Variance     0.05015
## Stdev        0.22394
## Skewness    -2.21398
## Kurtosis     5.62807
## summaryStats() (EnvStats) gives a one-row summary of the same variable,
## including the number of missing values.
summaryStats(da$r)
##       N  Mean    SD Median   Min   Max NA's N.Total
## da$r 36 1.452 0.224  1.485 0.657 1.778    1      37
## stem(da$r)