Regressão logística
# Cria a variável binária para o descrecho.
levels(da$Imm_rej)
## [1] "No immunological rejection" "Yes for immunological rejection"
da$y <- as.integer(da$Imm_rej) - 1
# Passa variável para escala numérica.
if (is.factor(da$HLA_MM)) {
z <- gsub("\\D", "", da$HLA_MM)
z[z == ""] <- "0"
z <- as.integer(z)
da$HLA_MM <- z
}
table(da$y)
##
## 0 1
## 37 30
# Encontra missings.
i <- complete.cases(da)
which(!i)
## [1] 16 36 67
da[!i, ]
## ID Age Gender Weight BMI Height N_pre_Tx Re_Tx
## 16 TXR1R16 52 F 76 Overweight 162 0 no
## 36 TXR1R36 41 F 65 Normal weight 164 0 no
## 67 TXR1R67 35 M 57 Normal weight 165 0 no
## N_pre_blood_tx Multip N_pregn Abortions N_abortions PRA_30
## 16 0 yes 4 yes 2 high PRA
## 36 0 yes 2 no 0 high PRA
## 67 2 no 0 no 0 low PRA
## DSAtotal Hd_CAPD Time_dialysis Ischemia HLA_MM D_typeI
## 16 yes total DSA Hd 83 19:07:00.00 5 Deceased donor
## 36 no total DSA Hd 15 04:14:00.00 NA Deceased donor
## 67 no total DSA Hd NA 19:48:00.00 NA Deceased donor
## D_gender D_age Diff_age Diff_weight Diff_height B_Hypert B_DM B_DLP
## 16 F 59 7 26 7 yes no no
## 36 M 66 25 0 4 yes no no
## 67 F 46 11 11 0 yes no no
## B_GNC B_PN B_PKD B_Hypert_nephro B_G_other Imm_rej_type
## 16 yes no no no yes <NA>
## 36 yes no no no yes RCA
## 67 no no no no yes No immunological rejection
## Imm_rej Graft_lost Death Sec_comp_rej IschemiaH
## 16 Yes for immunological rejection 1 0 <NA> 19.116667
## 36 Yes for immunological rejection 0 0 PNA 4.233333
## 67 No immunological rejection 0 0 Dengue 19.800000
## y
## 16 1
## 36 1
## 67 0
# Elimina missings.
db <- da[-c(36, 67), ]
#-----------------------------------------------------------------------
# Avalia a inclusão de uma variável por vez.
# Modelo nulo (apenas intercepto).
m0 <- glm(y ~ 1,
data = db,
family = binomial)
summary(m0)
##
## Call:
## glm(formula = y ~ 1, family = binomial, data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.087 -1.087 -1.087 1.270 1.270
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2162 0.2495 -0.867 0.386
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89.354 on 64 degrees of freedom
## Residual deviance: 89.354 on 64 degrees of freedom
## AIC: 91.354
##
## Number of Fisher Scoring iterations: 3
# Variáveis a serem excluídas do escopo.
v <- c(1, 18, 34:38, 40)
# Formula.
scope <- as.formula(sprintf(". ~ %s",
paste(names(db)[-v],
collapse = " + ")))
# Modelo testando cada uma das variáveis entrando.
m0a <- add1(m0, scope = scope, test = "LRT")
m0a
## Single term additions
##
## Model:
## y ~ 1
## Df Deviance AIC LRT Pr(>Chi)
## <none> 89.354 91.354
## Age 1 88.942 92.942 0.4119 0.520998
## Gender 1 82.631 86.631 6.7228 0.009519 **
## Weight 1 86.003 90.003 3.3513 0.067153 .
## BMI 3 88.553 96.553 0.8013 0.849151
## Height 1 86.954 90.954 2.3998 0.121348
## N_pre_Tx 1 88.926 92.926 0.4281 0.512905
## Re_Tx 1 88.984 92.984 0.3699 0.543061
## N_pre_blood_tx 1 89.070 93.070 0.2838 0.594200
## Multip 1 89.103 93.103 0.2513 0.616180
## N_pregn 1 88.169 92.169 1.1847 0.276406
## Abortions 1 89.277 93.277 0.0772 0.781187
## N_abortions 1 88.601 92.601 0.7530 0.385533
## PRA_30 1 89.070 93.070 0.2839 0.594181
## DSAtotal 1 85.880 89.880 3.4739 0.062345 .
## Hd_CAPD 3 88.568 96.568 0.7861 0.852787
## Time_dialysis 1 84.931 88.931 4.4224 0.035470 *
## HLA_MM 1 82.361 86.361 6.9933 0.008181 **
## D_typeI 1 87.109 91.109 2.2448 0.134064
## D_gender 1 89.259 93.259 0.0949 0.758090
## D_age 1 82.722 86.722 6.6313 0.010020 *
## Diff_age 1 88.597 92.597 0.7569 0.384309
## Diff_weight 1 89.321 93.321 0.0327 0.856399
## Diff_height 1 88.868 92.868 0.4858 0.485817
## B_Hypert 1 87.915 91.915 1.4390 0.230301
## B_DM 1 89.302 93.302 0.0520 0.819646
## B_DLP 1 89.005 93.005 0.3485 0.554950
## B_GNC 1 89.121 93.121 0.2331 0.629249
## B_PN 1 83.111 87.111 6.2428 0.012470 *
## B_PKD 1 89.344 93.344 0.0098 0.920988
## B_Hypert_nephro 1 88.227 92.227 1.1268 0.288465
## B_G_other 1 89.288 93.288 0.0654 0.798132
## IschemiaH 1 89.082 93.082 0.2714 0.602365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Variáveis com significância inferior a 0.25.
subset(m0a, `Pr(>Chi)` <= 0.25)
## Df Deviance AIC LRT Pr(>Chi)
## Gender 1 82.631 86.631 6.7228 0.009519 **
## Weight 1 86.003 90.003 3.3513 0.067153 .
## Height 1 86.954 90.954 2.3998 0.121348
## DSAtotal 1 85.880 89.880 3.4739 0.062345 .
## Time_dialysis 1 84.931 88.931 4.4224 0.035470 *
## HLA_MM 1 82.361 86.361 6.9933 0.008181 **
## D_typeI 1 87.109 91.109 2.2448 0.134064
## D_age 1 82.722 86.722 6.6313 0.010020 *
## B_Hypert 1 87.915 91.915 1.4390 0.230301
## B_PN 1 83.111 87.111 6.2428 0.012470 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#-----------------------------------------------------------------------
# Modelo "feito a mão".
# Variáveis selecionadas (p-valor <= 0.25) com adição de mais algumas.
hand <- y ~ Gender + Weight + Height + DSAtotal + Time_dialysis +
HLA_MM + D_typeI + D_age + B_Hypert + B_PN + N_pre_Tx +
N_pre_blood_tx + N_pregn + N_abortions
m1 <- glm(hand,
data = db,
family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m1)
##
## Call:
## glm(formula = hand, family = binomial, data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8288 -0.5106 0.0000 0.3638 1.7600
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.35436 10.87329 -0.400 0.6888
## GenderM 2.01062 1.61317 1.246 0.2126
## Weight -0.04203 0.05002 -0.840 0.4007
## Height -0.03581 0.06528 -0.549 0.5833
## DSAtotalyes total DSA 21.92967 2401.05888 0.009 0.9927
## Time_dialysis 0.01615 0.01219 1.325 0.1852
## HLA_MM 1.69127 0.78186 2.163 0.0305 *
## D_typeIDeceased donor -2.69949 1.70826 -1.580 0.1140
## D_age 0.10908 0.04696 2.323 0.0202 *
## B_Hypertyes 2.66281 2.26348 1.176 0.2394
## B_PNyes -42.03483 3872.54389 -0.011 0.9913
## N_pre_Tx -2.13853 1.52599 -1.401 0.1611
## N_pre_blood_tx 0.37746 0.26152 1.443 0.1489
## N_pregn -0.53132 0.46041 -1.154 0.2485
## N_abortions 4.81725 3.10153 1.553 0.1204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89.354 on 64 degrees of freedom
## Residual deviance: 42.837 on 50 degrees of freedom
## AIC: 72.837
##
## Number of Fisher Scoring iterations: 18
# Teste marginal para o abandono de cada termo do modelo por LRT.
drop1(m1, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
##
## Model:
## y ~ Gender + Weight + Height + DSAtotal + Time_dialysis + HLA_MM +
## D_typeI + D_age + B_Hypert + B_PN + N_pre_Tx + N_pre_blood_tx +
## N_pregn + N_abortions
## Df Deviance AIC LRT Pr(>Chi)
## <none> 42.837 72.837
## Gender 1 44.536 72.536 1.6991 0.1924059
## Weight 1 43.583 71.583 0.7461 0.3877000
## Height 1 43.150 71.150 0.3133 0.5756917
## DSAtotal 1 56.085 84.085 13.2480 0.0002729 ***
## Time_dialysis 1 44.714 72.714 1.8771 0.1706698
## HLA_MM 1 52.345 80.345 9.5087 0.0020451 **
## D_typeI 1 45.840 73.840 3.0038 0.0830708 .
## D_age 1 50.330 78.330 7.4935 0.0061921 **
## B_Hypert 1 44.507 72.507 1.6708 0.1961508
## B_PN 1 58.991 86.991 16.1547 5.837e-05 ***
## N_pre_Tx 1 44.999 72.999 2.1623 0.1414319
## N_pre_blood_tx 1 45.175 73.175 2.3380 0.1262475
## N_pregn 1 44.419 72.419 1.5823 0.2084338
## N_abortions 1 46.612 74.612 3.7754 0.0520118 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
hand <- y ~ Gender + DSAtotal + Time_dialysis + HLA_MM + D_typeI +
D_age + B_PN + N_abortions
m2 <- update(m1, hand)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
drop1(m2, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
##
## Model:
## y ~ Gender + DSAtotal + Time_dialysis + HLA_MM + D_typeI + D_age +
## B_PN + N_abortions
## Df Deviance AIC LRT Pr(>Chi)
## <none> 49.826 67.826
## Gender 1 51.643 67.643 1.8165 0.1777331
## DSAtotal 1 62.273 78.273 12.4471 0.0004186 ***
## Time_dialysis 1 51.111 67.111 1.2852 0.2569275
## HLA_MM 1 57.291 73.291 7.4651 0.0062906 **
## D_typeI 1 50.604 66.604 0.7783 0.3776724
## D_age 1 56.296 72.296 6.4699 0.0109717 *
## B_PN 1 63.082 79.082 13.2562 0.0002717 ***
## N_abortions 1 50.887 66.887 1.0606 0.3030800
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m2)
##
## Call:
## glm(formula = y ~ Gender + DSAtotal + Time_dialysis + HLA_MM +
## D_typeI + D_age + B_PN + N_abortions, family = binomial,
## data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.61548 -0.61233 -0.00016 0.58152 1.99117
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.721e+00 2.338e+00 -3.302 0.00096 ***
## GenderM 1.116e+00 8.443e-01 1.322 0.18626
## DSAtotalyes total DSA 2.065e+01 2.648e+03 0.008 0.99378
## Time_dialysis 1.102e-02 9.934e-03 1.109 0.26726
## HLA_MM 9.461e-01 4.379e-01 2.161 0.03072 *
## D_typeIDeceased donor -8.650e-01 9.918e-01 -0.872 0.38310
## D_age 7.984e-02 3.433e-02 2.325 0.02005 *
## B_PNyes -3.606e+01 4.323e+03 -0.008 0.99334
## N_abortions 1.105e+00 1.166e+00 0.948 0.34323
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89.354 on 64 degrees of freedom
## Residual deviance: 49.826 on 56 degrees of freedom
## AIC: 67.826
##
## Number of Fisher Scoring iterations: 18
#-----------------------------------------------------------------------
# Modelo resultado do forward.
form <- as.formula(sprintf("y ~ %s",
paste(names(db)[-v],
collapse = " + ")))
m00 <- glm(form,
data = db,
family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Seleção forward.
m0s <- step(m0, direction = "forward", scope = scope)
## Start: AIC=91.35
## y ~ 1
##
## Df Deviance AIC
## + HLA_MM 1 82.361 86.361
## + Gender 1 82.631 86.631
## + D_age 1 82.722 86.722
## + B_PN 1 83.111 87.111
## + Time_dialysis 1 84.931 88.931
## + DSAtotal 1 85.880 89.880
## + Weight 1 86.003 90.003
## + Height 1 86.954 90.954
## + D_typeI 1 87.109 91.109
## <none> 89.354 91.354
## + B_Hypert 1 87.915 91.915
## + N_pregn 1 88.169 92.169
## + B_Hypert_nephro 1 88.227 92.227
## + Diff_age 1 88.597 92.597
## + N_abortions 1 88.601 92.601
## + Diff_height 1 88.868 92.868
## + N_pre_Tx 1 88.926 92.926
## + Age 1 88.942 92.942
## + Re_Tx 1 88.984 92.984
## + B_DLP 1 89.005 93.005
## + PRA_30 1 89.070 93.070
## + N_pre_blood_tx 1 89.070 93.070
## + IschemiaH 1 89.082 93.082
## + Multip 1 89.103 93.103
## + B_GNC 1 89.121 93.121
## + D_gender 1 89.259 93.259
## + Abortions 1 89.277 93.277
## + B_G_other 1 89.288 93.288
## + B_DM 1 89.302 93.302
## + Diff_weight 1 89.321 93.321
## + B_PKD 1 89.344 93.344
## + BMI 3 88.553 96.553
## + Hd_CAPD 3 88.568 96.568
##
## Step: AIC=86.36
## y ~ HLA_MM
##
## Df Deviance AIC
## + B_PN 1 74.304 80.304
## + DSAtotal 1 77.871 83.871
## + Gender 1 78.377 84.377
## + Time_dialysis 1 78.561 84.561
## + D_age 1 78.671 84.671
## <none> 82.361 86.361
## + Weight 1 80.405 86.405
## + B_Hypert 1 80.808 86.808
## + N_pregn 1 80.994 86.994
## + Height 1 81.046 87.046
## + B_GNC 1 81.347 87.347
## + D_typeI 1 81.560 87.560
## + N_abortions 1 81.670 87.670
## + PRA_30 1 81.681 87.681
## + B_Hypert_nephro 1 81.838 87.838
## + Diff_height 1 81.910 87.910
## + Diff_age 1 82.055 88.055
## + B_DLP 1 82.125 88.125
## + Multip 1 82.162 88.162
## + Diff_weight 1 82.165 88.165
## + N_pre_blood_tx 1 82.212 88.212
## + B_G_other 1 82.237 88.237
## + Abortions 1 82.238 88.238
## + Re_Tx 1 82.249 88.249
## + N_pre_Tx 1 82.255 88.255
## + Age 1 82.261 88.261
## + B_PKD 1 82.329 88.329
## + D_gender 1 82.357 88.357
## + IschemiaH 1 82.357 88.357
## + B_DM 1 82.358 88.358
## + Hd_CAPD 3 78.925 88.925
## + BMI 3 80.214 90.214
##
## Step: AIC=80.3
## y ~ HLA_MM + B_PN
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## + DSAtotal 1 62.053 70.053
## + Time_dialysis 1 69.851 77.851
## + B_GNC 1 70.877 78.877
## + Gender 1 70.945 78.945
## + D_age 1 71.517 79.517
## + N_pregn 1 71.855 79.855
## <none> 74.304 80.304
## + B_Hypert 1 73.085 81.085
## + PRA_30 1 73.284 81.284
## + N_pre_blood_tx 1 73.422 81.422
## + Multip 1 73.621 81.621
## + Weight 1 73.693 81.693
## + B_DLP 1 73.811 81.811
## + B_G_other 1 73.881 81.881
## + N_abortions 1 73.927 81.927
## + N_pre_Tx 1 73.934 81.934
## + D_typeI 1 74.049 82.049
## + Re_Tx 1 74.077 82.077
## + Diff_height 1 74.144 82.144
## + Diff_weight 1 74.160 82.160
## + B_Hypert_nephro 1 74.176 82.176
## + D_gender 1 74.215 82.215
## + Height 1 74.238 82.238
## + B_DM 1 74.240 82.240
## + Age 1 74.241 82.241
## + Abortions 1 74.288 82.288
## + Diff_age 1 74.297 82.297
## + B_PKD 1 74.299 82.299
## + IschemiaH 1 74.304 82.304
## + Hd_CAPD 3 70.929 82.929
## + BMI 3 72.244 84.244
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=70.05
## y ~ HLA_MM + B_PN + DSAtotal
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## + D_age 1 54.034 64.034
## + Gender 1 59.234 69.234
## <none> 62.053 70.053
## + Time_dialysis 1 60.152 70.152
## + N_pregn 1 60.302 70.302
## + N_pre_blood_tx 1 61.117 71.117
## + B_GNC 1 61.255 71.255
## + B_Hypert 1 61.297 71.297
## + Weight 1 61.515 71.515
## + N_abortions 1 61.568 71.568
## + Diff_height 1 61.634 71.634
## + B_Hypert_nephro 1 61.705 71.705
## + Diff_weight 1 61.745 71.745
## + B_G_other 1 61.758 71.758
## + Multip 1 61.760 71.760
## + B_DM 1 61.826 71.826
## + D_typeI 1 61.838 71.838
## + B_DLP 1 61.957 71.957
## + Abortions 1 61.984 71.984
## + B_PKD 1 61.988 71.988
## + Re_Tx 1 62.008 72.008
## + Height 1 62.017 72.017
## + IschemiaH 1 62.024 72.024
## + Diff_age 1 62.025 72.025
## + N_pre_Tx 1 62.029 72.029
## + Age 1 62.040 72.040
## + PRA_30 1 62.040 72.040
## + D_gender 1 62.041 72.041
## + Hd_CAPD 3 58.742 72.742
## + BMI 3 60.233 74.233
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=64.03
## y ~ HLA_MM + B_PN + DSAtotal + D_age
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 54.034 64.034
## + Gender 1 52.309 64.309
## + Time_dialysis 1 52.448 64.448
## + N_pregn 1 52.614 64.614
## + B_Hypert 1 52.805 64.805
## + B_PKD 1 53.202 65.202
## + N_pre_blood_tx 1 53.371 65.371
## + Re_Tx 1 53.380 65.380
## + B_GNC 1 53.443 65.443
## + D_gender 1 53.567 65.567
## + Diff_age 1 53.620 65.620
## + Diff_height 1 53.820 65.820
## + PRA_30 1 53.835 65.835
## + Multip 1 53.836 65.836
## + B_Hypert_nephro 1 53.850 65.850
## + Diff_weight 1 53.876 65.876
## + Weight 1 53.912 65.912
## + N_abortions 1 53.922 65.922
## + D_typeI 1 53.980 65.980
## + B_DLP 1 53.985 65.985
## + IschemiaH 1 54.000 66.000
## + Height 1 54.003 66.003
## + Hd_CAPD 3 50.005 66.005
## + B_DM 1 54.011 66.011
## + B_G_other 1 54.018 66.018
## + N_pre_Tx 1 54.020 66.020
## + Abortions 1 54.032 66.032
## + Age 1 54.033 66.033
## + BMI 3 51.264 67.264
# Teste marginal.
drop1(m0s, test = "Chisq")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Single term deletions
##
## Model:
## y ~ HLA_MM + B_PN + DSAtotal + D_age
## Df Deviance AIC LRT Pr(>Chi)
## <none> 54.034 64.034
## HLA_MM 1 64.449 72.449 10.4151 0.001250 **
## B_PN 1 70.340 78.340 16.3060 5.389e-05 ***
## DSAtotal 1 71.517 79.517 17.4836 2.898e-05 ***
## D_age 1 62.053 70.053 8.0196 0.004627 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0s)
##
## Call:
## glm(formula = y ~ HLA_MM + B_PN + DSAtotal + D_age, family = binomial,
## data = db)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.38909 -0.81549 -0.00015 0.77211 1.66600
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.64246 2.44291 -3.128 0.00176 **
## HLA_MM 1.07573 0.43544 2.470 0.01349 *
## B_PNyes -37.44502 4397.55819 -0.009 0.99321
## DSAtotalyes total DSA 21.53193 2633.45558 0.008 0.99348
## D_age 0.08292 0.03259 2.544 0.01096 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 89.354 on 64 degrees of freedom
## Residual deviance: 54.034 on 60 degrees of freedom
## AIC: 64.034
##
## Number of Fisher Scoring iterations: 18
# par(mfrow = c(2, 2))
# plot(m0s)
# layout(1)
# Predição.
pred <- as.integer(fitted(m0s) > 0.5)
tb <- table(pred, m0s$model$y)
# Taxa de acerto.
100 * sum(diag(tb))/sum(tb)
## [1] 76.92308
#-----------------------------------------------------------------------
# Curvas ROC dos ajustes.
# help(package = "pROC", h = "html")
perf_m0s <- roc(response = m0s$model$y, predictor = fitted(m0s))
perf_m1 <- roc(response = m1$model$y, predictor = fitted(m1))
perf_m2 <- roc(response = m2$model$y, predictor = fitted(m2))
# Métodos e classes.
# class(perf_m0s)
# methods(class = class(perf_m0s))
# Gráfico das ROC.
plot(perf_m0s,
col = 2,
main = "ROC curve",
xlab = "Specificity",
ylab = "Sensitivity")
plot(perf_m1, col = 4, add = TRUE)
plot(perf_m2, col = 3, add = TRUE)
legend("bottomright",
legend = c("Forward", "Hand made 1", "Hand made 2"),
col = c(2, 4, 3),
lty = 1,
bty = "n")

# Para extrair as medidas da curva com área, IC e erro-padrão.
get_ROCmeasures <- function(rocobj) {
val <- c(c(ci(rocobj)), sqrt(var(rocobj)))
val <- val[c(2, 1, 3, 4)]
names(val) <- c("AUC", "lower ci", "upper ci", "std error")
val
}
# Obtém as medidas para cada um dos modelos.
sapply(list(m0s = perf_m0s,
m1 = perf_m1,
m2 = perf_m2), FUN = get_ROCmeasures)
## m0s m1 m2
## AUC 0.8754789 0.94348659 0.9090038
## lower ci 0.7935085 0.89228837 0.8386388
## upper ci 0.9574493 0.99468481 0.9793689
## std error 0.0418224 0.02612202 0.0359012
# Testa se as curvas feitas a mão diferem entre si.
roc.test(roc1 = perf_m1,
roc2 = perf_m2)
##
## DeLong's test for two correlated ROC curves
##
## data: perf_m1 and perf_m2
## Z = 1.3809, p-value = 0.1673
## alternative hypothesis: true difference in AUC is not equal to 0
## sample estimates:
## AUC of roc1 AUC of roc2
## 0.9434866 0.9090038
#-----------------------------------------------------------------------
Árvore de classificação
library(rpart)
levels(da$Gender)
## [1] "F" "M"
levels(da$DSAtotal)
## [1] "no total DSA" "yes total DSA"
fit <- rpart(hand,
data = db,
method = "class",
control = rpart.control(minsplit = 15, cp = 0.001))
printcp(fit)
##
## Classification tree:
## rpart(formula = hand, data = db, method = "class", control = rpart.control(minsplit = 15,
## cp = 0.001))
##
## Variables actually used in tree construction:
## [1] D_age Gender Time_dialysis
##
## Root node error: 29/65 = 0.44615
##
## n= 65
##
## CP nsplit rel error xerror xstd
## 1 0.310345 0 1.00000 1.00000 0.13820
## 2 0.103448 1 0.68966 1.00000 0.13820
## 3 0.068966 2 0.58621 1.06897 0.13886
## 4 0.017241 3 0.51724 0.89655 0.13620
## 5 0.001000 5 0.48276 0.86207 0.13525
plotcp(fit)

summary(fit)
## Call:
## rpart(formula = hand, data = db, method = "class", control = rpart.control(minsplit = 15,
## cp = 0.001))
## n= 65
##
## CP nsplit rel error xerror xstd
## 1 0.31034483 0 1.0000000 1.0000000 0.1381960
## 2 0.10344828 1 0.6896552 1.0000000 0.1381960
## 3 0.06896552 2 0.5862069 1.0689655 0.1388563
## 4 0.01724138 3 0.5172414 0.8965517 0.1361960
## 5 0.00100000 5 0.4827586 0.8620690 0.1352525
##
## Variable importance
## Time_dialysis D_age Gender HLA_MM N_abortions
## 42 32 13 10 2
## DSAtotal
## 1
##
## Node number 1: 65 observations, complexity param=0.3103448
## predicted class=0 expected loss=0.4461538 P(node) =1
## class counts: 36 29
## probabilities: 0.554 0.446
## left son=2 (38 obs) right son=3 (27 obs)
## Primary splits:
## D_age < 47 to the left, improve=4.491498, (0 missing)
## Gender splits as LR, improve=3.226391, (0 missing)
## HLA_MM < 3.5 to the left, improve=3.021628, (0 missing)
## Time_dialysis < 45.5 to the left, improve=2.549718, (0 missing)
## B_PN splits as RL, improve=2.156410, (0 missing)
## Surrogate splits:
## HLA_MM < 4.5 to the left, agree=0.692, adj=0.259, (0 split)
## Time_dialysis < 45.5 to the left, agree=0.615, adj=0.074, (0 split)
## N_abortions < 1.5 to the left, agree=0.615, adj=0.074, (0 split)
##
## Node number 2: 38 observations, complexity param=0.1034483
## predicted class=0 expected loss=0.2894737 P(node) =0.5846154
## class counts: 27 11
## probabilities: 0.711 0.289
## left son=4 (33 obs) right son=5 (5 obs)
## Primary splits:
## Time_dialysis < 93 to the left, improve=3.0012760, (0 missing)
## Gender splits as LR, improve=2.5789470, (0 missing)
## DSAtotal splits as LR, improve=2.0274120, (0 missing)
## HLA_MM < 0.5 to the left, improve=0.9649123, (0 missing)
## D_age < 25.5 to the left, improve=0.9649123, (0 missing)
##
## Node number 3: 27 observations, complexity param=0.06896552
## predicted class=1 expected loss=0.3333333 P(node) =0.4153846
## class counts: 9 18
## probabilities: 0.333 0.667
## left son=6 (6 obs) right son=7 (21 obs)
## Primary splits:
## Time_dialysis < 17.5 to the left, improve=1.71428600, (0 missing)
## HLA_MM < 3.5 to the left, improve=1.61538500, (0 missing)
## D_age < 48.5 to the right, improve=0.68571430, (0 missing)
## Gender splits as LR, improve=0.03947368, (0 missing)
##
## Node number 4: 33 observations, complexity param=0.01724138
## predicted class=0 expected loss=0.2121212 P(node) =0.5076923
## class counts: 26 7
## probabilities: 0.788 0.212
## left son=8 (18 obs) right son=9 (15 obs)
## Primary splits:
## Gender splits as LR, improve=1.9414140, (0 missing)
## DSAtotal splits as LR, improve=1.7731600, (0 missing)
## D_age < 30.5 to the left, improve=0.9503030, (0 missing)
## HLA_MM < 1 to the left, improve=0.5303030, (0 missing)
## Time_dialysis < 4 to the right, improve=0.4160173, (0 missing)
## Surrogate splits:
## Time_dialysis < 14.5 to the left, agree=0.697, adj=0.333, (0 split)
## HLA_MM < 2.5 to the right, agree=0.606, adj=0.133, (0 split)
## DSAtotal splits as LR, agree=0.576, adj=0.067, (0 split)
## D_age < 23.5 to the right, agree=0.576, adj=0.067, (0 split)
##
## Node number 5: 5 observations
## predicted class=1 expected loss=0.2 P(node) =0.07692308
## class counts: 1 4
## probabilities: 0.200 0.800
##
## Node number 6: 6 observations
## predicted class=0 expected loss=0.3333333 P(node) =0.09230769
## class counts: 4 2
## probabilities: 0.667 0.333
##
## Node number 7: 21 observations
## predicted class=1 expected loss=0.2380952 P(node) =0.3230769
## class counts: 5 16
## probabilities: 0.238 0.762
##
## Node number 8: 18 observations
## predicted class=0 expected loss=0.05555556 P(node) =0.2769231
## class counts: 17 1
## probabilities: 0.944 0.056
##
## Node number 9: 15 observations, complexity param=0.01724138
## predicted class=0 expected loss=0.4 P(node) =0.2307692
## class counts: 9 6
## probabilities: 0.600 0.400
## left son=18 (10 obs) right son=19 (5 obs)
## Primary splits:
## Time_dialysis < 21.5 to the right, improve=0.60000000, (0 missing)
## D_age < 32 to the left, improve=0.60000000, (0 missing)
## HLA_MM < 2.5 to the left, improve=0.08888889, (0 missing)
## D_typeI splits as LR, improve=0.02142857, (0 missing)
## Surrogate splits:
## D_age < 37 to the left, agree=0.733, adj=0.2, (0 split)
##
## Node number 18: 10 observations
## predicted class=0 expected loss=0.3 P(node) =0.1538462
## class counts: 7 3
## probabilities: 0.700 0.300
##
## Node number 19: 5 observations
## predicted class=1 expected loss=0.4 P(node) =0.07692308
## class counts: 2 3
## probabilities: 0.400 0.600
plot(fit, uniform = TRUE)
text(fit, use.n = TRUE, all = TRUE, cex = 0.8)

pred <- apply(predict(fit), MARGIN = 1, FUN = which.max) - 1
tb <- table(pred, db$y)
# Taxa de acerto.
100 * sum(diag(tb))/sum(tb)
## [1] 78.46154