#Regression Problem, Case 3: the airquality data (datasets package)#
Dataset whose response variable is the ozone concentration in the air, in parts per billion (Ozone). The goal is to explain this variable with 3 covariates:
Solar.R: solar radiation
Wind: average wind speed in miles per hour
Temp: maximum daily temperature in degrees Fahrenheit
The analysis is split into 4 files:
Part1: analysis with the covariate Temp, comparing the models by predictive quality
Part2: analysis with the covariate Temp, comparing the models by goodness of fit
Part3: analysis with the three covariates, comparing the models by predictive quality
Part4: analysis with the three covariates, comparing the models by goodness of fit
pkg=c("mcglm","xtable","cluster","class","mvtnorm","ggplot2","splines","e1071","kernlab","mlbench","rpart","MASS","gtools","geoR","pROC","data.table","mgcv","gamlss.data")
sapply(pkg,require,character.only=T)
## mcglm xtable cluster class mvtnorm ggplot2
## TRUE TRUE TRUE TRUE TRUE TRUE
## splines e1071 kernlab mlbench rpart MASS
## TRUE TRUE TRUE TRUE TRUE TRUE
## gtools geoR pROC data.table mgcv gamlss.data
## TRUE TRUE TRUE TRUE TRUE TRUE
source("conditional_prediction.R")
set.seed(13032019)
data("airquality")
dat=na.omit(data.frame(y=sqrt(airquality$Ozone),X1=airquality$Solar.R,X2=airquality$Wind,X3=airquality$Temp))
summary(dat)
## y X1 X2 X3
## Min. : 1.000 Min. : 7.0 Min. : 2.30 Min. :57.00
## 1st Qu.: 4.243 1st Qu.:113.5 1st Qu.: 7.40 1st Qu.:71.00
## Median : 5.568 Median :207.0 Median : 9.70 Median :79.00
## Mean : 6.016 Mean :184.8 Mean : 9.94 Mean :77.79
## 3rd Qu.: 7.874 3rd Qu.:255.5 3rd Qu.:11.50 3rd Qu.:84.50
## Max. :12.961 Max. :334.0 Max. :20.70 Max. :97.00
ggplot(data=dat,aes(x=X3,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Temperature in Fahrenheit")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X2,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Wind speed in miles per hour")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X1,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Solar radiation")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X1*X2,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Interaction of solar radiation and wind speed")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X1*X3,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Interaction of solar radiation and air temperature")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X2*X3,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Interaction of wind speed and air temperature")+ylab("Square root of mean ozone in parts per billion")
ggplot(data=dat,aes(x=X1*X2*X3,y=y))+geom_point(alpha=.5,size=4,col="black")+xlab("Interaction of the three covariates")+ylab("Square root of mean ozone in parts per billion")
There appears to be a pattern in the response over the interaction of solar radiation with temperature, and over the interaction of wind speed with air temperature. However, since a similar pattern already shows up against solar radiation alone and against wind speed alone, the interactions will likely not need to enter the model, as these covariates already carry the same information.
There are 111 complete observations in total. They are split into 66 for training (∼60%) and 45 for testing (∼40%).
#indexes<-sample(nrow(dat),floor(0.6*nrow(dat)),replace = F)
indexes=c(34,90,20,86,66,11,36,3,65,8,59,2,94,107,56,76,101,80,6,17,63,19,15,97,22,18,27,111,13,38,7,16,23,69,31,42,103,82,5,99,39,62,105,78,70,53,47,44,108,37,50,14,96,29,89,35,83,1,54,72,75,25,52,9,110,85)
train=dat[indexes,]   # training set (66 observations)
test=dat[-indexes,]   # test set (45 observations)
Basically, several models were fitted in order to compare them in terms of predictive quality and overall goodness of fit.
First, models of the MCGLM class were fitted, whose mean and covariance are expressed as:
E(Ozone) = μ
V(Ozone) = Ω
Here Ω is specified through a matrix linear predictor, Ω = τ0·Z0 + τ1·Z1 + …, where Z0 is the identity matrix (independent observations) and the remaining Z matrices encode the dependence structures introduced below. For the MCGLMs, the mean was considered in two ways:
as a constant,
μ = β0
or with the covariates,
μ = β0 + β1·Wind + β2·Temp
Z0 <- mc_id(train)
fit.iid.0 <- mcglm(linear_pred = c(y~1), list(Z0),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .3,
"max_iter"=1000),
covariance = "identity",
control_initial = list(regression=list(c(6)),
power=list(c(0)),
tau=list(c(1)),
rho=0))
plot(fit.iid.0,'algorithm')
summary(fit.iid.0)
## Call: y ~ 1
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) 6.003375 0.3053544 19.66035
##
## Dispersion:
## Estimates Std.error Z value
## 1 6.153927 0.9097819 6.764178
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 29
fit.norm.1=glm(y~X2+X3,data = train,family = gaussian())
form1=fit.norm.1$formula
n.cov=names(fit.norm.1$coefficients)[-1]
form1=y~X2+X3
ptm=proc.time()
fit.iid.1 <- mcglm(linear_pred = c(form1), list(Z0),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .5,
"max_iter"=1000),
covariance = "identity",
control_initial = list(regression=list(fit.norm.1$coefficients),
power=list(c(0)),
tau=list(c(5)),
rho=0))
proc.time()-ptm
## user system elapsed
## 0.98 0.02 1.06
summary(fit.iid.1)
## Call: y ~ X2 + X3
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) -2.6641210 1.96173574 -1.358043
## X2 -0.2350696 0.05797174 -4.054899
## X3 0.1440339 0.02038469 7.065787
##
## Dispersion:
## Estimates Std.error Z value
## 1 1.872072 0.3311735 5.652845
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 16
The covariates were significant; moreover, the dispersion dropped from 6.15 to 1.87.
source("conditional_prediction.R")
dat.p=rbind(train[,c("X1","X2","X3")],test[,c("X1","X2","X3")])
n.cov=c("X2","X3")
S2=fit.iid.1$Covariance*diag(nrow(dat.p))
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.iid.1,S2=S2,sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 116.6907
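The function pred.conditional comes from the sourced file conditional_prediction.R, which is not shown here. It presumably predicts the test responses via the conditional distribution of a joint Gaussian: with the joint covariance S2 ordered as (train, test), the conditional mean is μ_test + Σ21 Σ11⁻¹ (y_train − μ_train). A minimal sketch of that computation, under this assumption (cond_mean is a hypothetical helper, not part of the original scripts):
cond_mean=function(mu_tr,mu_te,y_tr,S2){
  n=length(y_tr)                 # number of training observations
  S11=S2[1:n,1:n]                # train block of the joint covariance
  S21=S2[-(1:n),1:n]             # test-by-train block
  as.numeric(mu_te+S21%*%solve(S11,y_tr-mu_tr))
}
For the independent fit above, S2 is diagonal, so the conditional mean reduces to the marginal mean μ_test.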
Next, models that account for dependence between the observations. In this first one, the dependence enters through an exponential kernel K_exp, given by:
K_exp = exp(−D/ϕ)
where
D_ij = ‖x_i − x_j‖
with ‖·‖ denoting the Euclidean distance.
A grid of 200 candidate values for ϕ was specified, ranging from min(D)/3 to (min(D) + max(D))/2.
D=dist(train[,c("X1","X2","X3")])
phi.grid=seq(min(D)/3,(min(D)+max(D))/2,len=200)
Computing K_exp for each value in the ϕ grid; a Matérn covariance with κ = 0.5, as used below, is exactly the exponential kernel.
Z_exp=lapply(1:length(phi.grid), function(i) as.matrix(cov.spatial(D,cov.model = "matern",cov.pars = c(1,phi.grid[i]),kappa = .5)))
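For reference, the same kernel can be built directly from the distance matrix; since the Matérn correlation with κ = 0.5 equals the exponential kernel, any discrepancy should be confined to how the diagonal (zero distances) is handled:
K_direct=exp(-as.matrix(D)/phi.grid[1])   # direct exponential kernel for the first grid value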
gof=lapply(1:length(Z_exp),function(i) mc_sic_covariance(fit.iid.0, scope = list(Z_exp[[i]]), idx = 1, data = train, response = 1))
# each gof element unlists to 5 values; keep the first of every block of 5, the SIC itself
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~phi.grid,xlab = "Phi",ylab = "SIC",main="Exponential kernel",type=c("p","l"),lwd=2,pch=19)
seq(1,length(Z_exp))[min(sic)==sic]
## [1] 26
The SIC is minimized at grid position 26; the fit below nevertheless uses the largest value of ϕ in the grid (position 200).
ptm=proc.time()
fit.exp.0 <- mcglm(linear_pred = c(y~1), list(list(Z0[[1]],Z_exp[[200]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.0$Regression),
power=list(c(0)),
tau=list(c(4,2)),
rho=0))
proc.time()-ptm
## user system elapsed
## 14.30 0.28 15.12
summary(fit.exp.0)
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Call: y ~ 1
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) 4.87246 3.175156 1.534557
##
## Dispersion:
## Estimates Std.error Z value
## 1 21.06273 NaN NaN
## 2 20.31458 NaN NaN
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 145
plot(fit.exp.0,"algorithm")
Z0p=mc_id(dat.p)
Dp=dist(dat.p)
Z_expp=as.matrix(cov.spatial(Dp,cov.model = "matern",cov.pars = c(1,phi.grid[200]),kappa = .5))
S2=fit.exp.0$Covariance[1]*Z0p[[1]]+fit.exp.0$Covariance[2]*Z_expp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.exp.0,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 111.41
gof=lapply(1:length(Z_exp),function(i) mc_sic_covariance(fit.iid.1, scope = list(Z_exp[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~phi.grid,xlab = "Phi",ylab = "SIC",main="Exponential kernel",type=c("p","l"),lwd=2,pch=19)
seq(1,length(Z_exp))[min(sic)==sic]
## [1] 7
ptm=proc.time()
fit.exp.1 <- mcglm(linear_pred = c(form1), list(list(Z0[[1]],Z_exp[[30]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.1$Regression),
power=list(c(0)),
tau=list(c(4,2)),
rho=0))
proc.time()-ptm
## user system elapsed
## 22.16 0.42 23.41
summary(fit.exp.1)
## Call: y ~ X2 + X3
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) -1.7550978 2.06066174 -0.8517156
## X2 -0.2359486 0.05586325 -4.2236829
## X3 0.1307547 0.02260942 5.7831932
##
## Dispersion:
## Estimates Std.error Z value
## 1 1.9036099 0.3648503 5.217509
## 2 0.4750869 0.4032298 1.178204
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 223
plot(fit.exp.1,"algorithm")
Z_expp=as.matrix(cov.spatial(Dp,cov.model = "matern",cov.pars = c(1,phi.grid[30]),kappa = .5))
S2=fit.exp.1$Covariance[1]*Z0p[[1]]+fit.exp.1$Covariance[2]*Z_expp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.exp.1,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 100.2132
Again models with dependence between the observations, now with a Gaussian kernel K_gauss, given by:
K_gauss = exp(−(D/ϕ)²)
where
D_ij = ‖x_i − x_j‖
with ‖·‖ denoting the Euclidean distance.
A grid of 200 candidate values for ϕ was specified, ranging from min(D)/5.92 to (min(D) + max(D))/2.
phi.grid=seq(min(D)/5.92,(min(D)+max(D))/2,len=200)
Computing K_gauss for each value in the ϕ grid; here it is approximated by a Matérn covariance with κ = 2.5, which is close to the Gaussian kernel.
Z_gauss=lapply(1:length(phi.grid), function(i) as.matrix(cov.spatial(D,cov.model = "matern",cov.pars = c(1,phi.grid[i]),kappa = 2.5)))
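For reference, the exact Gaussian kernel can also be computed directly from the distance matrix; the Matérn with κ = 2.5 used above is a smooth approximation to it:
K_gauss_direct=exp(-(as.matrix(D)/phi.grid[1])^2)   # exact Gaussian kernel for the first grid value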
gof=lapply(1:length(Z_gauss),function(i) mc_sic_covariance(fit.iid.0, scope = list(Z_gauss[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~phi.grid,xlab = "Phi",ylab = "SIC",main="Gaussian kernel",type=c("p","l"),lwd=2,pch=19)
seq(1,length(Z_gauss))[min(sic)==sic]
## [1] 11
ptm=proc.time()
fit.gauss.0 <- mcglm(linear_pred = c(y~1), list(list(Z0[[1]],Z_gauss[[200]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.0$Regression),
power=list(c(0)),
tau=list(c(4,2)),
rho=0))
proc.time()-ptm
## user system elapsed
## 23.04 0.44 24.27
summary(fit.gauss.0)
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Warning in sqrt(diag(object$vcov)): NaNs produced
## Call: y ~ 1
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) 2.470607 26.02355 0.09493737
##
## Dispersion:
## Estimates Std.error Z value
## 1 948.3891 NaN NaN
## 2 946.9229 NaN NaN
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 235
plot(fit.gauss.0,"algorithm")
Z_gaussp=as.matrix(cov.spatial(Dp,cov.model = "matern",cov.pars = c(1,phi.grid[200]),kappa = 2.5))
S2=fit.gauss.0$Covariance[1]*Z0p[[1]]+fit.gauss.0$Covariance[2]*Z_gaussp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.gauss.0,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 95.65213
gof=lapply(1:length(Z_gauss),function(i) mc_sic_covariance(fit.iid.1, scope = list(Z_gauss[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~phi.grid,xlab = "Phi",ylab = "SIC",main="Gaussian kernel",type=c("p","l"),lwd=2,pch=19)
seq(1,length(Z_gauss))[min(sic)==sic]
## [1] 4
ptm=proc.time()
fit.gauss.1 <- mcglm(linear_pred = c(form1), list(list(Z0[[1]],Z_gauss[[20]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.1$Regression),
power=list(c(0)),
tau=list(c(4,2)),
rho=0))
proc.time()-ptm
## user system elapsed
## 25.53 0.27 26.49
summary(fit.gauss.1)
## Call: y ~ X2 + X3
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) -2.1539600 1.98887540 -1.083004
## X2 -0.2358008 0.05690299 -4.143908
## X3 0.1365830 0.02113113 6.463592
##
## Dispersion:
## Estimates Std.error Z value
## 1 1.86345335 0.3415262 5.4562527
## 2 0.08929916 0.1666174 0.5359533
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 249
plot(fit.gauss.1,"algorithm")
Z_gaussp=as.matrix(cov.spatial(Dp,cov.model = "matern",cov.pars = c(1,phi.grid[20]),kappa = 2.5))
S2=fit.gauss.1$Covariance[1]*Z0p[[1]]+fit.gauss.1$Covariance[2]*Z_gaussp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.gauss.1,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 107.3748
A third dependence structure is built from k-means clustering of the covariates: observations in the same cluster are treated as correlated, and the kernel is the corresponding block-diagonal indicator matrix (see the sketch after the SIC search below). k-means is run for every number of clusters from 2 up to nrow(train)/2 + 1, and the SIC again selects among the candidate matrices.
n.clust=seq(1,nrow(train)/2)
set.seed(222)
ag.n=lapply(1:length(n.clust), function(i) kmeans(train[,c("X1","X2","X3")],i+1,iter.max = 100))
Z_disc=lapply(1:length(n.clust), function(i) as.matrix(mc_mixed(~0+factor(id.n),data = data.frame(id.n=ag.n[[i]]$cluster))[[1]]))
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.iid.0, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
e=seq(1,length(Z_disc))[min(sic)==sic]
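What mc_mixed builds here is, in essence, a binary similarity matrix: entry (i, j) equals 1 when observations i and j fall in the same cluster. A minimal sketch of the same construction from the cluster labels:
cl=ag.n[[e]]$cluster                # cluster label of each training observation
B=model.matrix(~0+factor(cl))       # one indicator column per cluster
K_clust=B%*%t(B)                    # (i, j) = 1 if same cluster, 0 otherwise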
##################################
ptm=proc.time()
fit.kmedias.0 <- mcglm(linear_pred = c(y~1), list(list(Z0[[1]],Z_disc[[e]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.0$Regression),
power=list(c(0)),
tau=list(c(3,1)),
rho=0))
proc.time()-ptm
## user system elapsed
## 8.39 0.14 8.81
summary(fit.kmedias.0)
## Call: y ~ 1
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) 5.692749 0.5019198 11.34195
##
## Dispersion:
## Estimates Std.error Z value
## 1 3.059849 0.9028368 3.389150
## 2 2.377775 1.2351223 1.925133
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 98
plot(fit.kmedias.0,"algorithm")
source("kmeansPred.R")
agp=kmeans.pred(dat.p,ag.n[[e]]$centers)
Z_discp=as.matrix(mc_mixed(~0+factor(id.n),data = data.frame(id.n=agp))[[1]])
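kmeans.pred (from kmeansPred.R, also not shown) presumably assigns each row of dat.p to its nearest fitted k-means center. A minimal sketch of that idea, assuming squared Euclidean distance (kmeans_assign is a hypothetical equivalent, not the original helper):
kmeans_assign=function(X,centers){
  # for each row of X, index of the center with smallest squared distance
  apply(X,1,function(x) which.min(colSums((t(centers)-x)^2)))
}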
S2=fit.kmedias.0$Covariance[1]*Z0p[[1]]+fit.kmedias.0$Covariance[2]*Z_discp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.kmedias.0,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 189.1569
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.iid.1, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
e=seq(1,length(Z_disc))[min(sic)==sic]
##################################
ptm=proc.time()
fit.kmedias.1 <- mcglm(linear_pred = c(form1), list(list(Z0[[1]],Z_disc[[e]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .1,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.1$Regression),
power=list(c(0)),
tau=list(c(3,1)),
rho=0))
proc.time()-ptm
## user system elapsed
## 10.11 0.17 10.58
summary(fit.kmedias.1)
## Call: y ~ X2 + X3
##
## Link function: identity
## Variance function: tweedie
## Covariance function: identity
## Regression:
## Estimates Std.error Z value
## (Intercept) -2.3727759 1.94525170 -1.219778
## X2 -0.2410573 0.05539021 -4.351984
## X3 0.1398581 0.02096531 6.670932
##
## Dispersion:
## Estimates Std.error Z value
## 1 1.157778 0.3214739 3.601470
## 2 0.686048 0.3590852 1.910544
##
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 118
plot(fit.kmedias.1,"algorithm")
agp=kmeans.pred(dat.p,ag.n[[e]]$centers)
Z_discp=as.matrix(mc_mixed(~0+factor(id.n),data = data.frame(id.n=agp))[[1]])
S2=fit.kmedias.1$Covariance[1]*Z0p[[1]]+fit.kmedias.1$Covariance[2]*Z_discp
preds=pred.conditional(X=as.matrix(test[,n.cov]),model = fit.kmedias.1,S2=as.matrix(S2),sig = F)
sum((test$y-preds$mu.est)^2)
## [1] 95.69739
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.gauss.0, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
seq(1,length(Z_disc))[min(sic)==sic]
## [1] 3
##################################
ptm=proc.time()
fit.gauss.kmedias.0 <- mcglm(linear_pred = c(y~1), list(list(Z0[[1]],Z_gauss[[11]],Z_disc[[12]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .01,
"max_iter"=1000,
tol=10e-6),
covariance = "identity",
control_initial = list(regression=list(fit.iid.0$Regression),
power=list(c(0)),
tau=list(c(6,5,2)),
rho=0))
proc.time()-ptm
The model did not converge.
summary(fit.gauss.kmedias.0)
plot(fit.gauss.kmedias.0,"algorithm")
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.gauss.1, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
e=seq(1,length(Z_disc))[min(sic)==sic]
##################################
ptm=proc.time()
fit.gauss.kmedias.1 <- mcglm(linear_pred = c(form1), list(list(Z0[[1]],Z_gauss[[4]],Z_disc[[e]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .05,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.1$Regression),
power=list(c(0)),
tau=list(c(3,1,1)),
rho=0))
proc.time()-ptm
summary(fit.gauss.kmedias.1)
plot(fit.gauss.kmedias.1,"algorithm")
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.exp.0, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
e=seq(1,length(Z_disc))[min(sic)==sic]
##################################
ptm=proc.time()
fit.exp.kmedias.0 <- mcglm(linear_pred = c(y~1), list(list(Z0[[1]],Z_exp[[26]],Z_disc[[e]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "rc",
"tuning" = .7,
"max_iter"=1000,
tol=10e-5),
covariance = "identity",
control_initial = list(regression=list(fit.iid.0$Regression),
power=list(c(0)),
tau=list(c(6,5,1)),
rho=0))
proc.time()-ptm
The model did not converge.
summary(fit.exp.kmedias.0)
plot(fit.exp.kmedias.0,"algorithm")
gof=lapply(1:length(Z_disc),function(i) mc_sic_covariance(fit.exp.1, scope = list(Z_disc[[i]]), idx = 1, data = train, response = 1))
sic= unlist(gof)[seq(1,(length(unlist(gof))-4),by=5)]
lattice::xyplot(sic~n.clust,xlab = "Clusters",ylab = "SIC",main="Kernel via k-means",type=c("p","l"),lwd=2,pch=19)
e=seq(1,length(Z_disc))[min(sic)==sic]
##################################
ptm=proc.time()
fit.exp.kmedias.1 <- mcglm(linear_pred = c(form1), list(list(Z0[[1]],Z_exp[[7]],Z_disc[[e]])),
link = "identity", variance = "tweedie",
data = train, power_fixed = T,
control_algorithm = list("verbose" = F,
"method" = "chaser",
"tuning" = .05,
"max_iter"=1000,
tol=10e-7),
covariance = "identity",
control_initial = list(regression=list(fit.iid.1$Regression),
power=list(c(0)),
tau=list(c(3,1,1)),
rho=0))
proc.time()-ptm
The model did not converge.
summary(fit.exp.kmedias.1)
plot(fit.exp.kmedias.1,"algorithm")
Finally, for comparison, two off-the-shelf learners were fitted on the same training data: an SVM with radial kernel and a Gaussian GAM with a smooth term for each covariate.
fit.svm=svm(y~X1+X2+X3,kernel="radial",data = train)
pred.svm=predict(fit.svm,test,type="response")
sum((test$y-pred.svm)^2)
## [1] 87.48491
fit.gam=gam(y~s(X1)+s(X2)+s(X3),data = train,family = gaussian())
pred.gam=predict(fit.gam,test,type="response")
sum((test$y-pred.gam)^2)
## [1] 87.96051
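Summing up the predictive comparison (sum of squared errors on the 45 test observations): fit.iid.1 = 116.69; fit.exp.0 = 111.41; fit.exp.1 = 100.21; fit.gauss.0 = 95.65; fit.gauss.1 = 107.37; fit.kmedias.0 = 189.16; fit.kmedias.1 = 95.70; SVM = 87.48; GAM = 87.96. The SVM and the GAM achieved the best predictive quality, followed by fit.gauss.0 and fit.kmedias.1 among the MCGLMs.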