#require(ASOR)
require(MASS)
#Attach("../.R_Store")
#find("Iowa")
Iowa <- read.csv("../CSV/Iowa.csv")
names(Iowa)
head(Iowa)
iowa.lm1 <- lm(Yield ~ ., Iowa)
iowa.step <- stepAIC(iowa.lm1, scope = list(lower = ~ Year,
upper = ~.), k = log(nrow(Iowa)), trace = F)
dropterm(iowa.step, test = "F", k = log(nrow(Iowa)), sorted = T)
## now inspect for possible curvatures with GAM's
## note: tendency to use simple terms and avoid interactions
## possile way around is to redefine variables such that the resultant ones
## are likely to have additive effects
##
library(mgcv)
##alternatives are vgam and gam
## here the automatic procedure to fins knots does not work (samll data)
## so knots are held fixed
## default family is gaussian
iowa.gam <- gam(Yield ~ s(Temp4,k=5) + s(Rain0,k=5) +
s(Rain2,k=5) + s(Year,k=5),
data = Iowa, trace=T)
## plot functions available -- allows inspecting the representations of each contribution
par(mfrow = c(2,2))
plot(iowa.gam, se = T, ylim = c(-30, 30), resid = TRUE, pch=19)
par(mfrow = c(2,2))
plot(iowa.gam, se = T, resid = T)
summary(iowa.gam)
## effect of each covariate can be discussed, in particular bits where it remains flat
## for Year the increased stoped during the 2nd world war -- work stoped in favor of other areas
## this kind of behaviour is not captured by polinomial regressions
## reinforces the idea of using GAM for exploration, detective work
library(splines)
iowa.ns <- lm(Yield ~ ns(Temp4, df=3) + ns(Rain0, df=3) + ns(Rain2, df = 3) + ns(Year, df=3), Iowa)
termplot(iowa.ns, se=T, partial.resid = T, rug = T)
dropterm(iowa.ns, test = "F", k = log(nrow(Iowa)))
# 8.8 Additive models
rock.lm <- lm(log(perm) ~ area + peri + shape, data = rock)
summary(rock.lm)
rock.gam <- gam(log(perm) ~ s(area) + s(peri) + s(shape),
control = gam.control(maxit = 50), data = rock)
summary(rock.gam)
anova(rock.lm, rock.gam)
par(mfrow = c(2, 3), pty = "s")
plot(rock.gam, se = T, resid = T)
rock.gam1 <- gam(log(perm) ~ area + peri + s(shape), data = rock)
summary(rock.gam1)
anova(rock.gam1)
plot(rock.gam1, se = T, all.terms = T, resid = T)
anova(rock.lm, rock.gam1, rock.gam)
## another example
## try to build a predictor for the proportion between the 2 species as a function of ...
## ... position (lat/long), PTime (time of the year when the catch is made, number of days from the start of the year -- periodic?)
## Depth, Rland (distance from coast), Mud (kind of botton -- percentage of mud)
## the last tree are (partially) functions of lat/long
## so lat/long is reserved to capture anything which may be left
## data on proportion of 1 specie w.r.t total numbers
npf()
## the species move around to breed and grow -- direction generally to off-shore
## in derection of deeper waters
names(TigersW)
## V[Y] = \mu(1-\mu) / T/\psi quasibinomial(logit)
## te is a tensor spline
npf.gam <- gam(Psem/Tiger ~ te(PDay, Rland) + te(PDay, Depth_av) + s(Mud_av) +
s(Longitude, Latitude), family=quasibinomial, data=TigersW,
weights=Tiger, trace=T)
## Psem : weights of one specie and Tiget the total weight
## av stands for average value over the cell
## s(Longitude, Latitude) means an isotropic term so they need to be
## measure in the same scale ==> s(,) forces isotropy -- isotropic basis functions
plot(npf.gam)
## red lines are conf. intervals for the dakr contour lines
## mud_av indicates specie favor mud areas
##
Aus(add=T, col="gold")
## or other way to look at this
vis.gam(npf.gam) ## indicates the periodic behavior
vis.gam(npf.gam, c("PDay","Rland"))
vis.gam(npf.gam, c("PDay","Depth_av"))
## no variation in proportions in shalow areas but yes in deeper areas
## in practice difficult to have more than 2 pred. in 1 term
## lots of try an error and tentative fits
## usual reccomendations for caution
## there is a connection with R.Ef. model through the penalty term
## R.Ef. uses penalty given by the shrinkage given by the r.ef. distribution