#require(ASOR) require(MASS) #Attach("../.R_Store") #find("Iowa") Iowa <- read.csv("../CSV/Iowa.csv") names(Iowa) head(Iowa) iowa.lm1 <- lm(Yield ~ ., Iowa) iowa.step <- stepAIC(iowa.lm1, scope = list(lower = ~ Year, upper = ~.), k = log(nrow(Iowa)), trace = F) dropterm(iowa.step, test = "F", k = log(nrow(Iowa)), sorted = T) ## now inspect for possible curvatures with GAM's ## note: tendency to use simple terms and avoid interactions ## possile way around is to redefine variables such that the resultant ones ## are likely to have additive effects ## library(mgcv) ##alternatives are vgam and gam ## here the automatic procedure to fins knots does not work (samll data) ## so knots are held fixed ## default family is gaussian iowa.gam <- gam(Yield ~ s(Temp4,k=5) + s(Rain0,k=5) + s(Rain2,k=5) + s(Year,k=5), data = Iowa, trace=T) ## plot functions available -- allows inspecting the representations of each contribution par(mfrow = c(2,2)) plot(iowa.gam, se = T, ylim = c(-30, 30), resid = TRUE, pch=19) par(mfrow = c(2,2)) plot(iowa.gam, se = T, resid = T) summary(iowa.gam) ## effect of each covariate can be discussed, in particular bits where it remains flat ## for Year the increased stoped during the 2nd world war -- work stoped in favor of other areas ## this kind of behaviour is not captured by polinomial regressions ## reinforces the idea of using GAM for exploration, detective work library(splines) iowa.ns <- lm(Yield ~ ns(Temp4, df=3) + ns(Rain0, df=3) + ns(Rain2, df = 3) + ns(Year, df=3), Iowa) termplot(iowa.ns, se=T, partial.resid = T, rug = T) dropterm(iowa.ns, test = "F", k = log(nrow(Iowa))) # 8.8 Additive models rock.lm <- lm(log(perm) ~ area + peri + shape, data = rock) summary(rock.lm) rock.gam <- gam(log(perm) ~ s(area) + s(peri) + s(shape), control = gam.control(maxit = 50), data = rock) summary(rock.gam) anova(rock.lm, rock.gam) par(mfrow = c(2, 3), pty = "s") plot(rock.gam, se = T, resid = T) rock.gam1 <- gam(log(perm) ~ area + peri + s(shape), data = rock) summary(rock.gam1) anova(rock.gam1) plot(rock.gam1, se = T, all.terms = T, resid = T) anova(rock.lm, rock.gam1, rock.gam) ## another example ## try to build a predictor for the proportion between the 2 species as a function of ... ## ... position (lat/long), PTime (time of the year when the catch is made, number of days from the start of the year -- periodic?) ## Depth, Rland (distance from coast), Mud (kind of botton -- percentage of mud) ## the last tree are (partially) functions of lat/long ## so lat/long is reserved to capture anything which may be left ## data on proportion of 1 specie w.r.t total numbers npf() ## the species move around to breed and grow -- direction generally to off-shore ## in derection of deeper waters names(TigersW) ## V[Y] = \mu(1-\mu) / T/\psi quasibinomial(logit) ## te is a tensor spline npf.gam <- gam(Psem/Tiger ~ te(PDay, Rland) + te(PDay, Depth_av) + s(Mud_av) + s(Longitude, Latitude), family=quasibinomial, data=TigersW, weights=Tiger, trace=T) ## Psem : weights of one specie and Tiget the total weight ## av stands for average value over the cell ## s(Longitude, Latitude) means an isotropic term so they need to be ## measure in the same scale ==> s(,) forces isotropy -- isotropic basis functions plot(npf.gam) ## red lines are conf. intervals for the dakr contour lines ## mud_av indicates specie favor mud areas ## Aus(add=T, col="gold") ## or other way to look at this vis.gam(npf.gam) ## indicates the periodic behavior vis.gam(npf.gam, c("PDay","Rland")) vis.gam(npf.gam, c("PDay","Depth_av")) ## no variation in proportions in shalow areas but yes in deeper areas ## in practice difficult to have more than 2 pred. in 1 term ## lots of try an error and tentative fits ## usual reccomendations for caution ## there is a connection with R.Ef. model through the penalty term ## R.Ef. uses penalty given by the shrinkage given by the r.ef. distribution