## This script (which should turned into a vignette) ## illustrates the usage of aRT with emphasis on: ## \item reading and tidying up text files with polygon information ## \item storing on the data-base with respective attributes ## \item performing some operations on polygons such as: ## centroids, area, perimeter and neighbourhood ## \item reading and storing point data from a case-control study ## \item performing some operation between points and polygons ## SOURCE: ## ## HENDERSON, R. ; SHIMAKURA, S. ; GORST, D. ## Modelling spatial variation in Leukaemia survival data. ## Journal of the American Statistical Association, v. 97, p. 965-972, 2002. ## ## SHIMAKURA, S. Statistical methods for spatial survival data. ## PhD Thesis, Department od Mathematics and Statistics, Lancaster University, UK. 2003 rm(list=ls()) require(maptools) require(RColorBrewer) require(aRT) aRTsilent(FALSE) ## Reading R dump files with the polygons for each city ## and making a list with the polygons coordinates files <- dir(path="Polygons", pattern="dump.*.poly.unit") objs <- substring(files, 6) cities <- unlist(strsplit(objs, ".poly.unit")) cities nw <- list() for(i in 1:length(files)){ source(paste("Polygons/",files[i], sep="")) nw[[cities[i]]] <- get(objs[i]) remove(list=objs[i]) } ## computing limits and plotting the polygons using basic \R\ functions xl <- diag(apply(sapply(nw, function(x) range(x[,1], na.rm=T)),1,range)) yl <- diag(apply(sapply(nw, function(x) range(x[,2], na.rm=T)),1,range)) plot(xl,yl,type="n", asp=1) sapply(nw,polygon) ## removing object no longer needed rm(i,objs) ## converting nw to \pkg{maptools}'s \code{polylist} class ## (is there a easier way to do so???) attr(nw, "maplim") <- list(x=xl, y=yl) attr(nw, "region.id") <- sprintf("%02d",1:length(nw)) attr(nw, "plotOrder") <- 1:length(nw) attr(nw, "after") <- rep(NA, length(nw)) class(nw) <- "polylist" for(i in 1:length(nw)) attr(nw[[i]], "bbox") <- as.vector(t(apply(nw[[i]],2,range, na.rm=TRUE))) ## and now it can be more easily plotted by plot.polylist() from maptools length(nw) ## backing up ##save.image(file="nw.RData") ## restoring from backup ##load("nw.RData") ## Highlighting polygons with "problems": preston, lancaster e wyre polygon(nw$preston, col=2) polygon(nw$lancaster, col=3) polygon(nw$wyre, col=4) ## looking more closely the polygons with problems #plot(subset(nw, names(nw) == "lancaster")) #plot(subset(nw, names(nw) == "preston")) #plot(subset(nw, names(nw) == "wyre")) ## checking for each polygon whether the 1st point equals to the last ## i.e. if the polygons are closed which(sapply(nw, function(x){!identical(x[1,], x[nrow(x),])})) #lapply(nw, function(x) # if(!identical(x[1,], x[nrow(x),])) x <- rbind(x,x[1,])) ## converting to the sp format: ## this does not work because of the polygons with problems: #nwSP <- lapply(nw,Polygon) ## but notice that excluding the "problems" it does work: #nwsub <- subset(nw, !(names(nw) %in% c("preston", "lancaster", "wyre"))) #nwsubSP <- lapply(nwsub, Polygon) #nwsubSP <- lapply(1:length(nwsubSP), function(x) # Polygons(list(nwsubSP[[x]]), ID=names(nwsub)[x])) #nwsubSP <- SpatialPolygons(nwsubSP) #plot(nwsubSP) #slotNames(nwsubSP) #class(slot(nwsubSP, "polygons")) #length(slot(nwsubSP, "polygons")) ## This is from Pedro, and it is more general since it ## handles the problems with the 3 districts and also ## converts the list of polygons to the sp format resp <- lapply(1:length(nw), function(pos){ it=nw[[pos]] nas = which(is.na(it[,2])) # hole's positions if(length(nas) == 0) # if no hole all is ok pol = list(Polygon(it)) if(length(nas) > 0){ nas = c(0,nas,length(it[,1])+1) ## adds to the holes vector de buracos 0 and hole size pol = lapply(1:(length(nas)-1), function(i) {Polygon(it[(nas[i]+1):(nas[i+1]-1),]) ## polygons with consecutive positions ## left+1 and dir=1 to exclud NA's }) } Polygons(pol,ID=names(nw)[pos]) }) class(resp) length(resp) ## converting "corrected" polygons to a \pkg{sp} class nwSP <- SpatialPolygons(resp) plot(nwSP) plot(subset(nwSP, getID(nwSP) == "lancaster"), col=2, add=TRUE) plot(subset(nwSP, getID(nwSP) == "preston"), col=3, add=TRUE) plot(subset(nwSP, getID(nwSP) == "wyre"), col=4, add=TRUE) ## verificar: ## suspeite de leitura errada em municipios com mais de um poligono!!! class(nwSP[which(getID(nwSP) =="wyre")]) length(nwSP[which(getID(nwSP) =="wyre")]) slotNames(nwSP[which(getID(nwSP) =="wyre")]) class(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")) length(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")) class(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]]) slotNames(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]]) ## the problematioc districts are districts with more than one polygon length(slot(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]], "Polygons")) length(slot(slot(nwSP[which(getID(nwSP) =="lancaster")], "polygons")[[1]], "Polygons")) length(slot(slot(nwSP[which(getID(nwSP) =="preston")], "polygons")[[1]], "Polygons")) ## IN SUMMARY ## total number of districts: 24 ## number of districts defined by 1 polygon: 21 ## number of districts defined by 2 polygon: 2 ## number of districts defined by 4 polygon: 1 ## total number of polygons: 21x1 + 2x2 + 1x4 = 29 ## creating a database (requires permissions for the user on the DBMS) con <- openConn() db <- createDb(con, "northwest", replace=TRUE) ## the following removes duplicated values in the polygons coordinates ## (otherwise the union of polygons using \pkg{aRT} won't work ag=as.aRTgeometry(nwSP) -> g class(g) removeDup(g) ## simplfying the polygon simplify(g,0.002,0.002) class(g) spg=getGeometry(g) plot(spg) ## reading a table of attributes associated to each polygon leuc <- read.table("Polygons/attrpolygons.dat", head=T, row.names=1) head(leuc) dim(leuc) ## notice the column names contains "." ## "." is a mysql keyword. replacing it to "_" ... names(leuc) <- sub(".","_",names(leuc), fixed=TRUE) names(leuc) head(leuc) rownames(leuc)[10]="manc" identical(getID(nwSP), rownames(leuc)) ## ## Districts layer: method 1 ## ## creating a layer to store the district's polygons ## and adding the geometry to the database ldistrictso <- createLayer(db, "original-districts") addPolygons(ldistrictso, nwSP) ldistrictso tpolo <- createTable(ldistrictso, "original_districts") ldistricts <- createLayer(db, "simplified-districts") addPolygons(ldistricts, g) ldistricts ## attachs a table to the layer (PEDRO: THIS CAN BE AUTOMATICALLY CALLED ## BY addPolygons() SINCE IS NEEDED EVEN IF THERE ARE NO ATTRIBUTES) tpol <- createTable(ldistricts, "simplified_districts") ## and writing atttributes in the database ## at this points everything is already visible from a TL applications ## such as TerraView updateColumns(tpol, cbind(ID=getID(nwSP),leuc)) # And the visualisation can be set from here thdistricts <- createTheme(ldistricts, view="simplified-districts") thdistrictso <- createTheme(ldistrictso, view="Districts") ## changing visual setVisual(thdistricts, visualPolygons(transp=60, color="green")) setVisual(thdistrictso, visualPolygons(transp=60, color="green")) ## Districts layer: method 2 ## nwSPolDF <- aRT::getData(thdistricts) # SpatialPolygonsDataFrame(nwSP, leuc) ## ## some aRT/TerralLib computations ## ## geometric centroid centroids <- getOperation(ldistricts, "centroid") centroids centroids@coords areasCT <- t(sapply(1:length(slot(nwSP, "polygons")), function(x) slot(nwSP, "polygons")[[x]]@labpt)) areasCT all.equal(areasCT, unname(centroids@coords)) plot(nwSP) points(centroids, pch=20) text(centroids@coords[,1], centroids@coords[,2], getID(nwSP), pos=3, cex=0.75) ## notice coordinates provided with data are not centroids points(leuc[,1:2], pch=20, col=2) ## Both belows seems OK with 24 elements ## area of each polygon areasTL <- getMetric(ldistricts, "area") areasTL slotNames(nwSP) class(slot(nwSP, "polygons")) length(slot(nwSP, "polygons")) class(slot(nwSP, "polygons")[[1]]) slotNames(slot(nwSP, "polygons")[[1]]) areasSP <- sapply(1:length(slot(nwSP, "polygons")), function(x) slot(nwSP, "polygons")[[x]]@area) ## diferencas puramente numericas: identical(areasTL$area, areasSP) all.equal(areasTL$area, areasSP) ## perimeter of each polygon getMetric(ldistricts, "length") ## Mostrar outras como hull e buffer ## WARNING: time demanding!!!! BF <- getOperation(ldistricts, "buffer", dist=0.02) class(BF) save.image() plot(BF, add=TRUE, lty=2) ## union of all polygons -- A BIT TIME DEMANDING! ## Paulo, isto nao esta funcionando. union <- getSetOperation(ldistricts, "union", ID=c("wigan", "bolton", "salford", "bury")) ##, ID=getID(nwSP)) plot(union, col="grey97") ## neighbourhood matrix -- VERY VERY TIME DEMANDING! neighs<-sapply(names(nw), function(x){ print(paste("neighbours of ",x)) which(is.element(names(nw), getRelation(ldistrictso,"touches",ID=x)))} ) neighs save.image() ## map with some districs and their neighboors ids=c("lancaster","wigan","burnley") ## currently a polygon is its on neighbour ## Pedro: I believe this must be an argumetn with defaults to false ## i.e. by default do not include the polygon in its neighbourhood plot(nwSP, col="gray97", lty=0) plot(nwSP[ids,], add=TRUE, col="blue") for(i in ids){ plot(nwSP[getID(nwSP)[neighs[[i]]],],add=T,col=brewer.pal(3, "YlOrBr")[which(ids == i)]) } ## next step is to get more general neig. operations as, for instance, ## the "size" of the border between 2 polygons ## TO DO: ## Produce some views with colors (legends) according to some attributes ## ## Points data: case-control data ## ## now creating another layer with points (cases and controls): ## (a) cases cases <- read.table("cases.txt", head=TRUE) head(cases) class(cases) cases$ID <- rownames(cases) ## Pedro: should aRT be able to transfer attributed from a ## SpatialPointsDataFrame/creat ID automatically or use rownames ## adding attributes of the "cases" points ## Notice "sp" does this print("method 1") ## method 1 p <- SpatialPoints(cases[c("x","y")]) spdf.cases <- SpatialPointsDataFrame(p,data.frame(ID=cases$ID)) lcases = createLayer(db,"cases") addPoints(lcases, spdf.cases) tcas=createTable(lcases,"cases") updateColumns(tcas,cbind(ID=cases$ID,cases[,-c(1,2,ncol(cases))])) print("method 2") ## method 2 ## mostrando que pode usar importSpData() aqui tb!! ## NAO ESTÁ FUNCIONANDO DA FORMA ABAIXO coordinates(cases) <- c("x","y") class(cases) importSpData(db, cases, lname="lcases2", tname = "tcases2", thname="cases2", view="Districts" ) head(aRT::getData(tcas)) thcases <- createTheme(lcases, view="Districts") setVisual(thcases, visualPolygons(color="red")) ## (b) controls controls <- read.table("controls.txt", head=T) head(controls) p <- SpatialPoints(controls[c("x","y")]) spdf.controls=SpatialPointsDataFrame(p,data.frame(ID=paste(1:(nrow(p@coords))))) ## adding attributes to the "control" points lcontrols = createLayer(db,"controls") addPoints(lcontrols, spdf.controls) tcon=createTable(lcontrols,"controls") updateColumns(tcon,cbind(ID=paste(1:nrow(p@coords)),controls)) thcontrols <- createTheme(lcontrols,view="Districts") setVisual(thcontrols, visualPoints(color="blue") ) ## par(mfrow=c(1,2)) plot(nwSP, col="gray", lty=0) plot(spdf.cases, add=TRUE, pch=20, col=2, cex=0.3) points(centroids, pch=20, cex=1) points(leuc[,1:2], pch=20, col=5, cex=1) plot(nwSP, col="gray", lty=0) plot(spdf.controls, add=TRUE, pch=20, col=4, cex=0.3) ## counting how many points (\# of cases and \# of controls) ## withing each polygon countcases=unlist(lapply(getID(nwSP), function(x){ length(getRelation(lcases,"within",ldistricts,ID=x))})) countcases countcontrol=unlist(lapply(getID(nwSP), function(x){ length(getRelation(lcontrols,"within",ldistricts,ID=x))})) countcontrol ## adding these to the polygons attribute table ncc <- data.frame(ID=getID(nwSP),countcases,countcontrol) updateColumns(tpol,ncc) ## ## adicionar: ## spdep: probmap results ## kernel razao ## empirical Bayes (global e local)