## This script (which should turned into a vignette)
## illustrates the usage of aRT with emphasis on:
## \item reading and tidying up text files with polygon information
## \item storing on the data-base with respective attributes
## \item performing some operations on polygons such as:
##       centroids, area, perimeter and neighbourhood
## \item reading and storing point data from a case-control study
## \item performing some operation between points and polygons

## SOURCE:
##
## HENDERSON, R. ; SHIMAKURA, S. ; GORST, D.
## Modelling spatial variation in Leukaemia survival data. 
## Journal of the American Statistical Association, v. 97, p. 965-972, 2002.
##
## SHIMAKURA, S. Statistical methods for spatial survival data.
## PhD Thesis, Department od Mathematics and Statistics, Lancaster University, UK. 2003

rm(list=ls())

require(maptools)
require(RColorBrewer)
require(aRT)

aRTsilent(FALSE)

## Reading R dump files with the polygons for each city
## and making a list with the polygons coordinates
files <- dir(path="Polygons", pattern="dump.*.poly.unit")
objs <- substring(files, 6)
cities <- unlist(strsplit(objs, ".poly.unit"))
cities
nw <- list()
for(i in 1:length(files)){
  source(paste("Polygons/",files[i], sep=""))
  nw[[cities[i]]] <- get(objs[i])
  remove(list=objs[i])
}

## computing limits and plotting the polygons using basic \R\ functions
xl <- diag(apply(sapply(nw, function(x) range(x[,1], na.rm=T)),1,range))
yl <- diag(apply(sapply(nw, function(x) range(x[,2], na.rm=T)),1,range))
plot(xl,yl,type="n", asp=1)
sapply(nw,polygon)

## removing object no longer needed
rm(i,objs)

## converting nw to \pkg{maptools}'s \code{polylist} class
## (is there a easier way to do so???)
attr(nw, "maplim") <- list(x=xl, y=yl)
attr(nw, "region.id") <- sprintf("%02d",1:length(nw))
attr(nw, "plotOrder") <- 1:length(nw)
attr(nw, "after") <- rep(NA, length(nw))
class(nw) <- "polylist"
for(i in 1:length(nw))
  attr(nw[[i]], "bbox") <- as.vector(t(apply(nw[[i]],2,range, na.rm=TRUE)))

## and now it can be more easily plotted by plot.polylist() from maptools
length(nw)

## backing up
##save.image(file="nw.RData")
## restoring from backup
##load("nw.RData")

## Highlighting polygons with "problems": preston, lancaster e wyre
polygon(nw$preston, col=2)
polygon(nw$lancaster, col=3)
polygon(nw$wyre, col=4)

## looking more closely the polygons with problems
#plot(subset(nw, names(nw) == "lancaster"))
#plot(subset(nw, names(nw) == "preston"))
#plot(subset(nw, names(nw) == "wyre"))

## checking for each polygon whether the 1st point equals to the last
## i.e. if the polygons are closed
which(sapply(nw, function(x){!identical(x[1,], x[nrow(x),])}))
#lapply(nw, function(x)
#       if(!identical(x[1,], x[nrow(x),])) x <- rbind(x,x[1,]))

## converting to the sp format:
## this does not work because of the polygons with problems:
#nwSP <- lapply(nw,Polygon)

## but notice that excluding the "problems" it does work:
#nwsub <- subset(nw, !(names(nw) %in% c("preston", "lancaster", "wyre")))
#nwsubSP <- lapply(nwsub, Polygon)
#nwsubSP <- lapply(1:length(nwsubSP), function(x)
#                  Polygons(list(nwsubSP[[x]]), ID=names(nwsub)[x]))
#nwsubSP <- SpatialPolygons(nwsubSP)
#plot(nwsubSP)
#slotNames(nwsubSP)
#class(slot(nwsubSP, "polygons"))
#length(slot(nwsubSP, "polygons"))

## This is from Pedro, and it is more general since it
## handles the problems with the 3 districts and also
## converts the list of polygons to the sp format
resp <- lapply(1:length(nw),
               function(pos){
                 it=nw[[pos]]
                 nas = which(is.na(it[,2])) # hole's positions
                 if(length(nas) == 0) # if no hole all is ok
                   pol = list(Polygon(it))
                 if(length(nas) > 0){
                   nas = c(0,nas,length(it[,1])+1)
                   ## adds to the holes vector de buracos 0 and hole size
                   pol = lapply(1:(length(nas)-1), function(i)
                     {Polygon(it[(nas[i]+1):(nas[i+1]-1),])
                      ## polygons with consecutive positions 
                      ## left+1 and dir=1 to exclud NA's
                    })
                 }
                 Polygons(pol,ID=names(nw)[pos])
               })
class(resp)
length(resp)

## converting "corrected" polygons to a \pkg{sp} class
nwSP <- SpatialPolygons(resp)

plot(nwSP)
plot(subset(nwSP, getID(nwSP) == "lancaster"), col=2, add=TRUE)
plot(subset(nwSP, getID(nwSP) == "preston"), col=3, add=TRUE)
plot(subset(nwSP, getID(nwSP) == "wyre"), col=4, add=TRUE)

## verificar:
## suspeite de leitura errada em municipios com mais de um poligono!!!
class(nwSP[which(getID(nwSP) =="wyre")])
length(nwSP[which(getID(nwSP) =="wyre")])
slotNames(nwSP[which(getID(nwSP) =="wyre")])
class(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons"))
length(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons"))
class(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]])
slotNames(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]])

## the problematioc districts are districts with more than one polygon
length(slot(slot(nwSP[which(getID(nwSP) =="wyre")], "polygons")[[1]], "Polygons"))
length(slot(slot(nwSP[which(getID(nwSP) =="lancaster")], "polygons")[[1]], "Polygons"))
length(slot(slot(nwSP[which(getID(nwSP) =="preston")], "polygons")[[1]], "Polygons"))


## IN SUMMARY
## total number of districts: 24
## number of districts defined by 1 polygon: 21
## number of districts defined by 2 polygon:  2
## number of districts defined by 4 polygon:  1
## total number of polygons: 21x1 + 2x2 + 1x4 = 29 

## creating a database (requires permissions for the user on the DBMS)
con <- openConn()
db <- createDb(con, "northwest", replace=TRUE)

## the following removes duplicated values in the polygons coordinates
## (otherwise the union of polygons using \pkg{aRT} won't work
ag=as.aRTgeometry(nwSP) -> g
class(g)
removeDup(g)

## simplfying the polygon
simplify(g,0.002,0.002)
class(g)
spg=getGeometry(g)
plot(spg)

## reading a table of attributes associated to each polygon
leuc <- read.table("Polygons/attrpolygons.dat", head=T, row.names=1)
head(leuc)
dim(leuc)

## notice the column names contains "."
## "." is a mysql keyword. replacing it to "_" ...
names(leuc) <- sub(".","_",names(leuc), fixed=TRUE)
names(leuc)
head(leuc)
rownames(leuc)[10]="manc"
identical(getID(nwSP), rownames(leuc))

##
## Districts layer: method 1
##
## creating a layer to store the district's polygons
## and adding the geometry to the database
ldistrictso <- createLayer(db, "original-districts")
addPolygons(ldistrictso, nwSP)
ldistrictso
tpolo <- createTable(ldistrictso, "original_districts")

ldistricts <- createLayer(db, "simplified-districts")
addPolygons(ldistricts, g)
ldistricts

## attachs a table to the layer (PEDRO: THIS CAN BE AUTOMATICALLY CALLED
## BY addPolygons() SINCE IS NEEDED EVEN IF THERE ARE NO ATTRIBUTES)
tpol <- createTable(ldistricts, "simplified_districts")

## and writing atttributes in the database
## at this points everything is already visible from a TL applications
## such as TerraView
updateColumns(tpol, cbind(ID=getID(nwSP),leuc))

# And the visualisation can be set from here
thdistricts  <- createTheme(ldistricts,  view="simplified-districts")
thdistrictso <- createTheme(ldistrictso, view="Districts")

## changing visual
setVisual(thdistricts,  visualPolygons(transp=60, color="green"))
setVisual(thdistrictso, visualPolygons(transp=60, color="green"))

## Districts layer: method 2
##
nwSPolDF <- aRT::getData(thdistricts) # SpatialPolygonsDataFrame(nwSP, leuc)

##
## some aRT/TerralLib computations
##
## geometric centroid
centroids <- getOperation(ldistricts, "centroid")
centroids
centroids@coords

areasCT <- t(sapply(1:length(slot(nwSP, "polygons")), function(x) slot(nwSP, "polygons")[[x]]@labpt))
areasCT

all.equal(areasCT, unname(centroids@coords))


plot(nwSP)
points(centroids, pch=20)
text(centroids@coords[,1], centroids@coords[,2], getID(nwSP), pos=3, cex=0.75) 
## notice coordinates provided with data are not centroids
points(leuc[,1:2], pch=20, col=2)


## Both belows seems OK with 24 elements
## area of each polygon
areasTL <- getMetric(ldistricts, "area")
areasTL
slotNames(nwSP)
class(slot(nwSP, "polygons"))
length(slot(nwSP, "polygons"))
class(slot(nwSP, "polygons")[[1]])
slotNames(slot(nwSP, "polygons")[[1]])
areasSP <- sapply(1:length(slot(nwSP, "polygons")), function(x) slot(nwSP, "polygons")[[x]]@area)

## diferencas puramente numericas:
identical(areasTL$area, areasSP)
all.equal(areasTL$area, areasSP)

## perimeter of each polygon
getMetric(ldistricts, "length")

## Mostrar outras como hull e buffer
 
## WARNING: time demanding!!!!
BF <- getOperation(ldistricts, "buffer", dist=0.02)
class(BF)
save.image()
plot(BF, add=TRUE, lty=2)  

## union of all polygons -- A BIT TIME DEMANDING!
## Paulo, isto nao esta funcionando.
union <- getSetOperation(ldistricts, "union", ID=c("wigan", "bolton", "salford", "bury"))
##, ID=getID(nwSP))

plot(union, col="grey97")

## neighbourhood matrix -- VERY VERY TIME DEMANDING!
neighs<-sapply(names(nw),
               function(x){
                 print(paste("neighbours of ",x))
                 which(is.element(names(nw),
                                  getRelation(ldistrictso,"touches",ID=x)))}
               )
neighs
save.image()

## map with some districs and their neighboors
ids=c("lancaster","wigan","burnley")


## currently a polygon is its on neighbour
## Pedro: I believe this must be an argumetn with defaults to false
## i.e. by default do not include the polygon in its neighbourhood
plot(nwSP, col="gray97", lty=0)
plot(nwSP[ids,], add=TRUE, col="blue")
for(i in ids){
  plot(nwSP[getID(nwSP)[neighs[[i]]],],add=T,col=brewer.pal(3, "YlOrBr")[which(ids == i)])
}

## next step is to get more general neig. operations as, for instance,
## the "size" of the border between 2 polygons

## TO DO:
## Produce some views with colors (legends) according to some attributes


##
## Points data: case-control data
## 

## now creating another layer with points (cases and controls): 

## (a) cases
cases <- read.table("cases.txt", head=TRUE)
head(cases)
class(cases)
cases$ID <- rownames(cases)
## Pedro: should aRT be able to transfer attributed from a
##        SpatialPointsDataFrame/creat ID automatically or use rownames
## adding attributes of the "cases" points
## Notice "sp" does this

print("method 1")
## method 1
p <- SpatialPoints(cases[c("x","y")])
spdf.cases <- SpatialPointsDataFrame(p,data.frame(ID=cases$ID))
lcases = createLayer(db,"cases")
addPoints(lcases, spdf.cases)
tcas=createTable(lcases,"cases")
updateColumns(tcas,cbind(ID=cases$ID,cases[,-c(1,2,ncol(cases))]))
print("method 2")

## method 2
## mostrando que pode usar importSpData() aqui tb!!
## NAO ESTÁ FUNCIONANDO DA FORMA ABAIXO
coordinates(cases) <- c("x","y")
class(cases)

importSpData(db, cases, lname="lcases2", tname = "tcases2", thname="cases2", view="Districts" )

head(aRT::getData(tcas))

thcases <- createTheme(lcases, view="Districts")
setVisual(thcases, visualPolygons(color="red"))    


## (b) controls
controls <- read.table("controls.txt", head=T)
head(controls)
p <- SpatialPoints(controls[c("x","y")])
spdf.controls=SpatialPointsDataFrame(p,data.frame(ID=paste(1:(nrow(p@coords)))))

## adding attributes to the "control" points
lcontrols = createLayer(db,"controls")
addPoints(lcontrols, spdf.controls)
tcon=createTable(lcontrols,"controls")
updateColumns(tcon,cbind(ID=paste(1:nrow(p@coords)),controls))

thcontrols <- createTheme(lcontrols,view="Districts")
setVisual(thcontrols, visualPoints(color="blue") )

##
par(mfrow=c(1,2))
plot(nwSP, col="gray", lty=0)
plot(spdf.cases, add=TRUE, pch=20, col=2, cex=0.3)
points(centroids, pch=20, cex=1)
points(leuc[,1:2], pch=20, col=5, cex=1)

plot(nwSP, col="gray", lty=0)
plot(spdf.controls, add=TRUE, pch=20, col=4, cex=0.3)

## counting how many points (\# of cases and \# of controls)
## withing each polygon
countcases=unlist(lapply(getID(nwSP), function(x){
    length(getRelation(lcases,"within",ldistricts,ID=x))}))
countcases

countcontrol=unlist(lapply(getID(nwSP), function(x){
    length(getRelation(lcontrols,"within",ldistricts,ID=x))}))
countcontrol

## adding these to the polygons attribute table
ncc <- data.frame(ID=getID(nwSP),countcases,countcontrol)
updateColumns(tpol,ncc)

##
## adicionar:
## spdep: probmap results
## kernel razao
## empirical Bayes (global e local)