### A predictive model for Claude's example.
## Setup: load the ASOR package, read the data and take a first look.
## library() stops immediately if ASOR is not installed; require() would
## only warn and return FALSE, so the failure would surface later as a
## less helpful "could not find function" error at Attach().
library(ASOR)
CD <- read.csv("../CSV/CD.csv")  ## path is relative to the working directory
## NOTE(review): Attach() is not defined in this file; presumably it is
## supplied by ASOR -- confirm.
Attach()
dim(CD)
names(CD)

## Speculative data
## Increasing variance
## Inverse Gaussian, adequate model
## Raw scatter of Price against Load (axis labels given explicitly).
plot(CD[["Load"]], CD[["Price"]], xlab = "Load", ylab = "Price",
     pch = 15, cex = 0.5, col = "green4")

## Get a local approximation to the mean, using a normal density as
## the local weight function.  An SD = 1000 seems about right

## Local (kernel-weighted) mean and variance of y along x.
##
## For each observation i, a normal density centred at x[i] supplies the
## weights for a weighted mean of y.  The squared deviations of y from
## those local means are then averaged with the same weights to give a
## local variance.
##
## Arguments:
##   x   numeric vector of predictor values
##   y   numeric vector of responses, same length as x
##   lf  smoothness divisor: the kernel SD is diff(range(x)) / lf, so a
##       larger lf gives a narrower kernel and a more local fit
##
## Value: a data frame with one row per observation (in input order) and
## columns `mu` (local mean) and `sig2` (local variance).  Empty input
## gives a zero-row data frame.
localMV <- function(x, y, lf = 30) {
  stopifnot(length(x) == length(y))
  n <- length(x)
  if (n == 0L) {
    ## Nothing to smooth.  Returning here also avoids calling range() on
    ## an empty vector; a `1:n` loop would have run over c(1, 0) and
    ## produced a spurious one-row NaN result.
    return(data.frame(mu = numeric(0), sig2 = numeric(0)))
  }
  SD <- diff(range(x)) / lf       ## determines smoothness

  ## Gaussian-weighted average of `v` around every x[i]
  smooth_at <- function(v) {
    vapply(seq_len(n),
           function(i) weighted.mean(v, dnorm(x, x[i], SD)),
           numeric(1))
  }

  mu <- smooth_at(y)
  sig2 <- smooth_at((y - mu)^2)   ## smooth squared deviations from local mean
  data.frame(mu = mu, sig2 = sig2)
}

## Append local mean/variance estimates at three smoothness levels.
## Each localMV() result is a two-column data frame, so the added
## columns are named <tag>.mu and <tag>.sig2.
CD1 <- data.frame(CD,
                  m30 = localMV(CD[["Load"]], CD[["Price"]]),
                  m20 = localMV(CD[["Load"]], CD[["Price"]], lf = 20),
                  m10 = localMV(CD[["Load"]], CD[["Price"]], lf = 10))
names(CD1)

# _> names(CD1)
# _[1] "Load"     "Price"    "m30.mu"   "m30.sig2"
# _[5] "m20.mu"   "m20.sig2" "m10.mu"   "m10.sig2"

## Sort rows by increasing Load so that lines() draws left to right
CD1 <- CD1[order(CD1[["Load"]]), ]


## Data scatter with the three local-mean curves overlaid
local({
  ## column holding each curve -> colour used to draw it
  curve_cols <- c(m30.mu = "green4", m20.mu = "yellow", m10.mu = "red")
  plot(CD1[["Load"]], CD1[["Price"]], xlab = "Load", ylab = "Price",
       col = "blue", cex = 0.5)
  for (v in names(curve_cols)) {
    lines(CD1[["Load"]], CD1[[v]], col = curve_cols[[v]], lwd = 2)
  }
})
legend("topleft", paste0("SD = 1/", 10 * 3:1, " of range"),
       lty = 1, col = c("green4", "yellow", "red"), cex = 0.8)

## '20' seems best compromise

## check for power laws
## how the variance depends on the mean:
## plot sig2^(1/k) against mu for k = 1, 2, 3, 4 (one plot per k)
local({
  for (k in 1:4) {
    ## axis label matches the expression being plotted
    root_lab <- if (k == 1) "m20.sig2" else paste0("m20.sig2^(1/", k, ")")
    plot(CD1[["m20.mu"]], CD1[["m20.sig2"]]^(1 / k),
         xlab = "m20.mu", ylab = root_lab, col = "blue", cex = 0.7)
  }
})

## the cubic (sig2^(1/3)) seems to be the best

### loess exploration

## loess smooth of the mean, and its fitted values at the observed Loads
CD.mulo <- loess(Price ~ Load, data = CD1)
pCD.mulo <- predict(CD.mulo)

## data with the loess mean curve overlaid
plot(CD1[["Load"]], CD1[["Price"]], xlab = "Load", ylab = "Price",
     col = "green", cex = 0.5)
lines(CD1[["Load"]], pCD.mulo, lwd = 1.5, col = "red")

## loess smooth of the squared residuals about the loess mean
CD.sig2lo <- loess((Price - pCD.mulo)^2 ~ Load, data = CD1)

## cube root of the smoothed variance against the smoothed mean
plot(x = pCD.mulo, y = predict(CD.sig2lo, CD1)^(1/3))

library(splines)
## GLM using the inverse Gaussian family and working with the log link.
## Mean curve: natural spline in Load with 4 degrees of freedom.
## The variance is proportional to the cube of the mean.
CD.mod <- glm(Price ~ ns(Load, 4),
              family = inverse.gaussian(link = "log"), data = CD1)
# or link = "identity" ?

## Data, the three local-mean curves, and the GLM fit (default colour)
with(CD1, {
  plot(Load, Price, col = "blue", cex = 0.5)
  lines(Load, m30.mu, col = "green4")
  lines(Load, m20.mu, col = "yellow")
  lines(Load, m10.mu, col = "red")
  ## type spelled out in full rather than relying on partial matching
  lines(Load, predict(CD.mod, type = "response"), lwd = 1.5)
})

### How do the diagnostics look?

fv <- fitted(CD.mod)
rs <- resid(CD.mod)

## residuals against fitted values, with a zero reference line
plot(fv, rs, col = "blue", cex = 0.7)
abline(h = 0, col = "red", lty = "dashed")

## normal Q-Q plot of the residuals
qqnorm(rs)
qqline(rs, col = "red", lty = "dashed")

## density-scale histogram with a kernel density estimate (red) and a
## zero-mean normal density with the residuals' SD (blue) overlaid.
## freq = FALSE is spelled out instead of the partially matched `prob`.
hist(rs, freq = FALSE)
lines(density(rs), col = "red")
srs <- sort(rs)   ## sorted so the normal curve is drawn left to right
lines(srs, dnorm(srs, 0, sd(rs)), col = "blue")

### the automatic selection of smoothness
## library() fails at once if mgcv is missing; require() would only warn
## and let the gam() call below fail instead.
library(mgcv)
CD.gam <- gam(Price ~ s(Load),
              family = inverse.gaussian(link = "log"), data = CD1)

## Data with every fitted mean curve overlaid: the three local weighted
## means, the natural-spline GLM and the GAM.
with(CD1, {
  plot(Load, Price, col = "blue", cex = 0.5)
  lines(Load, m30.mu, col = "green4")
  lines(Load, m20.mu, col = "yellow")
  lines(Load, m10.mu, col = "red")
  ## type spelled out in full, consistent with the gam prediction below
  lines(Load, predict(CD.mod, type = "response"), lwd = 1.5)
  lines(Load, predict(CD.gam, type = "response"),
        lwd = 1.5, col = "hotpink")
})

legend("topleft",
       c(paste0("LWM, SD = 1/", 10 * 3:1, " of range"),
         "glm with natural splines",
         "gam with autosmoother"),
       lty = 1, lwd = 2,
       col = c("green4", "yellow", "red", "black", "hotpink"), cex = 0.8)


