Supplementary material of the paper

A dynamic rating for Brazilian football league 2011 season

Marcelo Leme de Arruda (I)
Walmes Marques Zeviani (II),
Júlio Sílvio de Sousa Bueno Filho (I)

I: DEX: Department of Exact Sciences, UFLA: Federal University of Lavras,
II: LEG: Statistics and Geoinformation Lab, DEST: Department of Statistics, UFPR: Federal University of Paraná.

Download the dataset: brasileiro2004-2011.txt
Download the R script: map.R

TODO list

Url encurtada do markup quando estiver na fase de submissão.
Esclarecer o porquê do uso da inversa da diagonal da informação observada ((diag(H))^− 1) ao invés da diagonal da inversa que (diag(H^− 1)) representa as variâncias das estimativas.
Desenvolver o conceito de time médio. Estabelecer comparações de alguns times contra o time médio. Tentar interpretar a diferença com relação ao time médio quanto da probabilidade de fazer gols ou ganhar a partida.
DONE Gráficos de barras com ordem homeWin, draw awayWin.
DONE Tabela de dupla entrada ou gráfico que permita saber a probabilidade do time Home ganhar (gráfico de contornos ou supercície, prob no eixo z) a partir da diferença γ_H − δ_A (eixo x) e γ_A − δ_H (eixo y).
DONE Revisar a implementação das medidas de DeFinetti.

Latex tables

Descriptive table for 2010 season
Maximum likelihood estimation for 2010 season
Descriptive table for 2011 season

Session definitions

##----------------------------------------------------------------------
## Required packages and session definition.

library(latticeExtra)
library(plyr)
library(reshape2)
library(gridExtra)
library(xtable)

## devtools::session_info()
## Sys.time()

##----------------------------------------------------------------------

rm(list=ls())

## Color palette.
mycol <- brewer.pal(6, "Set1")
## mycol <- c("gray50", "black")

## Trellis graphical style.
ps <- list(
    box.rectangle=list(col=1, fill=c("gray70")),
    box.umbrella=list(col=1, lty=1),
    dot.symbol=list(col=1, pch=19),
    dot.line=list(col="gray50", lty=3),
    plot.symbol=list(col=1, cex=1.2),
    plot.line=list(col=1),
    plot.polygon=list(col="gray95"),
    superpose.line=list(col=mycol, lty=1),
    superpose.symbol=list(col=mycol, pch=c(1:5)),
    superpose.polygon=list(col=mycol),
    strip.background=list(col=c("gray80","gray50"))
    )
trellis.par.set(ps)
## show.settings()

Load data

##----------------------------------------------------------------------
## Load the data of the season 2011.

## UNIX command to get the charset/encoding of a file.
system("file -bi brasileiro2004-2011.txt")

## iso-8859-1 aka latin1.

## Importing the dataset with seasons 2004 to 2011.
da <- read.table(
    file="brasileiro2004-2011.txt",
    header=TRUE,
    sep="\t",
    encoding="latin1",
    colClasses=c("factor","character","integer","NULL")[
        c(2,4,3,4,4,4,1,1,2)])

## Short names are more manageable.
## man: home team, away: away team.
names(da) <- substr(names(da), 1, 3)
names(da)[3:4] <- c("home", "away")
str(da)

'data.frame': 3294 obs. of  5 variables:
 $ dat : chr  "21/04/2004" "21/04/2004" "21/04/2004" "21/04/2004" ...
 $ rod : int  1 1 1 1 1 1 1 1 1 1 ...
 $ home: Factor w/ 40 levels "América/MG","América/RN",..: 9 16 36 15 28 21 31 29 13 39 ...
 $ away: Factor w/ 40 levels "América/MG","América/RN",..: 20 24 40 26 4 17 12 35 23 14 ...
 $ pla : chr  "1X4" "1X0" "1X0" "2X1" ...

## Same order in the factor levels.
l <- union(levels(da$home), levels(da$away))
da$home <- factor(da$home, levels=l)
da$away <- factor(da$away, levels=l)

## Converting string to data (calendar format).
da$dat <- as.POSIXct(x=da$dat, format="%d/%m/%Y")
da$yea <- as.integer(format(da$dat, format="%Y"))
    
## Get the goals of each team in a match.
da$h <- as.integer(gsub("^(\\d+)x\\d+$", "\\1", tolower(da$pla)))
da$a <- as.integer(gsub("^\\d+x(\\d+)$", "\\1", tolower(da$pla)))
da <- transform(da, d=abs(h-a), s=h+a)
str(da)

'data.frame': 3294 obs. of  10 variables:
 $ dat : POSIXct, format: "2004-04-21 00:00:00" ...
 $ rod : int  1 1 1 1 1 1 1 1 1 1 ...
 $ home: Factor w/ 40 levels "América/MG","América/RN",..: 9 16 36 15 28 21 31 29 13 39 ...
 $ away: Factor w/ 40 levels "América/MG","América/RN",..: 20 24 40 26 4 17 12 35 23 14 ...
 $ pla : chr  "1X4" "1X0" "1X0" "2X1" ...
 $ yea : int  2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
 $ h   : int  1 1 1 2 0 0 3 3 1 0 ...
 $ a   : int  4 0 0 1 0 0 2 2 0 1 ...
 $ d   : int  3 1 1 1 0 0 1 1 1 1 ...
 $ s   : int  5 1 1 3 0 0 5 5 1 1 ...

## Marginal statistics in goals scored for each season.
aggregate(cbind(home=h, away=a, absdiff=d, total=s)~yea,
          data=da, FUN=mean, na.rm=TRUE)

   yea     home      away  absdiff    total
1 2004 1.717391 1.0652174 1.362319 2.782609
2 2005 1.803030 1.3311688 1.307359 3.134199
3 2006 1.589474 1.1210526 1.289474 2.710526
4 2007 1.665789 1.0868421 1.384211 2.752632
5 2008 1.731579 0.9894737 1.389474 2.721053
6 2009 1.736842 1.1421053 1.257895 2.878947
7 2010 1.528947 1.0447368 1.147368 2.573684
8 2011 1.605263 1.0710526 1.239474 2.676316

Season 2010

##----------------------------------------------------------------------

## 2010 season only.
db <- droplevels(subset(da, yea==2010,
                        select=c("rod","home","away","h","a")))
str(db)

'data.frame': 380 obs. of  5 variables:
 $ rod : int  1 1 1 1 1 1 1 1 1 1 ...
 $ home: Factor w/ 20 levels "Atlético/GO",..: 5 1 16 9 2 15 7 6 14 4 ...
 $ away: Factor w/ 20 levels "Atlético/GO",..: 17 12 20 18 19 8 3 10 11 13 ...
 $ h   : int  3 0 1 1 2 1 2 1 1 6 ...
 $ a   : int  3 0 0 1 1 2 1 0 0 1 ...

subset(db, home=="Corinthians")

     rod        home            away h a
2541   1 Corinthians     Atlético/PR 2 1
2558   3 Corinthians      Fluminense 1 0
2578   5 Corinthians          Santos 4 2
2594   6 Corinthians   Internacional 2 0
2618   9 Corinthians     Atlético/MG 1 0
2643  11 Corinthians         Guarani 3 1
2658  13 Corinthians        Flamengo 1 0
2682  15 Corinthians       São Paulo 3 0
2698  17 Corinthians         Vitória 2 1
2717  19 Corinthians           Goiás 5 1
2735  21 Corinthians          Grêmio 0 1
2755  23 Corinthians Grêmio Prudente 3 0
2792  26 Corinthians        Botafogo 1 1
2796  27 Corinthians           Ceará 2 2
2820  29 Corinthians     Atlético/GO 3 4
2839  31 Corinthians       Palmeiras 1 0
2861  33 Corinthians            Avaí 4 0
2877  35 Corinthians        Cruzeiro 1 0
2895  37 Corinthians           Vasco 2 0

## Are the levels in the same order?
all(levels(db$home)==levels(db$away))

[1] TRUE

##----------------------------------------------------------------------

## Matrices to identify teams matches (design matrices).
Xh <- model.matrix(~-1+home, db) ## h: home.
Xa <- model.matrix(~-1+away, db) ## a: away.

## Check.
Xha <- Xh-Xa
db[1:5,c("home","away")]

            home      away
2535    Botafogo    Santos
2536 Atlético/GO    Grêmio
2537   Palmeiras   Vitória
2538    Flamengo São Paulo
2539 Atlético/MG     Vasco

print(as.table(t(Xha[1:5,])), zero.print=".")

                    2535 2536 2537 2538 2539
homeAtlético/GO        .    1    .    .    .
homeAtlético/MG        .    .    .    .    1
homeAtlético/PR        .    .    .    .    .
homeAvaí               .    .    .    .    .
homeBotafogo           1    .    .    .    .
homeCeará              .    .    .    .    .
homeCorinthians        .    .    .    .    .
homeCruzeiro           .    .    .    .    .
homeFlamengo           .    .    .    1    .
homeFluminense         .    .    .    .    .
homeGoiás              .    .    .    .    .
homeGrêmio             .   -1    .    .    .
homeGrêmio Prudente    .    .    .    .    .
homeGuarani            .    .    .    .    .
homeInternacional      .    .    .    .    .
homePalmeiras          .    .    1    .    .
homeSantos            -1    .    .    .    .
homeSão Paulo          .    .    .   -1    .
homeVasco              .    .    .    .   -1
homeVitória            .    .   -1    .    .

##----------------------------------------------------------------------
## http://esportes.terra.com.br/futebol/brasileiro/2010/noticias/0,,OI4339585-EI15413,00-Classificacao+e+Jogos+Serie+A.html

## Place in 2010 season, from first to last.
gsgc <- cbind(scored=t(Xh)%*%db$h+t(Xa)%*%db$a,   ## goals scored
              conceded=t(Xa)%*%db$h+t(Xh)%*%db$a) ## goals conceded
colnames(gsgc) <- c("gScored", "gConced")

aux <- with(db, {
    d <- h-a
    draw <- d==0
    winH <- d>0
    ## winA <- !draw & !winH
    winA <- !(draw | winH)
    x <- cbind(h=winH*3+draw, a=winA*3+draw)
    return(x)
})

## To prevent vetors of length less than 3, e.g. teams that didn't lose
## any match.
tableIn <- function(x){
    tb <- table(x)
    f <- rep(0, 3)
    names(f) <- c(0,1,3)
    f[names(tb)] <- tb
    f
}

## Lose Draw Win frequency
ldw <-
    do.call(rbind, lapply(tapply(aux[,1], db$home, tableIn), as.vector))+
    do.call(rbind, lapply(tapply(aux[,2], db$away, tableIn), as.vector))
colnames(ldw) <- c("Lose", "Draw", "Win")
ldw

                Lose Draw Win
Atlético/GO       18    9  11
Atlético/MG       19    6  13
Atlético/PR       12    9  17
Avaí              17   10  11
Botafogo           7   17  14
Ceará             11   17  10
Corinthians        8   11  19
Cruzeiro           9    9  20
Flamengo          12   17   9
Fluminense         7   11  20
Goiás             21    9   8
Grêmio             9   12  17
Grêmio Prudente   21   10   7
Guarani           17   13   8
Internacional     12   10  16
Palmeiras         12   14  12
Santos            12   11  15
São Paulo         13   10  15
Vasco             11   16  11
Vitória           14   15   9

## cbind(db, aux)[sample(1:nrow(db), size=10),]

pl10 <- t(Xh)%*%aux[,"h"]+t(Xa)%*%aux[,"a"]
pl10 <- cbind(team=levels(db$home), data.frame(pts=pl10))
## pl10$rk <- rank(pl10$pts, decreasing=TRUE)

pl10 <- cbind(pl10, ldw[,3:1], gsgc)

pl10 <- arrange(pl10, -pts)
pl10$pos <- rank(-pl10$pts)
pl10

              team pts Win Draw Lose gScored gConced  pos
1       Fluminense  71  20   11    7      62      36  1.0
2         Cruzeiro  69  20    9    9      53      38  2.0
3      Corinthians  68  19   11    8      65      41  3.0
4           Grêmio  63  17   12    9      68      43  4.0
5      Atlético/PR  60  17    9   12      43      45  5.0
6         Botafogo  59  14   17    7      54      42  6.0
7    Internacional  58  16   10   12      48      41  7.0
8           Santos  56  15   11   12      63      50  8.0
9        São Paulo  55  15   10   13      54      54  9.0
10       Palmeiras  50  12   14   12      42      43 10.0
11           Vasco  49  11   16   11      43      45 11.0
12           Ceará  47  10   17   11      35      44 12.0
13     Atlético/MG  45  13    6   19      52      64 13.0
14        Flamengo  44   9   17   12      41      44 14.0
15            Avaí  43  11   10   17      49      58 15.0
16     Atlético/GO  42  11    9   18      51      57 16.5
17         Vitória  42   9   15   14      42      48 16.5
18         Guarani  37   8   13   17      33      53 18.0
19           Goiás  33   8    9   21      41      68 19.0
20 Grêmio Prudente  31   7   10   21      39      64 20.0

th16 <- pl10[16,]

rm(aux, gsgc, ldw)

Descriptive table of the 2010 season

print(xtable(x=pl10,
             caption="A suitable caption here.",
             digits=0),
      include.rownames=FALSE)

% latex table generated in R 3.2.2 by xtable 1.7-4 package
% Thu Sep 10 20:32:50 2015
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrrrr}
  \hline
team & pts & Win & Draw & Lose & gScored & gConced & pos \\ 
  \hline
Fluminense & 71 & 20 & 11 & 7 & 62 & 36 & 1 \\ 
  Cruzeiro & 69 & 20 & 9 & 9 & 53 & 38 & 2 \\ 
  Corinthians & 68 & 19 & 11 & 8 & 65 & 41 & 3 \\ 
  Grêmio & 63 & 17 & 12 & 9 & 68 & 43 & 4 \\ 
  Atlético/PR & 60 & 17 & 9 & 12 & 43 & 45 & 5 \\ 
  Botafogo & 59 & 14 & 17 & 7 & 54 & 42 & 6 \\ 
  Internacional & 58 & 16 & 10 & 12 & 48 & 41 & 7 \\ 
  Santos & 56 & 15 & 11 & 12 & 63 & 50 & 8 \\ 
  São Paulo & 55 & 15 & 10 & 13 & 54 & 54 & 9 \\ 
  Palmeiras & 50 & 12 & 14 & 12 & 42 & 43 & 10 \\ 
  Vasco & 49 & 11 & 16 & 11 & 43 & 45 & 11 \\ 
  Ceará & 47 & 10 & 17 & 11 & 35 & 44 & 12 \\ 
  Atlético/MG & 45 & 13 & 6 & 19 & 52 & 64 & 13 \\ 
  Flamengo & 44 & 9 & 17 & 12 & 41 & 44 & 14 \\ 
  Avaí & 43 & 11 & 10 & 17 & 49 & 58 & 15 \\ 
  Atlético/GO & 42 & 11 & 9 & 18 & 51 & 57 & 16 \\ 
  Vitória & 42 & 9 & 15 & 14 & 42 & 48 & 16 \\ 
  Guarani & 37 & 8 & 13 & 17 & 33 & 53 & 18 \\ 
  Goiás & 33 & 8 & 9 & 21 & 41 & 68 & 19 \\ 
  Grêmio Prudente & 31 & 7 & 10 & 21 & 39 & 64 & 20 \\ 
   \hline
\end{tabular}
\caption{A suitable caption here.} 
\end{table}

Maximum likelihood estimates for the 2010 season

##----------------------------------------------------------------------
## Likelihood definition.

## log-likeligood funtion corresponding to a Poisson model for the goals
## scored as function of teams attacking streng (gamma), teams
## deffensive strength (delta), and home(away) field
## advantage(disadvatage) (omega,kappa), so theta=c(gamma, delta,
## omega, kappa).

## attack, defense and home field advantage.
ll1 <- function(theta, gh, ga, Xh, Xa){
    ## theta: parameter vector.
    ## gh, ga: goals for the home and away team.
    ## Xh, Xa: design matrices for home and away team.
    gamma <- 1:20; delta <- 21:40; omega <- 41
    eta.h <- (Xh%*%theta[gamma]-Xa%*%theta[delta])+theta[omega]
    eta.a <- (Xa%*%theta[gamma]-Xh%*%theta[delta])
    ll.h <- dpois(gh, lambda=exp(eta.h), log=TRUE)
    ll.a <- dpois(ga, lambda=exp(eta.a), log=TRUE)
    ll <- sum(ll.h+ll.a)
    return(ll)
}

## attack, defense, home field advantage and away field disadvantage.
ll2 <- function(theta, gh, ga, Xh, Xa){
    ## theta: parameter vector.
    ## gh, ga: goals for the home and away team.
    ## Xh, Xa: design matrices for home and away team.
    gamma <- 1:20; delta <- 21:40; omega <- 41; kappa <- 42
    eta.h <- (Xh%*%theta[gamma]-Xa%*%theta[delta])+theta[omega]
    eta.a <- (Xa%*%theta[gamma]-Xh%*%theta[delta])-theta[kappa]
    ll.h <- dpois(gh, lambda=exp(eta.h), log=TRUE)
    ll.a <- dpois(ga, lambda=exp(eta.a), log=TRUE)
    ll <- sum(ll.h+ll.a)
    return(ll)
}

##----------------------------------------------------------------------
## Optimization to get parameter estimates.

## DANGER ATTENTION!
## Initial values and corresponding ll:
## rep(1, 41)              -1080.332 -1191.508
## rep(c(1,0.5), c(40,1))  -1065.329 -1063.594

op1 <- optim(
    ## par=rep(1, 41),
    par=rep(c(1,0.5), c(40,1)),
    fn=ll1,
    gh=db$h, ga=db$a,
    Xh=Xh, Xa=Xa,
    control=list(fnscale=-1),
    hessian=TRUE)
## str(op1)

op2 <- optim(
    ## par=rep(1, 42),
    par=c(op1$par, 1),
    fn=ll2,
    gh=db$h, ga=db$a,
    Xh=Xh, Xa=Xa,
    control=list(fnscale=-1),
    hessian=TRUE)
## str(op2)

## ll1(theta=op1$par, gh=db$h, ga=db$a, Xh=Xh, Xa=Xa)
## ll2(theta=op2$par, gh=db$h, ga=db$a, Xh=Xh, Xa=Xa)

## Likelihoods.
l <- c(ll1=op1$value, ll2=op2$value)
l

      ll1       ll2 
-1065.329 -1063.594

## LRT for the H0: kappa == 0.
pchisq(2*abs(diff(l)), df=1, lower.tail=FALSE)

       ll2 
0.06246576

theta <- data.frame(th1=c(op1$par, NA), th2=op2$par)
theta

         th1       th2
1  1.0146062 1.0943550
2  1.0323265 1.1359488
3  0.9164412 0.9613652
4  0.9999984 1.0985514
5  1.0891357 1.0325902
6  0.9262548 0.8159923
7  1.1432700 1.2233851
8  1.0958177 1.1905418
9  1.0895740 1.1405554
10 1.1443458 1.2411890
11 0.9388537 0.9865775
12 1.3700004 1.4670460
13 1.0106891 1.0596737
14 0.9228727 0.9295684
15 1.1514337 1.1990136
16 0.9369711 0.9804244
17 1.1462865 1.2330103
18 1.1723053 1.0719137
19 0.7899019 0.8784483
20 1.0003924 1.0471273
21 0.9416675 0.9183128
22 0.9208289 0.8725802
23 1.0666239 1.1064511
24 0.8847919 0.8772774
25 1.0033544 0.9974467
26 1.1928647 1.2403810
27 1.1155715 1.1439042
28 1.1062243 1.1040541
29 1.0688107 1.1056441
30 1.1061888 1.1060945
31 0.6660984 0.6559304
32 1.0620773 1.0959321
33 0.8378148 0.7846597
34 0.9887209 0.8281367
35 1.1097679 1.0998134
36 1.0847166 1.1157103
37 0.9634091 1.0087434
38 0.8679171 0.8602855
39 1.0029171 1.1289538
40 1.0258934 1.0686045
41 0.3256861 0.3552726
42        NA 0.1166481

## plot(th2~th1, data=theta[1:40,], asp=1,
##      pch=as.integer(theta[1:40,"param"]))
## abline(a=0, b=1)
## a <- with(theta, identify(th1, th2))
## dput(a)

## a <- c(12L, 18L, 19L, 31L, 34L)
## b <- c(2,4,2,4,4)
a <- c(5L, 6L, 12L, 18L, 19L, 22L, 31L, 33L, 34L, 39L)
b <- c(4,4,2,4,2,4,4,4,4,2)
theta <- cbind(theta[1:40,],
               expand.grid(team=levels(db$home),
                           param=c("gamma","delta")))
## theta[a,]

## Legend for gamma and delta.
key.gd <- list(columns=2,
               title="Parameter", cex.title=1.1,
               points=list(
                   col=ps$superpose.line$col[1:2],
                   pch=ps$superpose.symbol$pch[1:2]),
               text=list(c(expression(gamma), expression(delta))))

xyplot(th2~th1, data=theta, groups=param,
       key=key.gd, aspect="iso",
       xlab="Parameter estimates in the 1 field parameter model",
       ylab="Parameter estimates in the 2 field parameter model")+
    layer(panel.abline(a=0, b=1, lty=2))+
             layer(panel.text(x=theta$th1[a], y=theta$th2[a], pos=b,
                              labels=as.character(theta$team[a])))

##----------------------------------------------------------------------

## Precision matrix.
## levelplot(-op1$hessian[-41,-41])

## Variace-covariance matrix.
## levelplot(-solve(op1$hessian)[-41,-41])

## Standard error of parameter estimates (so big).
## sqrt(diag(-solve(op1$hessian)))

priors1 <- expand.grid(teams=levels(db$home),
                       par=c("gamma", "delta"),
                       stringsAsFactors=FALSE)
priors1 <- rbind(priors1, c("homefield", "omega"))

priors1$est <- sqrt(op1$par)        ## Estimate.
priors1$info <- diag(-op1$hessian)  ## Precision.
priors1$var <- 1/priors1$info       ## Inverse precision.
priors1$sd <- sqrt(priors1$var)

## Spliting the table of priors1.
L <- split(subset(priors1, select=c("teams","est","var")),
           f=priors1$par)

## Using better names.
names(L$gamma)[-1] <- c("mu","sigma2")
names(L$delta)[-1] <- c("theta","psi2")
names(L$omega)[-1] <- c("mu0","sigma20")
priors1 <- merge(L$gamma, L$delta)
priors1$teams <- factor(priors1$teams, levels=levels(db$home))
attr(priors1, "hf") <- list(omega=L$omega)

arrange(priors1, -mu)

             teams        mu     sigma2     theta       psi2
1           Grêmio 1.1704702 0.01508992 1.0305714 0.02272856
2        São Paulo 1.0827305 0.01858343 0.9316207 0.01847316
3    Internacional 1.0730488 0.01873354 1.0534552 0.02349849
4           Santos 1.0706477 0.01896948 0.9815340 0.02029303
5       Fluminense 1.0697410 0.01886995 1.0517551 0.02340488
6      Corinthians 1.0692381 0.01888201 1.0562062 0.02362404
7         Cruzeiro 1.0468131 0.01980822 1.0517720 0.02334159
8         Flamengo 1.0438266 0.01996787 1.0338330 0.02247673
9         Botafogo 1.0436166 0.02004255 1.0016758 0.02105210
10     Atlético/MG 1.0160347 0.02130959 0.9595983 0.01932613
11     Atlético/GO 1.0072766 0.02166518 0.9703955 0.01971524
12 Grêmio Prudente 1.0053303 0.02188332 0.9153222 0.01776698
13         Vitória 1.0001962 0.02187710 1.0128639 0.02143241
14            Avaí 0.9999992 0.02205573 0.9406338 0.01861156
15           Goiás 0.9689446 0.02378966 0.8161485 0.01491161
16       Palmeiras 0.9679727 0.02324199 1.0414973 0.02266156
17           Ceará 0.9624213 0.02337781 1.0921834 0.02523726
18         Guarani 0.9606626 0.02368610 0.9943445 0.02057387
19     Atlético/PR 0.9573094 0.02374480 1.0327749 0.02223417
20           Vasco 0.8887643 0.02703450 1.0014575 0.02074934

attr(priors1, "hf")

$omega
       teams       mu0     sigma20
41 homefield 0.5706891 0.001790835

##----------------------------------------------------------------------

priors2 <- expand.grid(teams=levels(db$home),
                       par=c("gamma", "delta"),
                       stringsAsFactors=FALSE)
priors2 <- rbind(priors2,
                 c("homefield", "omega"),
                 c("awayfield", "kappa"))

priors2$est <- sqrt(op2$par)        ## Estimate.
priors2$info <- diag(-op2$hessian)  ## Precision.
priors2$var <- 1/priors2$info       ## Inverse precision.
priors2$sd <- sqrt(priors2$var)

## Spliting the table of priors2.
M <- split(subset(priors2, select=c("teams","est","var")),
           f=priors2$par)

## Using better names.
names(M$gamma)[-1] <- c("mu","sigma2")
names(M$delta)[-1] <- c("theta","psi2")
names(M$omega)[-1] <- c("mu0","sigma20")
names(M$kappa)[-1] <- c("theta0","psi20")
priors2 <- merge(M$gamma, M$delta)
priors2$teams <- factor(priors2$teams, levels=levels(db$home))
attr(priors2, "hf") <- list(omega=M$omega, kappa=M$kappa)

arrange(priors2, -mu)

             teams        mu     sigma2     theta       psi2
1           Grêmio 1.2112167 0.01409858 1.0468678 0.02316603
2       Fluminense 1.1140866 0.01766265 1.0517103 0.02304036
3           Santos 1.1104100 0.01789339 1.0043622 0.02089268
4      Corinthians 1.1060674 0.01794874 1.0695346 0.02390242
5    Internacional 1.0949948 0.01842895 1.0487199 0.02283848
6         Cruzeiro 1.0911195 0.01858203 1.0507398 0.02292424
7         Flamengo 1.0679679 0.01953303 1.0514961 0.02289608
8      Atlético/MG 1.0658090 0.01986755 0.9341200 0.01813147
9             Avaí 1.0481180 0.02061882 0.9366309 0.01818037
10     Atlético/GO 1.0461142 0.02065604 0.9582864 0.01893776
11       São Paulo 1.0353327 0.02119709 0.9275158 0.01784940
12 Grêmio Prudente 1.0294045 0.02156091 0.8858102 0.01653903
13         Vitória 1.0232924 0.02148392 1.0337333 0.02195597
14        Botafogo 1.0161645 0.02187710 0.9987225 0.02043324
15           Goiás 0.9932661 0.02340741 0.8098953 0.01448979
16       Palmeiras 0.9901638 0.02291448 1.0562719 0.02294128
17     Atlético/PR 0.9804923 0.02336547 1.0518798 0.02270994
18         Guarani 0.9641413 0.02448825 0.9100202 0.01716831
19           Vasco 0.9372557 0.02535904 1.0625224 0.02314297
20           Ceará 0.9033229 0.02686384 1.1137239 0.02580569

attr(priors2, "hf")

$omega
       teams       mu0     sigma20
41 homefield 0.5960475 0.001660106

$kappa
       teams    theta0       psi20
42 awayfield 0.3415379 0.002661267

Maximum likelihood estimation for 2010 season

tb1 <- 
    with(priors1,
         data.frame(
             teams=teams,
             "mu_sigma_1"=sprintf("%0.3f(%0.3f)", mu, sqrt(sigma2)),
             "theta_psi_1"=sprintf("%0.3f(%0.3f)", theta, sqrt(psi2))
         ))
tb2 <- 
    with(priors2,
         data.frame(
             teams=teams,
             "mu_sigma_2"=sprintf("%0.3f(%0.3f)", mu, sqrt(sigma2)),
             "theta_psi_2"=sprintf("%0.3f(%0.3f)", theta, sqrt(psi2))
         ))

tb <- merge(tb1, tb2)
tb$teams <- factor(tb$teams, levels=as.character(pl10$team))
tb <- arrange(tb, teams)

cat("\\begin{tabular}{lcccc}
\\hline
        & \\multicolumn{2}{c}{1 field parameter model}
        & \\multicolumn{2}{c}{2 field parameter model} \\\\ \\cline{2-5}
Teams   & \\multicolumn{1}{c}{$\\mu(\\sigma)$}
        & \\multicolumn{1}{c}{$\\theta(\\psi)$}
        & \\multicolumn{1}{c}{$\\mu(\\sigma)$}
        & \\multicolumn{1}{c}{$\\theta(\\psi)$} \\\\ \\hline")
print(xtable(x=tb, align="llcccc"), only.contents=TRUE,
      hline.after=0, include.rownames=FALSE, include.colnames=FALSE,
      sanitize.colnames.function=function(x){x})
cat(sprintf(
    "\\hline Field parameters &  &  &  &  \\\\
     $\\omega$ & \\multicolumn{2}{c}{%0.3f(%0.5f)}
               & \\multicolumn{2}{c}{%0.3f(%0.5f)} \\\\
     $\\kappa$ &           &
               & \\multicolumn{2}{c}{%0.3f(%0.5f)} \\\\ \\hline
      \\end{tabular}",
    attr(priors1, "hf")$omega[1,2], attr(priors1, "hf")$omega[1,3],
    attr(priors2, "hf")$omega[1,2], attr(priors2, "hf")$omega[1,3],
    attr(priors2, "hf")$kappa[1,2], attr(priors2, "hf")$kappa[1,3])
)

\begin{tabular}{lcccc}
\hline
        & \multicolumn{2}{c}{1 field parameter model}
        & \multicolumn{2}{c}{2 field parameter model} \\ \cline{2-5}
Teams   & \multicolumn{1}{c}{$\mu(\sigma)$}
        & \multicolumn{1}{c}{$\theta(\psi)$}
        & \multicolumn{1}{c}{$\mu(\sigma)$}
        & \multicolumn{1}{c}{$\theta(\psi)$} \\ \hline% latex table generated in R 3.2.2 by xtable 1.7-4 package
% Thu Sep 10 20:32:55 2015
  \hline
Fluminense & 1.070(0.137) & 1.052(0.153) & 1.114(0.133) & 1.052(0.152) \\ 
  Cruzeiro & 1.047(0.141) & 1.052(0.153) & 1.091(0.136) & 1.051(0.151) \\ 
  Corinthians & 1.069(0.137) & 1.056(0.154) & 1.106(0.134) & 1.070(0.155) \\ 
  Grêmio & 1.170(0.123) & 1.031(0.151) & 1.211(0.119) & 1.047(0.152) \\ 
  Atlético/PR & 0.957(0.154) & 1.033(0.149) & 0.980(0.153) & 1.052(0.151) \\ 
  Botafogo & 1.044(0.142) & 1.002(0.145) & 1.016(0.148) & 0.999(0.143) \\ 
  Internacional & 1.073(0.137) & 1.053(0.153) & 1.095(0.136) & 1.049(0.151) \\ 
  Santos & 1.071(0.138) & 0.982(0.142) & 1.110(0.134) & 1.004(0.145) \\ 
  São Paulo & 1.083(0.136) & 0.932(0.136) & 1.035(0.146) & 0.928(0.134) \\ 
  Palmeiras & 0.968(0.152) & 1.041(0.151) & 0.990(0.151) & 1.056(0.151) \\ 
  Vasco & 0.889(0.164) & 1.001(0.144) & 0.937(0.159) & 1.063(0.152) \\ 
  Ceará & 0.962(0.153) & 1.092(0.159) & 0.903(0.164) & 1.114(0.161) \\ 
  Atlético/MG & 1.016(0.146) & 0.960(0.139) & 1.066(0.141) & 0.934(0.135) \\ 
  Flamengo & 1.044(0.141) & 1.034(0.150) & 1.068(0.140) & 1.051(0.151) \\ 
  Avaí & 1.000(0.149) & 0.941(0.136) & 1.048(0.144) & 0.937(0.135) \\ 
  Atlético/GO & 1.007(0.147) & 0.970(0.140) & 1.046(0.144) & 0.958(0.138) \\ 
  Vitória & 1.000(0.148) & 1.013(0.146) & 1.023(0.147) & 1.034(0.148) \\ 
  Guarani & 0.961(0.154) & 0.994(0.143) & 0.964(0.156) & 0.910(0.131) \\ 
  Goiás & 0.969(0.154) & 0.816(0.122) & 0.993(0.153) & 0.810(0.120) \\ 
  Grêmio Prudente & 1.005(0.148) & 0.915(0.133) & 1.029(0.147) & 0.886(0.129) \\ 
  \hline Field parameters &  &  &  &  \\
     $\omega$ & \multicolumn{2}{c}{0.571(0.00179)}
               & \multicolumn{2}{c}{0.596(0.00166)} \\
     $\kappa$ &           &
               & \multicolumn{2}{c}{0.342(0.00266)} \\ \hline
      \end{tabular}

Classification in 2011 season

##----------------------------------------------------------------------

## Just the 2011 season.
dc <- droplevels(subset(da, yea==2011))
rownames(dc) <- NULL
str(dc)

'data.frame': 380 obs. of  10 variables:
 $ dat : POSIXct, format: "2011-05-21 00:00:00" ...
 $ rod : int  1 1 1 1 1 1 1 1 1 1 ...
 $ home: Factor w/ 20 levels "América/MG","Atlético/GO",..: 3 8 13 18 12 15 10 17 14 1 ...
 $ away: Factor w/ 20 levels "América/MG","Atlético/GO",..: 4 20 5 16 11 9 2 7 19 6 ...
 $ pla : chr  "3x0" "1x3" "4x0" "1x1" ...
 $ yea : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ h   : int  3 1 4 1 1 1 0 1 0 2 ...
 $ a   : int  0 3 0 1 0 2 1 0 2 1 ...
 $ d   : int  3 2 4 0 1 1 1 1 2 1 ...
 $ s   : int  3 4 4 2 1 3 1 1 2 3 ...

##----------------------------------------------------------------------
## http://esportes.terra.com.br/futebol/brasileiro/2011/seriea/classificacao_jogos/

## Rating and position in 2011 season.
Xh <- model.matrix(~-1+home, dc) ## h: home.
Xa <- model.matrix(~-1+away, dc) ## a: away.

## Place in 2011 season, from first to last.
gsgc <- cbind(scored=t(Xh)%*%dc$h+t(Xa)%*%dc$a,   ## goals scored
              conceded=t(Xa)%*%dc$h+t(Xh)%*%dc$a) ## goals conceded
colnames(gsgc) <- c("gScored", "gConced")

aux <- with(dc, {
    d <- h-a
    draw <- d==0
    winH <- d>0
    ## winA <- !draw & !winH
    winA <- !(draw | winH)
    x <- cbind(h=winH*3+draw, a=winA*3+draw)
    return(x)
})

## Lose Draw Win frequency
ldw <-
    do.call(rbind, lapply(tapply(aux[,1], dc$home, tableIn), as.vector))+
    do.call(rbind, lapply(tapply(aux[,2], dc$away, tableIn), as.vector))
colnames(ldw) <- c("Lose", "Draw", "Win")
ldw

              Lose Draw Win
América/MG      17   13   8
Atlético/GO     14   12  12
Atlético/MG     19    6  13
Atlético/PR     17   11  10
Avaí            21   10   7
Bahia           14   13  11
Botafogo        14    8  16
Ceará           19    9  10
Corinthians      9    8  21
Coritiba        13    9  16
Cruzeiro        17   10  11
Figueirense     10   13  15
Flamengo         7   16  15
Fluminense      15    3  20
Grêmio          16    9  13
Internacional   10   12  16
Palmeiras       10   17  11
Santos          15    8  15
São Paulo       11   11  16
Vasco            7   12  19

pl11 <- t(Xh)%*%aux[,"h"]+t(Xa)%*%aux[,"a"]
pl11 <- cbind(team=levels(dc$home), data.frame(pts=pl11))
## pl11$rk <- rank(pl11$pts, decreasing=TRUE)

pl11 <- cbind(pl11, ldw[,3:1], gsgc)

pl11 <- arrange(pl11, -pts)
pl11$pos <- rank(-pl11$pts)
pl11

            team pts Win Draw Lose gScored gConced  pos
1    Corinthians  71  21    8    9      53      36  1.0
2          Vasco  69  19   12    7      57      40  2.0
3     Fluminense  63  20    3   15      60      51  3.0
4       Flamengo  61  15   16    7      59      47  4.0
5  Internacional  60  16   12   10      57      43  5.0
6      São Paulo  59  16   11   11      57      46  6.0
7    Figueirense  58  15   13   10      46      45  7.0
8       Coritiba  57  16    9   13      57      41  8.0
9       Botafogo  56  16    8   14      52      49  9.0
10        Santos  53  15    8   15      55      55 10.0
11     Palmeiras  50  11   17   10      43      39 11.0
12   Atlético/GO  48  12   12   14      50      45 12.5
13        Grêmio  48  13    9   16      49      57 12.5
14         Bahia  46  11   13   14      43      49 14.0
15   Atlético/MG  45  13    6   19      50      60 15.0
16      Cruzeiro  43  11   10   17      48      51 16.0
17   Atlético/PR  41  10   11   17      38      55 17.0
18         Ceará  39  10    9   19      47      64 18.0
19    América/MG  37   8   13   17      51      69 19.0
20          Avaí  31   7   10   21      45      75 20.0

Descriptive table of the 2011 season

print(xtable(x=pl11,
             caption="A suitable caption here.",
             digits=0),
      include.rownames=FALSE)

% latex table generated in R 3.2.2 by xtable 1.7-4 package
% Thu Sep 10 20:32:55 2015
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrrrr}
  \hline
team & pts & Win & Draw & Lose & gScored & gConced & pos \\ 
  \hline
Corinthians & 71 & 21 & 8 & 9 & 53 & 36 & 1 \\ 
  Vasco & 69 & 19 & 12 & 7 & 57 & 40 & 2 \\ 
  Fluminense & 63 & 20 & 3 & 15 & 60 & 51 & 3 \\ 
  Flamengo & 61 & 15 & 16 & 7 & 59 & 47 & 4 \\ 
  Internacional & 60 & 16 & 12 & 10 & 57 & 43 & 5 \\ 
  São Paulo & 59 & 16 & 11 & 11 & 57 & 46 & 6 \\ 
  Figueirense & 58 & 15 & 13 & 10 & 46 & 45 & 7 \\ 
  Coritiba & 57 & 16 & 9 & 13 & 57 & 41 & 8 \\ 
  Botafogo & 56 & 16 & 8 & 14 & 52 & 49 & 9 \\ 
  Santos & 53 & 15 & 8 & 15 & 55 & 55 & 10 \\ 
  Palmeiras & 50 & 11 & 17 & 10 & 43 & 39 & 11 \\ 
  Atlético/GO & 48 & 12 & 12 & 14 & 50 & 45 & 12 \\ 
  Grêmio & 48 & 13 & 9 & 16 & 49 & 57 & 12 \\ 
  Bahia & 46 & 11 & 13 & 14 & 43 & 49 & 14 \\ 
  Atlético/MG & 45 & 13 & 6 & 19 & 50 & 60 & 15 \\ 
  Cruzeiro & 43 & 11 & 10 & 17 & 48 & 51 & 16 \\ 
  Atlético/PR & 41 & 10 & 11 & 17 & 38 & 55 & 17 \\ 
  Ceará & 39 & 10 & 9 & 19 & 47 & 64 & 18 \\ 
  América/MG & 37 & 8 & 13 & 17 & 51 & 69 & 19 \\ 
  Avaí & 31 & 7 & 10 & 21 & 45 & 75 & 20 \\ 
   \hline
\end{tabular}
\caption{A suitable caption here.} 
\end{table}

Performance series

##----------------------------------------------------------------------
## Home and away points for each match.

colnames(aux) <- c("hpt", "apt")

dc <- cbind(dc, aux)
str(dc)

'data.frame': 380 obs. of  12 variables:
 $ dat : POSIXct, format: "2011-05-21 00:00:00" ...
 $ rod : int  1 1 1 1 1 1 1 1 1 1 ...
 $ home: Factor w/ 20 levels "América/MG","Atlético/GO",..: 3 8 13 18 12 15 10 17 14 1 ...
 $ away: Factor w/ 20 levels "América/MG","Atlético/GO",..: 4 20 5 16 11 9 2 7 19 6 ...
 $ pla : chr  "3x0" "1x3" "4x0" "1x1" ...
 $ yea : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ h   : int  3 1 4 1 1 1 0 1 0 2 ...
 $ a   : int  0 3 0 1 0 2 1 0 2 1 ...
 $ d   : int  3 2 4 0 1 1 1 1 2 1 ...
 $ s   : int  3 4 4 2 1 3 1 1 2 3 ...
 $ hpt : num  3 0 3 1 3 0 0 3 0 3 ...
 $ apt : num  0 3 0 1 0 3 3 0 3 0 ...

pfm <- rbind(
    with(dc, data.frame(rod=rod, team=home, pt=hpt, goals=h, adv=1)),
    with(dc, data.frame(rod=rod, team=away, pt=apt, goals=a, adv=0)))
pfm$team <- factor(pfm$team, levels=as.character(pl11$team))
str(pfm)

'data.frame':    760 obs. of  5 variables:
 $ rod  : int  1 1 1 1 1 1 1 1 1 1 ...
 $ team : Factor w/ 20 levels "Corinthians",..: 15 18 4 10 7 13 8 11 3 19 ...
 $ pt   : num  3 0 3 1 3 0 0 3 0 3 ...
 $ goals: int  3 1 4 1 1 1 0 1 0 2 ...
 $ adv  : num  1 1 1 1 1 1 1 1 1 1 ...

pfm$pch <- c(6,1,2)[match(pfm$pt, table=c(0,1,3))]

## By team and round.
## xyplot(adv~rod|team, data=pfm, col=1, pch=pfm$pch,
##        layout=c(4,5), as.table=TRUE,
##        ylab="y = 1: played at home, y = 0: played away",
##        xlab="Champonship round",
##        key=list(
##            text=list(c("Win", "Draw", "Lose")),
##            points=list(pch=c(2,1,6))
##        ))

Exploratory analysis to the 2011 season

##----------------------------------------------------------------------

## Distribution of goals.
histogram(~h+a+d+s, data=dc,
          breaks=with(dc, seq(min(d), max(s)+1, by=1)-0.5),
          xlab="Goals",
          ylab="Relative frequency (%)",
          strip=strip.custom(
              factor.levels=c("Goals of Home","Goals of Away",
                  "Absolute difference","Total of goals")))

## Classes of goals: 0, 1, ..., 3, >3.
cutin <- function(z){
    cut(z, breaks=c(0:4, Inf),
        right=FALSE, include.lowest=TRUE)
}

## Crossed frequency.
xt <- xtabs(~cutin(h)+cutin(a), data=dc)
addmargins(xt)

         cutin(a)
cutin(h)  [0,1) [1,2) [2,3) [3,4) [4,Inf] Sum
  [0,1)      26    26    13     6       2  73
  [1,2)      38    52    25     8       1 124
  [2,3)      32    40    24     6       2 104
  [3,4)      16    15    13     3       1  48
  [4,Inf]    13    11     4     1       2  31
  Sum       125   144    79    24       8 380

## Remove objects.
rm(xt, cutin)

Prior from 2010 to the 2011 season

##----------------------------------------------------------------------
## Set operation.

## Teams in 2010 that aren't in 2011 (the worst in seria A).
wA <- setdiff(levels(db$home), levels(dc$home))
wA

[1] "Goiás"              "Grêmio Prudente"    "Guarani"           
[4] "Vitória"

## The bests of serie B, came to 2011 season.
bB <- setdiff(levels(dc$home), levels(db$home))
bB

[1] "América/MG"    "Bahia"         "Coritiba"      "Figueirense"

##----------------------------------------------------------------------
## Prior for the new teams, those in serie B that went to seria A.

## Save the priors of the teams in 2010.
priors_2010 <- list(priors1, priors2)
str(priors_2010)

List of 2
 $ :'data.frame':   20 obs. of  5 variables:
  ..$ teams : Factor w/ 20 levels "Atlético/GO",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ mu    : num [1:20] 1.007 1.016 0.957 1 1.044 ...
  ..$ sigma2: num [1:20] 0.0217 0.0213 0.0237 0.0221 0.02 ...
  ..$ theta : num [1:20] 0.97 0.96 1.033 0.941 1.002 ...
  ..$ psi2  : num [1:20] 0.0197 0.0193 0.0222 0.0186 0.0211 ...
  ..- attr(*, "hf")=List of 1
  .. ..$ omega:'data.frame':    1 obs. of  3 variables:
  .. .. ..$ teams  : chr "homefield"
  .. .. ..$ mu0    : num 0.571
  .. .. ..$ sigma20: num 0.00179
 $ :'data.frame':   20 obs. of  5 variables:
  ..$ teams : Factor w/ 20 levels "Atlético/GO",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ mu    : num [1:20] 1.05 1.07 0.98 1.05 1.02 ...
  ..$ sigma2: num [1:20] 0.0207 0.0199 0.0234 0.0206 0.0219 ...
  ..$ theta : num [1:20] 0.958 0.934 1.052 0.937 0.999 ...
  ..$ psi2  : num [1:20] 0.0189 0.0181 0.0227 0.0182 0.0204 ...
  ..- attr(*, "hf")=List of 2
  .. ..$ omega:'data.frame':    1 obs. of  3 variables:
  .. .. ..$ teams  : chr "homefield"
  .. .. ..$ mu0    : num 0.596
  .. .. ..$ sigma20: num 0.00166
  .. ..$ kappa:'data.frame':    1 obs. of  3 variables:
  .. .. ..$ teams : chr "awayfield"
  .. .. ..$ theta0: num 0.342
  .. .. ..$ psi20 : num 0.00266

priors1 <- droplevels(subset(priors1, !teams%in%wA))

## The team at 16th position.
t16 <- subset(priors1, teams==as.character(th16$team))

t16 <- t16[rep(1, 4),]
t16$teams <- bB

## Priors1 for all teams in the 2011 season.
priors1 <- rbind(priors1, t16)
rownames(priors1) <- NULL
priors1

           teams        mu     sigma2     theta       psi2
1    Atlético/GO 1.0072766 0.02166518 0.9703955 0.01971524
2    Atlético/MG 1.0160347 0.02130959 0.9595983 0.01932613
3    Atlético/PR 0.9573094 0.02374480 1.0327749 0.02223417
4           Avaí 0.9999992 0.02205573 0.9406338 0.01861156
5       Botafogo 1.0436166 0.02004255 1.0016758 0.02105210
6          Ceará 0.9624213 0.02337781 1.0921834 0.02523726
7    Corinthians 1.0692381 0.01888201 1.0562062 0.02362404
8       Cruzeiro 1.0468131 0.01980822 1.0517720 0.02334159
9       Flamengo 1.0438266 0.01996787 1.0338330 0.02247673
10    Fluminense 1.0697410 0.01886995 1.0517551 0.02340488
11        Grêmio 1.1704702 0.01508992 1.0305714 0.02272856
12 Internacional 1.0730488 0.01873354 1.0534552 0.02349849
13     Palmeiras 0.9679727 0.02324199 1.0414973 0.02266156
14        Santos 1.0706477 0.01896948 0.9815340 0.02029303
15     São Paulo 1.0827305 0.01858343 0.9316207 0.01847316
16         Vasco 0.8887643 0.02703450 1.0014575 0.02074934
17    América/MG 1.0072766 0.02166518 0.9703955 0.01971524
18         Bahia 1.0072766 0.02166518 0.9703955 0.01971524
19      Coritiba 1.0072766 0.02166518 0.9703955 0.01971524
20   Figueirense 1.0072766 0.02166518 0.9703955 0.01971524

## Prior mean and variance for the home-field advantage parameter.
attr(priors1, "hf") <- list(omega=L$omega)
attr(priors1, "hf")

$omega
       teams       mu0     sigma20
41 homefield 0.5706891 0.001790835

## Same order in the factor levels.
priors1$teams <- factor(as.character(priors1$teams),
                        levels=levels(dc$home))

##----------------------------------------------------------------------

priors2 <- droplevels(subset(priors2, !teams%in%wA))

## Priors2 for all teams in the 2011 season.
priors2 <- rbind(priors2, t16)
rownames(priors2) <- NULL
priors2

           teams        mu     sigma2     theta       psi2
1    Atlético/GO 1.0461142 0.02065604 0.9582864 0.01893776
2    Atlético/MG 1.0658090 0.01986755 0.9341200 0.01813147
3    Atlético/PR 0.9804923 0.02336547 1.0518798 0.02270994
4           Avaí 1.0481180 0.02061882 0.9366309 0.01818037
5       Botafogo 1.0161645 0.02187710 0.9987225 0.02043324
6          Ceará 0.9033229 0.02686384 1.1137239 0.02580569
7    Corinthians 1.1060674 0.01794874 1.0695346 0.02390242
8       Cruzeiro 1.0911195 0.01858203 1.0507398 0.02292424
9       Flamengo 1.0679679 0.01953303 1.0514961 0.02289608
10    Fluminense 1.1140866 0.01766265 1.0517103 0.02304036
11        Grêmio 1.2112167 0.01409858 1.0468678 0.02316603
12 Internacional 1.0949948 0.01842895 1.0487199 0.02283848
13     Palmeiras 0.9901638 0.02291448 1.0562719 0.02294128
14        Santos 1.1104100 0.01789339 1.0043622 0.02089268
15     São Paulo 1.0353327 0.02119709 0.9275158 0.01784940
16         Vasco 0.9372557 0.02535904 1.0625224 0.02314297
17    América/MG 1.0072766 0.02166518 0.9703955 0.01971524
18         Bahia 1.0072766 0.02166518 0.9703955 0.01971524
19      Coritiba 1.0072766 0.02166518 0.9703955 0.01971524
20   Figueirense 1.0072766 0.02166518 0.9703955 0.01971524

## Prior mean and variance for the home-field advantage parameter.
attr(priors2, "hf") <- list(omega=M$omega, kappa=M$kappa)
attr(priors2, "hf")

$omega
       teams       mu0     sigma20
41 homefield 0.5960475 0.001660106

$kappa
       teams    theta0       psi20
42 awayfield 0.3415379 0.002661267

## Same order in the factor levels.
priors2$teams <- factor(as.character(priors2$teams),
                        levels=levels(dc$home))

## Remove objects.
rm(wA, bB, t16, L, M)

## Check.
## all.equal(levels(db$home), levels(db$away))
## cbind(levels(dc$home), levels(dc$away), levels(priors1$teams))

Visualize the priors

##----------------------------------------------------------------------

priors12 <- rbind(cbind(priors1, pr=1),
                  cbind(priors2, pr=2))
priors12$teams <- factor(priors12$teams,
                         levels=as.character(pl11$team))

key.gd <- list(columns=1,
               title="Model", cex.title=1.1,
               points=list(
                   col=ps$superpose.line$col[1:2],
                   pch=c(1,5)),
               text=list(c("1 field parameter",
                   "2 field parameter")))

xyplot(mu+sigma2+theta+psi2~teams, outer=TRUE,
       data=priors12, groups=pr, pch=c(1,5), key=key.gd,
       xlab="Teams", ylab="Prior", as.table=TRUE,
       scales=list(y="free", x=list(rot=90)),
       strip=strip.custom(
           factor.levels=expression(
               mu*": prior mean for"~gamma,
               sigma^2*": prior variance for"~gamma,
               theta*": prior mean for"~delta,
               psi^2*": prior variance for"~delta)))

##----------------------------------------------------------------------
## Prior densities.

## str(priors12)
## xtabs(~teams+pr, priors12)

L <- apply(priors12[,-1], MARGIN=1,
           FUN=function(pars){
               x <- seq(0.2, 1.8, length.out=50)
               ygamma <- dnorm(x, mean=pars[1], sd=sqrt(pars[2]))
               ydelta <- dnorm(x, mean=pars[3], sd=sqrt(pars[4]))
               data.frame(x=x, ygamma=ygamma, ydelta=ydelta, pr=pars[5])
           })

names(L) <- rep(levels(priors12$teams), times=2)
L <- ldply(L, .id="teams")
str(L)

'data.frame':  2000 obs. of  5 variables:
 $ teams : Factor w/ 20 levels "Corinthians",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ x     : num  0.2 0.233 0.265 0.298 0.331 ...
 $ ygamma: num  7.96e-07 2.62e-06 8.23e-06 2.46e-05 6.98e-05 ...
 $ ydelta: num  8.25e-07 2.88e-06 9.50e-06 2.97e-05 8.81e-05 ...
 $ pr    : num  1 1 1 1 1 1 1 1 1 1 ...

## Legend for gamma and delta.
key.gd <- list(columns=1,
               title="Parameter", cex.title=1.1,
               lines=list(
                   col=ps$superpose.line$col[1:2]),
               text=list(c(expression(gamma), expression(delta))))

## Legend for 1 and 2 field parameter models.
key.12 <- list(columns=1,
               title="Model", cex.title=1.1,
               lines=list(
                   lty=1:2),
               text=list(c("1 field parameter",
                   "2 field parameter")))

## Create a grob by merging the two keys (latticeExtra).
keys <- mergedTrellisLegendGrob(
    a=list(fun=draw.key, args=list(key=key.gd)),
    b=list(fun=draw.key, args=list(key=key.12)),
    vertical=FALSE)

xyplot(ygamma+ydelta~x|teams, data=subset(L, pr==1),
       type="l", as.table=TRUE, layout=c(4,5),
       xlab="Parameter space",
       ylab="Prior density",
       legend=list(top=list(fun=keys))
       )+
    as.layer(xyplot(ygamma+ydelta~x|teams, data=subset(L, pr==2),
                    type="l", lty=2))

## About multiple legends:
## https://procomun.wordpress.com/2012/02/18/maps_with_r_1/

MAP-1 model fitting and results

Parameter estimates

##----------------------------------------------------------------------

## Number of teams and number of rounds.
nteams <- nlevels(db$home)
nrounds <- length(unique(db$rod))
c(nteams, nrounds)

[1] 20 38

## gamma: teams attacking strength.
## delta: teams deffensive strength.
## omega: home-field advantage parameter.
gamma <- matrix(numeric(), nrow=nteams, ncol=nrounds+1)
delta <- gamma
omega <- numeric(nrounds)

## To store values in each match inside a round.
lambdaH <- matrix(numeric(), nrow=nteams/2, ncol=nrounds)
lambdaA <- lambdaH

i <- 1
gamma[,i] <- priors1$mu
delta[,i] <- priors1$theta
omega[i] <- attr(priors1, "hf")$omega$mu0

## Fitting for each round.
for(i in 1:nrounds){
    ##-------------------------------------------
    ## Subset each round (10 matches).
    ## 
    dbi <- subset(dc, rod==i)
    ##
    ##-------------------------------------------
    ## Model matrices, which teams confronted each other.
    ## 
    Xh <- model.matrix(~-1+home, data=dbi)
    Xa <- model.matrix(~-1+away, data=dbi)
    ##
    ##-------------------------------------------
    ## Columns of Home and Away teams.
    ##
    posh <- apply(apply(Xh, MARGIN=1, FUN="==", 1), MARGIN=2, FUN=which)
    posa <- apply(apply(Xa, MARGIN=1, FUN="==", 1), MARGIN=2, FUN=which)
    ##
    ##-------------------------------------------
    ## Lambda values.
    ##
    lambdaH[,i] <- as.vector(
        exp(Xh%*%gamma[,i]-Xa%*%delta[,i]+omega[i])
        )
    lambdaA[,i] <- as.vector(
        exp(Xa%*%gamma[,i]-Xh%*%delta[,i])
        )
    ##
    ##-------------------------------------------
    ## Goals expected and goals scored.
    ## 
    ## cbind(goalRateHome=lambdaA[,i], goalRateAway=lambdaB[,i],
    ##       goalScorHome=dbi$x, goalScorAway=dbi$y)
    ##
    ##-------------------------------------------
    ## Updating priors for the next round.
    ##
    gamma[posh,i+1] <- 
        Xh%*%gamma[,i]+Xh%*%priors1$sigma2*(dbi$h-lambdaH[,i])
    gamma[posa,i+1] <- 
        Xa%*%gamma[,i]+Xa%*%priors1$sigma2*(dbi$a-lambdaA[,i])
    delta[posh,i+1] <- 
        Xh%*%delta[,i]-Xh%*%priors1$psi2*(dbi$h-lambdaA[,i])
    delta[posa,i+1] <- 
        Xa%*%delta[,i]-Xa%*%priors1$psi2*(dbi$h-lambdaH[,i])
    omega[i+1] <- 
        omega[i]+attr(priors1, "hf")$omega$sigma20*
            sum(dbi$h-lambdaH[,i])
}

gamma <- gamma[,-1]
delta <- delta[,-1]
omega <- omega[-1]

##----------------------------------------------------------------------

## Series for all teams parameters along the rounds.
dd <- melt(data=as.data.frame(delta), value.name="delta")
dd <- cbind(
    melt(data=cbind(team=levels(dc$home), as.data.frame(gamma)),
         id.vars="team", variable.name="rod", value.name ="gamma"),
    delta=dd$delta)
dd$rod <- as.integer(dd$rod)
dd$team <- factor(dd$team, levels=as.character(pl11$team))
str(dd)

'data.frame': 760 obs. of  4 variables:
 $ team : Factor w/ 20 levels "Corinthians",..: 19 12 15 17 20 14 9 18 1 8 ...
 $ rod  : int  1 1 1 1 1 1 1 1 1 1 ...
 $ gamma: num  1.015 1.017 0.986 0.979 1.024 ...
 $ delta: num  0.951 0.998 0.988 0.918 0.953 ...

sc <- list(x=list(at=c(1,10,20,30,38)))

## Legend for gamma and delta.
key.gd <- list(columns=2,
               title="Parameter", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:2],
                   pch=ps$superpose.symbol$pch[1:2]),
               text=list(c(expression(gamma), expression(delta))))

## All teams.
p1 <- 
    xyplot(gamma+delta~rod|team, data=dd, type="l",
           as.table=TRUE, layout=c(4,5),
           scales=sc,
           key=c(columns=1, space="right", key.gd[c(-1,-4,-5)]),
           xlab="Championship round",
           ylab="Parameter value")

p2 <- 
    xyplot(adv~rod|team, data=pfm, col=1, cex=0.6, pch=pfm$pch,
           layout=c(4,5), as.table=TRUE,
           scales=list(y=list(at=0:1, labels=c("A","H"))),
           ylab="y = 1: played at home, y = 0: played away",
           xlab="Champonship round",
           key=list(
               ## corner=c(0.1, 0.9),
               space="right",
               title="Outcome", cex.title=1.1, columns=1,
               points=list(pch=c(2,1,6), cex=0.6),
               text=list(c("Win", "Draw", "Lose"))
           ))

doubleYScale(p1, p2, use.style=FALSE)

## Home field parameter.
xyplot(omega~seq_along(omega), type="b",
       scales=sc,
       xlab="Championship round",
       ylab="Home field advantage parameter")

##----------------------------------------------------------------------

## Select som teams to a depper view, 2 bests and 2 worsts.
sel <- as.character(pl11$team[c(1,2,19,20)])
de <- droplevels(subset(dd, team%in%sel))

## Selected teams.
xyplot(gamma+delta~rod|team,
       data=de, type="o",
       as.table=TRUE,
       scales=sc,
       key=key.gd,
       xlab="Championship round",
       ylab="Parameter value")

key.tm <- list(columns=2,
               title="Teams", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:4],
                   pch=ps$superpose.symbol$pch[1:4]),
               text=list(levels(de$team)))

xyplot(gamma+delta~rod, outer=TRUE, groups=team,
       data=de, type="o",
       as.table=TRUE,
       scales=sc,
       key=key.tm,
       strip=strip.custom(factor.levels=key.gd$text[[1]]),
       xlab="Championship round",
       ylab="Parameter value")

Ratings

##----------------------------------------------------------------------
## Rating series.

## Rating along the matches.
dd$rating <- with(dd, gamma+delta)
de$rating <- with(de, gamma+delta)

xyplot(rating~rod|team,
       data=dd, type="l", cex=0.8,
       as.table=TRUE,
       scales=sc,
       xlab="Championship round",
       ylab=expression("Rating"~(gamma+delta)))

xyplot(rating~rod, groups=team,
       data=de, type="o", cex=0.8,
       as.table=TRUE,
       scales=sc,
       key=key.tm,
       strip=strip.custom(factor.levels=key.gd$text[[1]]),
       xlab="Championship round",
       ylab=expression("Rating"~(gamma+delta)))

##----------------------------------------------------------------------

## Average rating.
rtm <- arrange(aggregate(rating~team, data=dd, FUN=mean), -rating)
names(rtm)[2] <- "averating"

## Final rating.
frat <- subset(dd, rod==max(dd$rod))
names(frat)[5] <- "finrating"

## Final, average rating and rank.
fr <- arrange(merge(merge(pl11, frat, by="team"),
                    rtm, by="team"), -finrating)
fr

            team pts Win Draw Lose gScored gConced  pos rod     gamma
1    Figueirense  58  15   13   10      46      45  7.0  38 0.9206398
2    Corinthians  71  21    8    9      53      36  1.0  38 0.9619856
3     Fluminense  63  20    3   15      60      51  3.0  38 1.1424211
4       Flamengo  61  15   16    7      59      47  4.0  38 1.0061927
5  Internacional  60  16   12   10      57      43  5.0  38 0.9499984
6          Vasco  69  19   12    7      57      40  2.0  38 1.0268242
7      Palmeiras  50  11   17   10      43      39 11.0  38 0.8161049
8      São Paulo  59  16   11   11      57      46  6.0  38 1.0153874
9    Atlético/GO  48  12   12   14      50      45 12.5  38 0.9690717
10      Cruzeiro  43  11   10   17      48      51 16.0  38 1.0069974
11      Coritiba  57  16    9   13      57      41  8.0  38 1.0113464
12         Bahia  46  11   13   14      43      49 14.0  38 0.8243926
13        Santos  53  15    8   15      55      55 10.0  38 1.0340808
14        Grêmio  48  13    9   16      49      57 12.5  38 0.9984186
15      Botafogo  56  16    8   14      52      49  9.0  38 0.9667164
16         Ceará  39  10    9   19      47      64 18.0  38 0.9228029
17   Atlético/MG  45  13    6   19      50      60 15.0  38 0.9499567
18          Avaí  31   7   10   21      45      75 20.0  38 0.8980071
19    América/MG  37   8   13   17      51      69 19.0  38 0.9934476
20   Atlético/PR  41  10   11   17      38      55 17.0  38 0.7904245
       delta finrating averating
1  1.1113103  2.031950  1.992404
2  1.0460642  2.008050  2.073132
3  0.8200423  1.962463  1.933153
4  0.9101695  1.916362  2.005515
5  0.9575517  1.907550  1.979122
6  0.8801595  1.906984  1.952883
7  1.0652232  1.881328  1.906771
8  0.8354563  1.850844  2.004622
9  0.8796258  1.848698  1.935937
10 0.8338494  1.840847  2.068277
11 0.8282709  1.839617  1.943582
12 1.0132323  1.837625  1.930973
13 0.7790268  1.813108  1.877899
14 0.7472485  1.745667  1.860308
15 0.7639816  1.730698  1.975777
16 0.7806327  1.703436  1.826833
17 0.7077996  1.657756  1.830839
18 0.7551507  1.653158  1.812300
19 0.6500896  1.643537  1.790881
20 0.8479490  1.638374  1.777615

xyplot(finrating+averating~pts, data=fr,
       auto.key=TRUE, type=c("p","smooth"))

splom(fr[,c("pts","finrating","averating")],
      type=c("p","smooth"))

cor(fr[,c("pts","finrating","averating")],
    method="spearman")

                pts finrating averating
pts       1.0000000 0.8476872 0.7273412
finrating 0.8476872 1.0000000 0.7924812
averating 0.7273412 0.7924812 1.0000000

Estimated probabilities and DeFinetti measure

##-----------------------------------------------------------------------------
## Who have more chance to gain, Palmeiras or Corinthians?

prob <- function(gh, ga, theta, hf=FALSE){
    ## 1: attack home
    ## 2: defense home
    ## 3: attack away
    ## 4: defense away
    ## 5: home field advantage
    if(hf){
        ph <- dpois(gh, lambda=exp(theta[5]+theta[1]-theta[4]))
    } else {
        ph <- dpois(gh, lambda=exp(theta[1]-theta[4]))
    }
    pa <- dpois(ga, lambda=exp(theta[3]-theta[2]))
    ph*pa
}
Prob <- Vectorize(prob, c("gh", "ga"))
rm("prob")

i <- sel[c(1,4)]

sel <- subset(fr, team%in%i,
              select=c("team", "gamma","delta"))
sel <- split(sel, f=as.character(sel$team))
sel <- sel[i]
theta <- unlist(lapply(sel, "[", -1))
theta <- c(theta, omega=tail(omega, 1))
theta

Corinthians.gamma Corinthians.delta        Avaí.gamma 
        0.9619856         1.0460642         0.8980071 
       Avaí.delta             omega 
        0.7551507         0.4468447

g <- 0:10
## pla <- outer(g, g, function(x, y) Prob(gh=x, ga=y, theta))
pla <- outer(g, g, function(x, y) Prob(gh=x, ga=y, theta, hf=TRUE))
sum(pla) ## must be equal to 1.

[1] 0.9999942

res <- data.frame(outcome=c("homeWin","draw","awayWin"),
                  prob=c(sum(pla[lower.tri(pla)]),
                         sum(diag(pla)),
                         sum(pla[upper.tri(pla)])))
res$outcome <- factor(res$outcome, levels=levels(res$outcome)[3:1])

barchart(prob~outcome, data=res, main=paste(i, collapse=" vs "),
         horizontal=FALSE, stack=FALSE, origin=0,
         xlab="Outcome of the match",
         ylab="Predicted probability")

## The outcome with highest probability.
m <- which(pla==max(pla), arr.ind=TRUE); m

     row col
[1,]   2   1

##----------------------------------------------------------------------

## Combines team and round in a new variable.
dg <- dc
dg <- transform(dg,
                hr=paste(home, rod, sep="."),
                ar=paste(away, rod, sep="."))
dd$tr <- with(dd, paste(team, rod, sep="."))

dg <- merge(dg,
            subset(dd, select=c("gamma","delta","tr")),
            by.x="hr", by.y="tr")
names(dg)[ncol(dg)-1:0] <- c("hgamma", "hdelta")

dg <- merge(dg,
            subset(dd, select=c("gamma","delta","tr")),
            by.x="ar", by.y="tr")
names(dg)[ncol(dg)-1:0] <- c("agamma", "adelta")

omg <- data.frame(omega=omega, rod=seq_along(omega))
dg <- merge(dg, omg, by="rod")

dg <- arrange(dg, dat)
str(dg)

'data.frame': 380 obs. of  19 variables:
 $ rod   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ ar    : Factor w/ 380 levels "América/MG.10",..: 77 58 286 362 343 20 96 191 115 153 ...
 $ hr    : Factor w/ 380 levels "América/MG.1",..: 229 39 324 134 248 172 1 210 305 267 ...
 $ dat   : POSIXct, format: "2011-05-21 00:00:00" ...
 $ home  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 13 3 18 8 14 10 1 12 17 15 ...
 $ away  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 5 4 16 20 19 2 6 11 7 9 ...
 $ pla   : chr  "4x0" "3x0" "1x1" "1x3" ...
 $ yea   : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ h     : int  4 3 1 1 0 0 2 1 1 1 ...
 $ a     : int  0 0 1 3 2 1 1 0 0 2 ...
 $ d     : int  4 3 0 2 2 1 1 1 1 1 ...
 $ s     : int  4 3 2 4 2 1 3 1 1 3 ...
 $ hpt   : num  3 3 1 0 0 0 3 3 3 0 ...
 $ apt   : num  0 0 1 3 3 3 0 0 0 3 ...
 $ hgamma: num  1.021 0.986 0.99 1.029 1.034 ...
 $ hdelta: num  0.974 0.988 0.969 1.051 1.002 ...
 $ agamma: num  1.024 0.979 0.891 1.052 1.028 ...
 $ adelta: num  0.953 0.918 1.018 0.988 1.009 ...
 $ omega : num  0.563 0.563 0.563 0.563 0.563 ...

##----------------------------------------------------------------------

## HomeWin Draw AwayWin matrix.
hda <- with(dg, {
    d <- h-a
    draw <- d==0
    winH <- d>0
    ## winA <- !draw & !winH
    winA <- !(draw | winH)
    x <- cbind(winH, draw, winA)
    return(1*x)
})

##----------------------------------------------------------------------
## Estimated probability of HomeWin Draw AwayWin matrix.

## NOTE: this uses the home field advatage parameter.
ephda.hf <- t(apply(
    dg[, c("hgamma","hdelta","agamma","adelta","omega")],
    MARGIN=1,
    FUN=function(i){
        g <- 0:10
        pla <- outer(g, g,
                     function(x, y){
                         Prob(gh=x, ga=y, theta=i, hf=TRUE)
                     })
        res <- c(homeWin=sum(pla[lower.tri(pla)]),
                 draw=sum(diag(pla)),
                 awayWin=sum(pla[upper.tri(pla)]))
        return(res)
    }))

## NOTE: this DOESN'T use the home field advatage parameter.
ephda <- t(apply(
    dg[, c("hgamma","hdelta","agamma","adelta","omega")],
    MARGIN=1,
    FUN=function(i){
        g <- 0:10
        pla <- outer(g, g,
                     function(x, y){
                         Prob(gh=x, ga=y, theta=i, hf=FALSE)
                     })
        res <- c(homeWin=sum(pla[lower.tri(pla)]),
                 draw=sum(diag(pla)),
                 awayWin=sum(pla[upper.tri(pla)]))
        return(res)
    }))
## head(ephda)

## Per match DeFinetti Index.
dfi.hf <- rowSums((ephda.hf-hda)^2)
dfi <- rowSums((ephda-hda)^2)

xyplot(dfi.hf+dfi~rod, data=dg, outer=TRUE,
       xlab="Round", ylab="DeFinetti Index")

## Average DeFinetti Index.
## Lazy model: Win Draw Lose = 1/3 in each, so DeFinetti Index is
c("with hf"=mean(dfi.hf), "no hf"=mean(dfi), "Lazy"=(2/3)^2+2*(1/3)^2)

  with hf     no hf      Lazy 
0.6137259 0.6446163 0.6666667

## table(cut(dfi.hf, breaks=c(0, 2/3, 2)))
## table(cut(dfi, breaks=c(0, 2/3, 2)))

addmargins(prop.table(
    table(cut(dfi.hf, breaks=c(0, 2/3, 2)),
          cut(dfi, breaks=c(0, 2/3, 2)))))

           
            (0,0.667] (0.667,2]       Sum
  (0,0.667] 0.3710526 0.1157895 0.4868421
  (0.667,2] 0.1921053 0.3210526 0.5131579
  Sum       0.5631579 0.4368421 1.0000000

## NOTE: the DeFinetti mean measure is not a good choice to evaluate the
## model prediction capability.

xyplot(dfi.hf+dfi~factor(hpt, levels=c(3,1,0), labels=c("W","D","L")),
       data=dg, layout=c(NA,1),
       outer=TRUE, jitter.x=TRUE,
       ylab="DeFinetti Index",
       xlab="Outcome for the home team of the match",
       strip=strip.custom(
           factor.levels=c(
               "with field parameters", "without field parameters")))+
    layer(panel.abline(h=2/3, lty=2))

##----------------------------------------------------------------------
## Saving the results of MAP-1 approach.

map1 <- list(dd=dd, fr=fr, omega=omega, dg=dg, dfi=dfi)

MAP-2 model fitting and results

Parameter estimates

##----------------------------------------------------------------------

## Number of teams and number of rounds.
nteams <- nlevels(db$home)
nrounds <- length(unique(db$rod))
c(nteams, nrounds)

[1] 20 38

## gamma: teams attacking strength.
## delta: teams deffensive strength.
## omega: home-field advantage parameter.
gamma <- matrix(numeric(), nrow=nteams, ncol=nrounds+1)
delta <- gamma
omega <- numeric(nrounds)
kappa <- omega

## To store values in each match inside a round.
lambdaH <- matrix(numeric(), nrow=nteams/2, ncol=nrounds)
lambdaA <- lambdaH

i <- 1
gamma[,i] <- priors2$mu
delta[,i] <- priors2$theta
omega[i] <- attr(priors2, "hf")$omega$mu0
kappa[i] <- attr(priors2, "hf")$kappa$theta0

## Fitting for each round.
for(i in 1:nrounds){
    ##-------------------------------------------
    ## Subset each round (10 matches).
    ## 
    dbi <- subset(dc, rod==i)
    ##
    ##-------------------------------------------
    ## Model matrices, which teams confronted each other.
    ## 
    Xh <- model.matrix(~-1+home, data=dbi)
    Xa <- model.matrix(~-1+away, data=dbi)
    ##
    ##-------------------------------------------
    ## Columns of Home and Away teams.
    ##
    posh <- apply(apply(Xh, MARGIN=1, FUN="==", 1), MARGIN=2, FUN=which)
    posa <- apply(apply(Xa, MARGIN=1, FUN="==", 1), MARGIN=2, FUN=which)
    ##
    ##-------------------------------------------
    ## Lambda values.
    ##
    lambdaH[,i] <- as.vector(
        exp(Xh%*%gamma[,i]-Xa%*%delta[,i]+omega[i])
        )
    lambdaA[,i] <- as.vector(
        exp(Xa%*%gamma[,i]-Xh%*%delta[,i])
        )
    ##
    ##-------------------------------------------
    ## Goals expected and goals scored.
    ## 
    ## cbind(goalRateHome=lambdaA[,i], goalRateAway=lambdaB[,i],
    ##       goalScorHome=dbi$x, goalScorAway=dbi$y)
    ##
    ##-------------------------------------------
    ## Updating priors for the next round.
    ##
    gamma[posh,i+1] <- 
        Xh%*%gamma[,i]+Xh%*%priors2$sigma2*(dbi$h-lambdaH[,i])
    gamma[posa,i+1] <- 
        Xa%*%gamma[,i]+Xa%*%priors2$sigma2*(dbi$a-lambdaA[,i])
    delta[posh,i+1] <- 
        Xh%*%delta[,i]-Xh%*%priors2$psi2*(dbi$h-lambdaA[,i])
    delta[posa,i+1] <- 
        Xa%*%delta[,i]-Xa%*%priors2$psi2*(dbi$h-lambdaH[,i])
    omega[i+1] <- 
        omega[i]+attr(priors2, "hf")$omega$sigma20*
            sum(dbi$h-lambdaH[,i])
    kappa[i+1] <- 
        kappa[i]-attr(priors2, "hf")$kappa$psi20*
            sum(dbi$a-lambdaA[,i])    
}

gamma <- gamma[,-1]
delta <- delta[,-1]
omega <- omega[-1]
kappa <- kappa[-1]

##----------------------------------------------------------------------

## Series for all teams parameters along the rounds.
dd <- melt(data=as.data.frame(delta), value.name="delta")
dd <- cbind(
    melt(data=cbind(team=levels(dc$home), as.data.frame(gamma)),
         id.vars="team", variable.name="rod", value.name ="gamma"),
    delta=dd$delta)
dd$rod <- as.integer(dd$rod)
dd$team <- factor(dd$team, levels=as.character(pl11$team))
str(dd)

'data.frame': 760 obs. of  4 variables:
 $ team : Factor w/ 20 levels "Corinthians",..: 19 12 15 17 20 14 9 18 1 8 ...
 $ rod  : int  1 1 1 1 1 1 1 1 1 1 ...
 $ gamma: num  1.052 1.066 1.006 1.028 0.995 ...
 $ delta: num  0.938 0.974 1.006 0.917 0.954 ...

sc <- list(x=list(at=c(1,10,20,30,38)))

## Legend for gamma and delta.
key.gd <- list(columns=2,
               title="Parameter", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:2],
                   pch=ps$superpose.symbol$pch[1:2]),
               text=list(c(expression(gamma), expression(delta))))

## All teams.
p1 <- 
    xyplot(gamma+delta~rod|team, data=dd, type="l",
           as.table=TRUE, layout=c(4,5),
           scales=sc,
           key=c(columns=1, space="right", key.gd[c(-1,-4,-5)]),
           xlab="Championship round",
           ylab="Parameter value")

p2 <- 
    xyplot(adv~rod|team, data=pfm, col=1, cex=0.6, pch=pfm$pch,
           layout=c(4,5), as.table=TRUE,
           scales=list(y=list(at=0:1, labels=c("A","H"))),
           ylab="y = 1: played at home, y = 0: played away",
           xlab="Champonship round",
           key=list(
               ## corner=c(0.1, 0.9),
               space="right",
               title="Outcome", cex.title=1.1, columns=1,
               points=list(pch=c(2,1,6), cex=0.6),
               text=list(c("Win", "Draw", "Lose"))
           ))

doubleYScale(p1, p2, use.style=FALSE)

## Legend for omega and kappa.
key.ok <- list(columns=2,
               title="Parameter", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:2],
                   pch=ps$superpose.symbol$pch[1:2]),
               text=list(c(expression(omega), expression(kappa))))

## Field parameters.
xyplot(omega+kappa~seq_along(omega), type="b",
       scales=sc, key=key.ok,
       xlab="Championship round",
       ylab="Field parameters")

##----------------------------------------------------------------------

## Select som teams to a depper view, 2 bests and 2 worsts.
sel <- as.character(pl11$team)[c(1,2,19,20)]
de <- droplevels(subset(dd, team%in%sel))

## Selected teams.
xyplot(gamma+delta~rod|team,
       data=de, type="o",
       as.table=TRUE,
       scales=sc,
       key=key.gd,
       xlab="Championship round",
       ylab="Parameter value")

key.tm <- list(columns=2,
               title="Teams", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:4],
                   pch=ps$superpose.symbol$pch[1:4]),
               text=list(levels(de$team)))

xyplot(gamma+delta~rod, outer=TRUE, groups=team,
       data=de, type="o",
       as.table=TRUE,
       scales=sc,
       key=key.tm,
       strip=strip.custom(factor.levels=key.gd$text[[1]]),
       xlab="Championship round",
       ylab="Parameter value")

Ratings

##----------------------------------------------------------------------
## Rating series.

## Rating along the matches.
dd$rating <- with(dd, gamma+delta)
de$rating <- with(de, gamma+delta)

xyplot(rating~rod|team,
       data=dd, type="l", cex=0.8,
       as.table=TRUE,
       scales=sc,
       xlab="Championship round",
       ylab=expression("Rating"~(gamma+delta)))

xyplot(rating~rod, groups=team,
       data=de, type="o", cex=0.8,
       as.table=TRUE,
       scales=sc,
       key=key.tm,
       strip=strip.custom(factor.levels=key.gd$text[[1]]),
       xlab="Championship round",
       ylab=expression("Rating"~(gamma+delta)))

##----------------------------------------------------------------------

## Average rating.
rtm <- arrange(aggregate(rating~team, data=dd, FUN=mean), -rating)
names(rtm)[2] <- "averating"

## Final rating.
frat <- subset(dd, rod==max(dd$rod))
names(frat)[5] <- "finrating"

## Final, average rating and rank.
fr <- arrange(merge(merge(pl11, frat, by="team"),
                    rtm, by="team"), -finrating)
fr

            team pts Win Draw Lose gScored gConced  pos rod     gamma
1    Figueirense  58  15   13   10      46      45  7.0  38 0.9323928
2    Corinthians  71  21    8    9      53      36  1.0  38 0.9734536
3     Fluminense  63  20    3   15      60      51  3.0  38 1.1534896
4  Internacional  60  16   12   10      57      43  5.0  38 0.9660787
5       Flamengo  61  15   16    7      59      47  4.0  38 1.0148134
6          Vasco  69  19   12    7      57      40  2.0  38 1.0294535
7      Palmeiras  50  11   17   10      43      39 11.0  38 0.8182528
8       Cruzeiro  43  11   10   17      48      51 16.0  38 1.0320625
9    Atlético/GO  48  12   12   14      50      45 12.5  38 0.9888373
10      Coritiba  57  16    9   13      57      41  8.0  38 1.0333744
11     São Paulo  59  16   11   11      57      46  6.0  38 1.0181186
12         Bahia  46  11   13   14      43      49 14.0  38 0.7995462
13        Santos  53  15    8   15      55      55 10.0  38 1.0370015
14      Botafogo  56  16    8   14      52      49  9.0  38 0.9856944
15        Grêmio  48  13    9   16      49      57 12.5  38 0.9835249
16         Ceará  39  10    9   19      47      64 18.0  38 0.9454015
17   Atlético/MG  45  13    6   19      50      60 15.0  38 0.9588055
18   Atlético/PR  41  10   11   17      38      55 17.0  38 0.8175393
19    América/MG  37   8   13   17      51      69 19.0  38 1.0080389
20          Avaí  31   7   10   21      45      75 20.0  38 0.8829730
       delta finrating averating
1  1.1217216  2.054114  2.015287
2  1.0670055  2.040459  2.106682
3  0.8386707  1.992160  1.979608
4  0.9888259  1.954905  2.042698
5  0.9293911  1.944205  2.033765
6  0.8947738  1.924227  1.964290
7  1.0800065  1.898259  1.918910
8  0.8497502  1.881813  2.119476
9  0.8889464  1.877784  1.963156
10 0.8438227  1.877197  1.982142
11 0.8502728  1.868391  2.016514
12 1.0358844  1.835431  1.914750
13 0.7941735  1.831175  1.889614
14 0.7807288  1.766423  2.014340
15 0.7662132  1.749738  1.833191
16 0.7973009  1.742702  1.870610
17 0.7240276  1.682833  1.861797
18 0.8617990  1.679338  1.820623
19 0.6695213  1.677560  1.821124
20 0.7742484  1.657221  1.805103

xyplot(finrating+averating~pts, data=fr,
       auto.key=TRUE, type=c("p","smooth"))

splom(fr[,c("pts","finrating","averating")],
      type=c("p","smooth"))

cor(fr[,c("pts","finrating","averating")],
    method="spearman")

                pts finrating averating
pts       1.0000000 0.8273788 0.7055284
finrating 0.8273788 1.0000000 0.8075188
averating 0.7055284 0.8075188 1.0000000

Estimated probabilities and DeFinetti measure

## Como calcular as probabilidades de ganhar, empatar e perder?

##-----------------------------------------------------------------------------
## Who have more chance to gain, Palmeiras or Corinthians?

prob <- function(gh, ga, theta, hf=FALSE){
    ## 1: attack home
    ## 2: defense home
    ## 3: attack away
    ## 4: defense away
    ## 5: home field advantage
    ## 6: away field disadvantage
    if(hf){
        ph <- dpois(gh, lambda=exp(theta[1]-theta[4]+theta[5]))
        pa <- dpois(ga, lambda=exp(theta[3]-theta[2]-theta[6]))
    } else {
        ph <- dpois(gh, lambda=exp(theta[1]-theta[4]))
        pa <- dpois(ga, lambda=exp(theta[3]-theta[2]))
    }
    ph*pa
}
Prob <- Vectorize(prob, c("gh", "ga"))
rm("prob")

i <- sel[c(1,4)]

sel <- subset(fr, team%in%i,
              select=c("team", "gamma","delta"))
sel <- split(sel, f=as.character(sel$team))
sel <- sel[i]
theta <- unlist(lapply(sel, "[", -1))
theta <- c(theta, omega=tail(omega, 1), kappa=tail(kappa, 1))
theta

Corinthians.gamma Corinthians.delta        Avaí.gamma 
        0.9734536         1.0670055         0.8829730 
       Avaí.delta             omega             kappa 
        0.7742484         0.4625240         0.3321949

g <- 0:10
## pla <- outer(g, g, function(x, y) Prob(gh=x, ga=y, theta))
pla <- outer(g, g, function(x, y) Prob(gh=x, ga=y, theta, hf=TRUE))
sum(pla) ## must be equal to 1.

[1] 0.9999938

res <- data.frame(outcome=c("homeWin","draw","awayWin"),
                  prob=c(sum(pla[lower.tri(pla)]),
                         sum(diag(pla)),
                         sum(pla[upper.tri(pla)])))
res$outcome <- factor(res$outcome, levels=levels(res$outcome)[3:1])

barchart(prob~outcome, data=res, main=paste(i, collapse=" vs "),
         horizontal=FALSE, stack=FALSE, origin=0,
         xlab="Outcome of the match",
         ylab="Predicted probability")

## The outcome with highest probability.
m <- which(pla==max(pla), arr.ind=TRUE); m

     row col
[1,]   2   1

##----------------------------------------------------------------------

## Combines team and round in a new variable.
dg <- dc
dg <- transform(dg,
                hr=paste(home, rod, sep="."),
                ar=paste(away, rod, sep="."))
dd$tr <- with(dd, paste(team, rod, sep="."))

dg <- merge(dg,
            subset(dd, select=c("gamma","delta","tr")),
            by.x="hr", by.y="tr")
names(dg)[ncol(dg)-1:0] <- c("hgamma", "hdelta")

dg <- merge(dg,
            subset(dd, select=c("gamma","delta","tr")),
            by.x="ar", by.y="tr")
names(dg)[ncol(dg)-1:0] <- c("agamma", "adelta")

omg <- data.frame(omega=omega, kappa=kappa, rod=seq_along(omega))
dg <- merge(dg, omg, by="rod")

dg <- arrange(dg, dat)
str(dg)

'data.frame': 380 obs. of  20 variables:
 $ rod   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ ar    : Factor w/ 380 levels "América/MG.10",..: 77 58 286 362 343 20 96 191 115 153 ...
 $ hr    : Factor w/ 380 levels "América/MG.1",..: 229 39 324 134 248 172 1 210 305 267 ...
 $ dat   : POSIXct, format: "2011-05-21 00:00:00" ...
 $ home  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 13 3 18 8 14 10 1 12 17 15 ...
 $ away  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 5 4 16 20 19 2 6 11 7 9 ...
 $ pla   : chr  "4x0" "3x0" "1x1" "1x3" ...
 $ yea   : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ h     : int  4 3 1 1 0 0 2 1 1 1 ...
 $ a     : int  0 0 1 3 2 1 1 0 0 2 ...
 $ d     : int  4 3 0 2 2 1 1 1 1 1 ...
 $ s     : int  4 3 2 4 2 1 3 1 1 3 ...
 $ hpt   : num  3 3 1 0 0 0 3 3 3 0 ...
 $ apt   : num  0 0 1 3 3 3 0 0 0 3 ...
 $ hgamma: num  1.041 1.006 0.992 1.072 1.073 ...
 $ hdelta: num  0.987 1.006 0.97 1.05 1.025 ...
 $ agamma: num  0.995 1.028 0.938 1.052 1.029 ...
 $ adelta: num  0.954 0.917 1.079 0.991 1.012 ...
 $ omega : num  0.588 0.588 0.588 0.588 0.588 ...
 $ kappa : num  0.342 0.342 0.342 0.342 0.342 ...

##----------------------------------------------------------------------
## HomeWin Draw AwayWin matrix.

hda <- with(dg, {
    d <- h-a
    draw <- d==0
    winH <- d>0
    ## winA <- !draw & !winH
    winA <- !(draw | winH)
    x <- cbind(winH, draw, winA)
    return(1*x)
})

##----------------------------------------------------------------------
## Estimated probability of HomeWin Draw AwayWin matrix.

## NOTE: this uses the home field advatage parameter.
ephda.hf <- t(apply(
    dg[, c("hgamma","hdelta","agamma","adelta","omega","kappa")],
    MARGIN=1,
    FUN=function(i){
        g <- 0:10
        pla <- outer(g, g,
                     function(x, y){
                         Prob(gh=x, ga=y, theta=i, hf=TRUE)
                     })
        res <- c(homeWin=sum(pla[lower.tri(pla)]),
                 draw=sum(diag(pla)),
                 awayWin=sum(pla[upper.tri(pla)]))
        return(res)
    }))

## NOTE: this DOESN'T use the home field advatage parameter.
ephda <- t(apply(
    dg[, c("hgamma","hdelta","agamma","adelta","omega","kappa")],
    MARGIN=1,
    FUN=function(i){
        g <- 0:10
        pla <- outer(g, g,
                     function(x, y){
                         Prob(gh=x, ga=y, theta=i, hf=FALSE)
                     })
        res <- c(homeWin=sum(pla[lower.tri(pla)]),
                 draw=sum(diag(pla)),
                 awayWin=sum(pla[upper.tri(pla)]))
        return(res)
    }))
## head(ephda)

## Per match DeFinetti Index.
dfi.hf <- rowSums((ephda.hf-hda)^2)
dfi <- rowSums((ephda-hda)^2)

xyplot(dfi.hf+dfi~rod, data=dg, outer=TRUE,
       xlab="Round", ylab="DeFinetti Index")

## Average DeFinetti Index.
## Lazy model: Win Draw Lose = 1/3 in each, so DeFinetti Index is
c("with hf"=mean(dfi.hf), "no hf"=mean(dfi), "Lazy"=(2/3)^2+2*(1/3)^2)

  with hf     no hf      Lazy 
0.6361374 0.6447154 0.6666667

## table(cut(dfi.hf, breaks=c(0, 2/3, 2)))
## table(cut(dfi, breaks=c(0, 2/3, 2)))

addmargins(prop.table(
    table(cut(dfi.hf, breaks=c(0, 2/3, 2)),
          cut(dfi, breaks=c(0, 2/3, 2)))))

           
            (0,0.667] (0.667,2]       Sum
  (0,0.667] 0.3578947 0.1263158 0.4842105
  (0.667,2] 0.1763158 0.3394737 0.5157895
  Sum       0.5342105 0.4657895 1.0000000

## NOTE: the DeFinetti mean measure is not a good choice to evaluate the
## model prediction capability.

xyplot(dfi.hf+dfi~factor(hpt, levels=c(3,1,0), labels=c("W","D","L")),
       data=dg, layout=c(NA,1),
       outer=TRUE, jitter.x=TRUE,
       ylab="DeFinetti Index",
       xlab="Outcome for the home team of the match",
       strip=strip.custom(
           factor.levels=c(
               "with field parameters", "without field parameters")))+
    layer(panel.abline(h=2/3, lty=2))

##----------------------------------------------------------------------
## Saving the results of MAP-2 approach.

map2 <- list(dd=dd, fr=fr, omega=omega, kappa=kappa, dg=dg, dfi=dfi)

Comparing MAP-1 and MAP-2

##----------------------------------------------------------------------

## fp: number of field parameters.
map12 <- rbind(cbind(map1$dd, fp=1),
               cbind(map2$dd, fp=2))

## Select som teams to a depper view, 2 bests and 2 worsts.
sel <- as.character(pl11$team)[c(1,2,19,20)]
de <- droplevels(subset(map12, team%in%sel))

key.map <- list(columns=2,
               title="Model", cex.title=1.1,
               type="o", divide=1,
               lines=list(
                   col=ps$superpose.line$col[1:2],
                   pch=ps$superpose.symbol$pch[1:2]),
               text=list(c("MAP-1","MAP-2")))

sc <- list(x=list(at=c(1,10,20,30,38)),
           y=list(relation="free"))

useOuterStrips(combineLimits(
    xyplot(gamma+delta~rod|team, outer=TRUE, groups=fp,
           data=de, type="o",
           as.table=TRUE,
           scales=sc,
           key=key.map,
           xlab="Championship round",
           ylab="Parameter value")),
    strip.left=strip.custom(factor.levels=key.gd$text[[1]])
    )

##----------------------------------------------------------------------

## Home field parameters.
hfpar <- data.frame(rod=1:length(map1$omega),
                    omega1=map1$omega,
                    omega2=map2$omega,
                    kappa2=map2$kappa)

xyplot(omega1+omega2+kappa2~rod, data=hfpar,
       type="b", lty=c(1,2,2), pch=c(1,1,3), col=1,
       key=list(
           type="o", divide=1,
           text=list(c(expression(omega[1]),
                       expression(omega[2]),
                       expression(kappa[2]))),
           lines=list(lty=c(1,2,2), pch=c(1,1,3), col=1)
       ),
       xlab="Championship round",
       ylab="Parameter value")

Probability of win as a function of the teams parameters

prob <- function(gh, ga, difftheta){
    ## 1: attack home - defense away
    ## 2: attack away - defense home
    ## ph <- dpois(gh, lambda=exp(theta[1]-theta[4]))
    ph <- dpois(gh, lambda=exp(difftheta[1]))
    ## pa <- dpois(ga, lambda=exp(theta[3]-theta[2]))
    pa <- dpois(ga, lambda=exp(difftheta[2]))
    ph*pa
}
Prob <- Vectorize(prob, c("gh", "ga"))
ProbWin <- function(difftheta){
    g <- 0:10
    pla <- outer(g, g, FUN=function(x, y){
        Prob(gh=x, ga=y, difftheta)
    })
    pWin <- sum(pla[lower.tri(pla)])
    pLos <- sum(pla[upper.tri(pla)])
    pDra <- sum(diag(pla))
    return(c(W=pWin, D=pDra, L=pLos))
}

pred <- expand.grid(gammaH.deltaA=seq(-1, 1, length.out=50),
                    gammaA.deltaH=seq(-1, 1, length.out=50))

pred$probs <- t(apply(pred, 1, ProbWin))
## str(pred)

str(dg)

'data.frame': 380 obs. of  20 variables:
 $ rod   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ ar    : Factor w/ 380 levels "América/MG.10",..: 77 58 286 362 343 20 96 191 115 153 ...
 $ hr    : Factor w/ 380 levels "América/MG.1",..: 229 39 324 134 248 172 1 210 305 267 ...
 $ dat   : POSIXct, format: "2011-05-21 00:00:00" ...
 $ home  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 13 3 18 8 14 10 1 12 17 15 ...
 $ away  : Factor w/ 20 levels "América/MG","Atlético/GO",..: 5 4 16 20 19 2 6 11 7 9 ...
 $ pla   : chr  "4x0" "3x0" "1x1" "1x3" ...
 $ yea   : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
 $ h     : int  4 3 1 1 0 0 2 1 1 1 ...
 $ a     : int  0 0 1 3 2 1 1 0 0 2 ...
 $ d     : int  4 3 0 2 2 1 1 1 1 1 ...
 $ s     : int  4 3 2 4 2 1 3 1 1 3 ...
 $ hpt   : num  3 3 1 0 0 0 3 3 3 0 ...
 $ apt   : num  0 0 1 3 3 3 0 0 0 3 ...
 $ hgamma: num  1.041 1.006 0.992 1.072 1.073 ...
 $ hdelta: num  0.987 1.006 0.97 1.05 1.025 ...
 $ agamma: num  0.995 1.028 0.938 1.052 1.029 ...
 $ adelta: num  0.954 0.917 1.079 0.991 1.012 ...
 $ omega : num  0.588 0.588 0.588 0.588 0.588 ...
 $ kappa : num  0.342 0.342 0.342 0.342 0.342 ...

games <- with(
    dg,
    ## dg[sample(1:nrow(dg), size=10),],
              list(teams=paste(home, pla, away),
                   difftheta.x=c(hgamma-adelta),
                   difftheta.y=c(agamma-hdelta)))

## plot(x=games[[2]], y=games[[3]], asp=1)
## a <- identify(x=games[[2]], y=games[[3]])
## dput(a)

a <- c(50L, 73L, 103L, 130L, 159L, 212L, 227L, 231L, 255L, 290L, 293L, 
       302L, 314L, 340L, 341L, 343L, 358L, 372L, 380L)
## plot(x=games[[2]], y=games[[3]])
## text(x=games[[2]][a], y=games[[3]][a],
##      labels=games[[1]][a], pos=ifelse(games[[2]][a]>0, 4, 2))

levelplot(probs[,1]~gammaH.deltaA+gammaA.deltaH, data=pred, aspect=1,
          xlab=expression(gamma[H]-delta[A]),
          ylab=expression(gamma[A]-delta[H]),
          main=expression(Pr(Home~Win)),
          contour=TRUE)+
    layer(panel.abline(v=0, h=0, lty=2))+
    layer(panel.points(x=games[[2]], y=games[[3]], cex=0.6))+
    layer(panel.text(x=games[[2]][a],
                     y=games[[3]][a],
                     labels=games[[1]][a],
                     pos=ifelse(games[[2]][a]>0, 4, 2),
                     cex=0.8))

## levelplot(probs[,2]~gammaH.deltaA+gammaA.deltaH, data=pred, aspect=1)+
##     layer(panel.abline(a=0, b=1, lty=2))
## levelplot(probs[,3]~gammaH.deltaA+gammaA.deltaH, data=pred)