Bayesian Model Comparison

In [3]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [4]:
import pandas as pd
In [5]:
#data from  
#http://www.zeit.de/wissen/umwelt/2016-06/unwetter-bayern-extremwetter-klimawandel-meteorologie
schaeden_all = pd.read_csv("../data/schaeden.csv")
In [6]:
schaeden_all.T
Out[6]:
                0     1     2     3     4     5     6     7     8     9  ...    26    27    28    29    30    31    32    33    34    35
Jahr         1980  1981  1982  1983  1984  1985  1986  1987  1988  1989  ...  2006  2007  2008  2009  2010  2011  2012  2013  2014  2015
Sturm           2     6     1     4     6     8     6     4     5     5  ...    14    11     5     6     5    13     5     5     4     6
Hagel        null     1  null  null     2     1     1     2     2     5  ...     2  null     1  null  null  null     1     1  null     1
Tornado         3  null  null     1     1  null     1     3     1     1  ...     3  null  null  null     2  null  null  null  null     1
Sturzflut       3     1  null     1     1  null     1     1  null     2  ...     3  null  null     1     1  null     1  null     1     3
Blitzschlag  null  null  null  null  null  null  null  null  null  null  ...  null  null  null  null  null  null  null  null  null  null

6 rows × 36 columns

In [7]:
jahr = schaeden_all.Jahr.values
jahr
Out[7]:
array([1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990,
       1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
       2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2015])
In [8]:
schaeden = schaeden_all.Sturm.values
schaeden
Out[8]:
array([ 2,  6,  1,  4,  6,  8,  6,  4,  5,  5,  4,  2,  3,  3,  6,  5,  4,
        5,  9,  6,  3,  6,  9,  8, 10,  6, 14, 11,  5,  6,  5, 13,  5,  5,
        4,  6])
In [9]:
from IPython.core.pylabtools import figsize
In [10]:
figsize(12.5, 3.5)
plt.bar(jahr, schaeden, color="#348ABD")
plt.xlabel("Jahr")
plt.ylabel(u"Anzahl der Sturmschäden")
plt.title(u"Sturmschäden")
plt.xlim(min(jahr), max(jahr));
In [12]:
import pymc3
Couldn't import dot_parser, loading of dot files will not be possible.
In [13]:
def get_samples(mu, num_samples):
    """Draw num_samples from independent Poisson variables with means mu."""
    with pymc3.Model() as model:
        sam = pymc3.Poisson('sam', mu=mu, shape=mu.shape)
        tr = pymc3.sample(num_samples)
    return tr.get_values('sam')
In [14]:
get_samples(np.array([1.,2.,3.]), 10)
Assigned Metropolis to sam
 [-----------------100%-----------------] 10 of 10 complete in 0.0 sec
Out[14]:
array([[1, 1, 2],
       [1, 1, 2],
       [1, 1, 2],
       [2, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 2, 1],
       [1, 2, 1]])
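(With only 10 Metropolis steps the chain barely moves, which is why consecutive rows above are nearly identical; the cell is only a quick smoke test of the helper.)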
In [15]:
basic_model = pymc3.Model()

assert jahr.shape == schaeden.shape

with basic_model:
    # Priors for unknown model parameters
    alpha = pymc3.HalfNormal('alpha', sd=10)
    beta = pymc3.Normal('beta', sd=10., shape=1)

    # Expected value of outcome
    mu = alpha + beta*(jahr - jahr.min())

    # Likelihood (sampling distribution) of observations
    obs = pymc3.Poisson('observation', mu=mu, observed=schaeden, shape=jahr.shape)
Applied log-transform to alpha and added transformed alpha_log to model.
In [16]:
with basic_model:
    tr = pymc3.sample(10000)
Assigned NUTS to alpha_log
Assigned NUTS to beta
 [-----------------100%-----------------] 10000 of 10000 complete in 6.1 sec
In [17]:
b = tr.get_values(beta, burn=4000) 
b
Out[17]:
array([[ 0.03952721],
       [ 0.0344634 ],
       [ 0.14053844],
       ..., 
       [ 0.14127214],
       [ 0.13936848],
       [ 0.13936848]])
In [18]:
a = tr.get_values(alpha, burn=4000) 
a
Out[18]:
array([ 5.7717956 ,  5.66962675,  3.03925197, ...,  4.05556502,
        4.12297346,  4.12297346])
In [19]:
ax= plt.subplot(121)

_ = ax.hist(a, bins=int(np.sqrt(len(a))), normed=True )
#ax.set_xlim(0.,1.)
plt.title("Sample-Histogram of Intersect")

ax = plt.subplot(122)
_ = ax.hist(b, bins=int(np.sqrt(len(b))), normed=True )
plt.title("Sample-Histogram of Slope")     
Out[19]:
<matplotlib.text.Text at 0x10c8e33d0>
In [20]:
print(a.mean())
print(a.std())
print(b.mean())
print(b.std())
3.87131172301
0.744425992029
0.115495870567
0.0397256001856
In [21]:
constant_model = pymc3.Model()


with constant_model:

    mu = pymc3.Gamma("mu", mu=8., sd=5.)

    # Likelihood (sampling distribution) of observations
    obs = pymc3.Poisson('observation', mu=mu, observed=schaeden, shape=jahr.shape)
Applied log-transform to mu and added transformed mu_log to model.
In [22]:
with constant_model:
    tr = pymc3.sample(20000)
Assigned NUTS to mu_log
 [-----------------100%-----------------] 20000 of 20000 complete in 4.6 sec
In [23]:
a_ = tr.get_values(mu, burn=4000) 
a_.mean()
Out[23]:
5.8442857368286862
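As a quick cross-check, the Gamma prior is conjugate to the Poisson likelihood, so the posterior of mu has a closed form. A minimal sketch, assuming pymc3's (mu, sd) Gamma parametrization (shape alpha = mu^2/sd^2, rate beta = mu/sd^2):

alpha_prior = 8.0**2 / 5.0**2              # shape implied by Gamma(mu=8., sd=5.)
beta_prior = 8.0 / 5.0**2                  # rate
alpha_post = alpha_prior + schaeden.sum()  # conjugate update: add the total count
beta_post = beta_prior + len(schaeden)     # ... and the number of observations
print(alpha_post / beta_post)              # analytic posterior mean, ~5.85 vs. ~5.84 from MCMC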
In [24]:
m = tr.get_values(mu, burn=4000) 
_ = plt.hist(m, bins=int(np.sqrt(len(m))), normed=True )

Two models:

  • Hypothesis 1: constant rate, no increase with time
  • Hypothesis 0: linear increase with time
In [25]:
figsize(12.5, 3.5)
plt.bar(jahr, schaeden, color="#348ABD")
plt.xlabel("Jahr")
plt.ylabel(u"Anzahl der Sturmschäden")
plt.title(u"Sturmschäden")
plt.plot(jahr, a.mean()+b.mean()*(jahr-jahr.min()),'r-')
plt.plot(jahr, [a_.mean()]*jahr.shape[0],'g-')
plt.xlim(min(jahr), max(jahr));

Model Comparison

The posterior probability $P(M \mid \mathcal D)$ of a model $M$ given some data $\mathcal D$ follows from Bayes' rule:

$$ P(M \mid \mathcal D) = \frac{P(\mathcal D \mid M) P(M)}{P(\mathcal D)} $$

To compare two models $M_1$ and $M_2$, we take the ratio of their posterior probabilities; the evidence $P(\mathcal D)$ cancels:

$$ \frac{P(M_1 \mid \mathcal D)}{P(M_2 \mid \mathcal D)} = \frac{P(\mathcal D \mid M_1)}{P(\mathcal D \mid M_2)} \frac { P(M_1)}{P(M_2)} $$

The first factor on the right-hand side is the "Bayes factor" $K$ (analogous to the likelihood ratio in the frequentist approach); the second factor is the prior ratio.

If we assume a priori (before looking at the data) that the two models are equally probable, only the Bayes factor matters for model comparison. It is obtained by marginalizing out each model's parameters $\theta_i$:

$$ K = \frac{P(\mathcal D \mid M_1)}{P(\mathcal D \mid M_2)} = \frac{\int P(\mathcal D \mid M_1, \theta_1) \, P(\theta_1 \mid M_1) \, d\theta_1}{\int P(\mathcal D \mid M_2, \theta_2) \, P(\theta_2 \mid M_2) \, d\theta_2} $$
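As a toy illustration: if both models are fully specified (no free parameters), the integrals collapse and the Bayes factor reduces to a plain likelihood ratio. A minimal sketch, with arbitrarily chosen fixed rates 5.8 and 4.0 and assuming scipy is available:

from scipy import stats

logL1 = stats.poisson.logpmf(schaeden, 5.8).sum()  # log P(D | M1), fixed rate 5.8
logL2 = stats.poisson.logpmf(schaeden, 4.0).sum()  # log P(D | M2), fixed rate 4.0
print(np.exp(logL1 - logL2))                       # K = P(D | M1) / P(D | M2)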

Model Comparison with pymc3

Using Monte Carlo naively for model comparison is quite dangerous, see [Neil08]: once one model dominates, the sampler visits the other model only rarely, so indicator-based estimates of the posterior odds can have very high variance.

Here the number of hidden variables is quite low, and we set the priors of the two models in the comparison to approximately match the posteriors obtained above: alpha0 and beta0 use the (rounded) posterior means and standard deviations of the linear model, and mu_model1's Gamma(mu=6., sd=1.5) is close to the posterior of the constant model.

In [26]:
pm = pymc3

with pm.Model() as model:
    p = [0.5, 0.5] # prior distribution over the two models
    
    # Index to true model
    model_index = pm.Bernoulli('model_index', p[1])

    alpha0 = pymc3.Normal('alpha0', mu=3.85,sd=.8) 
    beta0 = pymc3.Normal('beta0', mu=0.1167,sd=.04) 
    mu_model0 = alpha0 + beta0*(jahr-jahr.min()) 
    
    mu_model1 = pymc3.Gamma("mu_model1", mu=6., sd=1.5) 
    mu = pm.switch(model_index, mu_model1, mu_model0)
    
    # likelihood: Poisson observations with the mean selected by model_index
    y = pm.Poisson('y', mu=mu, observed=schaeden, shape=jahr.shape)

    #step0 = pm.ElemwiseCategorical(vars=[model_index], values=[0,1])
    #step1 = pm.NUTS()
    trace = pm.sample(50000)#, step=[step0, step1])#, start=start)
Applied log-transform to mu_model1 and added transformed mu_model1_log to model.
Assigned BinaryGibbsMetropolis to model_index
Assigned NUTS to alpha0
Assigned NUTS to beta0
Assigned NUTS to mu_model1_log
 [-----------------100%-----------------] 50000 of 50000 complete in 56.9 sec
In [27]:
trace['model_index'][10000:]
Out[27]:
array([0, 0, 0, ..., 0, 0, 0])

The Bayes factor is very sensitive to the prior! Widening the prior on mu_model1 from sd=1.5 to sd=10.5 changes the Bayes factor from about 102 to about 868 (re-runs recorded in the next cell).

In [28]:
#mu_model1 = pymc3.Gamma("mu_model1", mu=6., sd=1.5) 
# BF = 102.

#mu_model1 = pymc3.Gamma("mu_model1", mu=6., sd=10.5) 
# BF = 868.
In [29]:
pm.traceplot(trace);
pM1 = trace['model_index'][10000:].mean()  # posterior probability of model 1 (constant rate)
pM0 = 1 - pM1
print(pM0, pM1, pM0/pM1)  # posterior odds equal the Bayes factor here, since the prior odds p[1]/p[0] are 1
0.989225 0.010775 91.807424594
In [30]:
pm = pymc3

with pm.Model() as model:

    pi = (0.5, 0.5)  # prior distribution over the models
    # Index to true model
    model_index = pm.Bernoulli('model_index', pi[1])

    alpha0 = pymc3.Normal('alpha0', mu=3.85, sd=np.sqrt(0.6))
    beta0 = pymc3.Normal('beta0', mu=0.1167, sd=np.sqrt(0.002))
    mu_model0 = alpha0 + beta0*(jahr-jahr.min()) 
    
    
    mu_model1 = pymc3.Gamma("mu_model1", mu=6., sd=1.5) 
    
        
    # likelihood: evaluate the Poisson log-probability under the model
    # selected by model_index (1: constant rate, 0: linear trend)
    Ylike = pm.DensityDist('Ylike',
            lambda value: pm.switch(model_index, 
                pm.Poisson.dist(mu_model1).logp(value),
                pm.Poisson.dist(mu_model0).logp(value)
            ),
            observed=schaeden)

    trace_ = pm.sample(20000)#, step=pm.Metropolis())

    #pm.summary(trace[5000:])
Applied log-transform to mu_model1 and added transformed mu_model1_log to model.
Assigned BinaryGibbsMetropolis to model_index
Assigned NUTS to alpha0
Assigned NUTS to beta0
Assigned NUTS to mu_model1_log
 [-----------------100%-----------------] 20000 of 20000 complete in 24.8 sec
In [31]:
p_pois = trace_[10000:]['model_index'].mean()  # posterior probability of model 1 (the constant-rate model)
BF = ((1-p_pois)/p_pois) * (pi[1]/pi[0])  # Bayes factor in favor of model 0, taking the prior odds into account
In [32]:
print(p_pois)
print(BF)
0.014
70.4285714286
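On the Jeffreys scale, a Bayes factor between 30 and 100 counts as "very strong" evidence; both runs (K ≈ 92 and K ≈ 70) therefore clearly favor model 0, the model with a linear increase over time, subject to the prior sensitivity noted above.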