Note: The code is for demonstration purposes only. Some Python functions exist only to produce plots for the slides (IPython reveal), so please don't blame me for the code quality.
from IPython.display import Image
# Build an IPython Image from the picture file; as the last expression of the
# cell it is what the notebook shows as the cell output.
Image(filename='./pics/Deeplearning 2.png')
For each layer:
Input: $\vec x^T = (x_1, x_2, \dots, x_n)$
Output of the j-th neuron: $$ h_j = \sigma(\sum_{i=1}^n w_{ij} x_i + b_j) $$
In matrix form ($W$ is a matrix):
$$ \vec h = \sigma(\vec x \cdot W + \vec b) $$
plot_train_data(X_train[t_train==0], X_train[t_train==1])
In the new feature space the data is linearly separable:
# Transformed features: the square of X_train (element-wise, assuming X_train
# is a NumPy array -- TODO confirm).
phi_train = X_train**2
# Plot the transformed samples, passing the rows labelled 0 and the rows
# labelled 1 as separate arguments.
plot_train_transformed(phi_train[t_train==0], phi_train[t_train==1])
def logistic_function(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)) of ``x``.

    The exponential is taken with ``T.exp`` (``T`` is presumably
    ``theano.tensor``), so ``x`` may be a symbolic expression.
    """
    denominator = 1. + T.exp(-x)
    return 1. / denominator
def relu(x):
    """Rectified linear unit: 0 where ``x`` is negative, ``x`` itself otherwise."""
    is_negative = x < 0
    return T.switch(is_negative, 0, x)
Activity vector of the first hidden layer:
$$ \vec h^{(1)} = \sigma_1 \left(\vec x \cdot W^{(1)} + \vec b^{(1)} \right) $$
Activity of the output $\vec o$ (with only one output, $o$ is a scalar):
$$ \vec o = \vec h^{(2)}= \sigma_2 \left( \vec h^{(1)} \cdot W^{(2)} + \vec b^{(2)} \right) $$
# (first) hidden layer
# Weighted input (pre-activation) of the hidden layer.
a = T.dot(X, W_h) + b_h
# Hidden activity: rectified linear units applied to the pre-activation.
h = relu(a)
# Output neuron: logistic function of the weighted hidden activity.
output_preactivation = T.dot(h, W_o) + b_o
y = logistic_function(output_preactivation)
# Compiled predictor mapping an input batch X to the network output y.
fn_predict = theano.function(inputs=[X], outputs=y)
#fn_predict(X_train)
# Binary cross-entropy of the targets against the network output y.
# TODO: T.sum is only used to cast the one-element vector produced by the
# dot products to a scalar.
positive_term = T.dot(target, T.log(y))
negative_term = T.dot(1. - target, T.log(1. - y))
cross_entropy = T.sum(-(positive_term + negative_term))

# L2 penalty: mean squared weight of the hidden plus the output weight matrix
# (the bias terms are not part of it), scaled by lambda_.
lambda_ = 0.02
l2_reg = T.mean(T.sqr(W_h)) + T.mean(T.sqr(W_o))
cost = cross_entropy + lambda_ * l2_reg

# Debugging aids for inspecting the symbolic graph:
#theano.printing.pprint(cost)
#theano.printing.debugprint(cost)
# outputs is a one-element list, so the compiled function returns a list.
cost_func = theano.function(inputs=[X, target], outputs=[cost])
#theano.printing.debugprint(cost_func)
cost_func(X_train, t_train)
[array(44.90793435302534)]
def get_train_functions(cost, v, target, learning_rate=0.01, wrt=None):
    """Compile a Theano function that performs one gradient-descent step.

    Args:
        cost: Symbolic scalar expression to minimise.
        v: Symbolic input variable of the compiled function.
        target: Symbolic variable for the target values.
        learning_rate: Factor each gradient is multiplied with before it is
            subtracted from its parameter.
        wrt: Iterable of the parameters to update. When omitted, the
            module-level ``params`` list is used, which is what this function
            always did before the argument existed.

    Returns:
        The compiled function. Called as ``learn_fn(v_value, target_value)``
        it returns the cost and applies the update
        ``param <- param - gradient * learning_rate`` to every parameter.
    """
    # Falling back to the global keeps existing three-argument calls working,
    # while an explicit list makes the function reusable for other models.
    parameters = list(params if wrt is None else wrt)

    gradients = [T.grad(cost, param) for param in parameters]
    updates = [
        (param, param - gradient * learning_rate)
        for param, gradient in zip(parameters, gradients)
    ]

    return theano.function(inputs=[v, target],
                           outputs=cost,
                           updates=updates)
# Compile the training step for the network cost.
learn_fn = get_train_functions(cost, X, target)

nb_epochs = 5000
# np.empty is the documented way to allocate an uninitialised array (the
# low-level np.ndarray constructor is not meant for this); every slot is
# overwritten by the loop below.
train_errors = np.empty(nb_epochs)
for epoch in range(nb_epochs):
    # One update step on the complete training set; the returned cost is
    # recorded so the learning curve can be plotted afterwards.
    train_errors[epoch] = learn_fn(X_train, t_train)
train_errors
array([ 44.90793435, 44.81909323, 44.74636265, ..., 2.94757535, 2.9474241 , 2.94727294])
# Learning curve: the cost recorded at every training iteration.
iterations = range(nb_epochs)
plt.plot(iterations, train_errors, '-b')
plt.xlabel('Iterations')
plt.ylabel('Cost')
<matplotlib.text.Text at 0x10bcf7c90>
# Call the plot_contour helper (defined elsewhere; presumably it draws the
# samples over the model's decision contour -- TODO confirm) with the
# training rows labelled 0 and the training rows labelled 1.
plot_contour(X_train[t_train==0], X_train[t_train==1], 'train data')
# Same call for the test set.
plot_contour(X_test[t_test==0], X_test[t_test==1], 'test data')
# Relative path of the directory that holds the slide pictures.
pics_path="../../pythonFuerDieDatenanalyse/tensorflow/notebook/pics/"
from IPython.display import Image
# Build an IPython Image from the picture inside pics_path; as the last
# expression of the cell it is what the notebook shows as the cell output.
Image(filename=pics_path+'Deeplearning 7.png')