Note: The code is just for demonstration purposes. Some Python functions are only there to produce plots for the slides (IPython reveal), so don't blame me for the code quality.
from IPython.display import Image
# Build an IPython display image from the slide picture on disk.
Image(filename='./pics/Deeplearning 2.png')
For each layer:
Input: $\vec x^T = (x_1, x_2, \dots, x_n)$
Output of the j-th neuron: $$ h_j = \sigma(\sum_{i=1}^n w_{ij} x_i + b_j) $$
In matrix form ($W$ is a matrix):
$$ \vec h = \sigma(\vec x \cdot W + \vec b) $$
plot_train_data(X_train[t_train==0], X_train[t_train==1])
In the new feature space the data is linearly separable:
# Map the inputs into the new feature space by squaring them element-wise.
phi_train = X_train ** 2
# Split the transformed samples by class label (0 / 1) before plotting.
phi_class_0 = phi_train[t_train == 0]
phi_class_1 = phi_train[t_train == 1]
plot_train_transformed(phi_class_0, phi_class_1)
def logistic_function(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is taken with ``T.exp``, so ``x`` must be something that
    ``T`` can operate on (presumably a ``theano.tensor`` expression -- confirm
    against the file's imports).
    """
    denominator = 1. + T.exp(-x)
    return 1. / denominator
def relu(x):
    """Rectified linear unit: 0 where ``x`` is negative, ``x`` elsewhere."""
    is_negative = x < 0
    # Element-wise selection between the constant 0 and the input itself.
    return T.switch(is_negative, 0, x)
Activity vector of the first hidden layer:
$$ \vec h^{(1)} = \sigma_1 \left(\vec x \cdot W^{(1)} + \vec b^{(1)} \right) $$
Activity of the output $\vec o$ (with only one output, $o$ is a scalar):
$$ \vec o = \vec h^{(2)}= \sigma_2 \left( \vec h^{(1)} \cdot W^{(2)} + \vec b^{(2)} \right) $$
# (first) hidden layer
# Pre-activation of the hidden layer: affine map of the symbolic input X.
# NOTE(review): the leading `T.dot(X,` / `T.dot(h,` was missing in the source
# (the lines did not parse); it is reconstructed from the layer formula
# h = sigma(x . W + b) -- confirm against the original notebook.
a = T.dot(X, W_h) + b_h
# activity function "rectified linear units"
h = relu(a)
# output neuron:
y = logistic_function(T.dot(h, W_o) + b_o)
# Compiled function that maps input values for X to the network output y.
fn_predict = theano.function(inputs=[X], outputs=y)
# Cross-entropy between the targets and the network output y:
#   -(target . log(y) + (1 - target) . log(1 - y))
# NOTE(review): the `T.dot(target,` / `T.dot(1. - target,` prefixes were
# missing in the source (the line did not parse) and are reconstructed from
# the cross-entropy definition -- confirm against the original notebook.
# TODO: T.sum is only used for casting a vector with one element to a scalar!
cross_entropy = T.sum(
    -(T.dot(target, T.log(y)) + T.dot(1. - target, T.log(1. - y)))
)
# L2 penalty: mean squared weight of both weight matrices (no bias terms).
l2_reg = T.mean(T.sqr(W_h)) + T.mean(T.sqr(W_o))
# Weight of the regularisation term in the total cost.
lambda_ = 0.02
cost = cross_entropy + lambda_ * l2_reg
# Note: outputs is a list, so the compiled function returns a one-element list.
cost_func = theano.function(inputs=[X, target], outputs=[cost])
cost_func(X_train, t_train)
def get_train_functions(cost, v, target, learning_rate=0.01):
    """Compile a Theano function performing one gradient-descent step.

    Parameters
    ----------
    cost
        Symbolic expression to minimise; also the output of the compiled
        function.
    v
        Symbolic input variable of the compiled function.
    target
        Symbolic target variable of the compiled function.
    learning_rate
        Factor applied to each gradient before it is subtracted from its
        parameter.

    Returns
    -------
    The compiled function. Called with values for ``(v, target)`` it returns
    the cost and applies ``param <- param - learning_rate * grad`` to every
    entry of ``params``.
    """
    # ``params`` is not defined here; it is read from the enclosing scope.
    # NOTE(review): presumably the shared variables W_h, b_h, W_o, b_o --
    # confirm where it is defined.
    #
    # The gradients are now actually collected and the update list is
    # created locally; previously ``gparams`` stayed empty and ``updates``
    # was never initialised.
    updates = [
        (param, param - T.grad(cost, param) * learning_rate)
        for param in params
    ]
    return theano.function(inputs=[v, target], outputs=cost, updates=updates)
# Compile the training step for the cost defined on X / target.
learn_fn = get_train_functions(cost, X, target)
# One slot per epoch. Every entry is overwritten in the loop below, so an
# uninitialised buffer from np.empty is sufficient.
train_errors = np.empty(nb_epochs)
for epoch in range(nb_epochs):
    # The complete training set is passed on every call; the returned cost
    # is recorded for the learning curve.
    train_errors[epoch] = learn_fn(X_train, t_train)
array([ 44.90793435, 44.81909323, 44.74636265, ..., 2.94757535, 2.9474241 , 2.94727294])
# Learning curve: recorded training cost against the epoch index,
# drawn with the '-b' format string.
epoch_axis = range(nb_epochs)
plt.plot(epoch_axis, train_errors, '-b')
# Call plot_contour (defined elsewhere) once per data set, with the samples
# split by class label 0 / 1.
for features, labels, title in (
    (X_train, t_train, 'train data'),
    (X_test, t_test, 'test data'),
):
    plot_contour(features[labels == 0], features[labels == 1], title)
# Build an IPython display image from the slide picture under pics_path.
Image(filename=pics_path+'Deeplearning 7.png')