commit cdd050295c3c6df780bdc65088959d908b2cf2a5
parent 9a60f6c4e39c09187710608a9e225b6024b34364
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date: Mon, 27 Apr 2015 18:27:58 -0400
A reasonable neural network, only it doesn't work.
Diffstat:
 M model.py | 28 +++++++++++++++++++---------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/model.py b/model.py
@@ -5,10 +5,13 @@ from argparse import ArgumentParser
import numpy
import theano
+from theano import printing
from theano import tensor
from theano.ifelse import ifelse
-from blocks.bricks import MLP, Rectifier, Linear
+from blocks.filter import VariableFilter
+
+from blocks.bricks import MLP, Rectifier, Linear, Sigmoid, Identity
from blocks.bricks.lookup import LookupTable
from blocks.initialization import IsotropicGaussian, Constant
@@ -18,7 +21,7 @@ from fuel.transformers import Batch
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
-from blocks.algorithms import GradientDescent, Scale
+from blocks.algorithms import GradientDescent, Scale, AdaDelta
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.extensions import Printing
@@ -43,7 +46,7 @@ dim_embed = 50
dim_hidden = 200
learning_rate = 0.01
-batch_size = 64
+batch_size = 32
def main():
# The input and the targets
@@ -65,25 +68,31 @@ def main():
client_embed = client_embed_table.apply(x_client).flatten(ndim=2)
stand_embed = stand_embed_table.apply(x_stand).flatten(ndim=2)
inputs = tensor.concatenate([x_firstk, x_lastk,
- client_embed, stand_embed],
+ client_embed.zeros_like(), stand_embed.zeros_like()],
axis=1)
+ # inputs = theano.printing.Print("inputs")(inputs)
hidden = hidden_layer.apply(inputs)
+ # hidden = theano.printing.Print("hidden")(hidden)
outputs = output_layer.apply(hidden)
# Calculate the cost
# cost = (outputs - y).norm(2, axis=1).mean()
# outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs
- cost = hdist.hdist(outputs, y).mean()
+ cost = (outputs - y).norm(2, axis=1).mean()
cost.name = 'cost'
+ hcost = hdist.hdist(outputs, y).mean()
+ hcost.name = 'hcost'
# Initialization
client_embed_table.weights_init = IsotropicGaussian(0.001)
+ stand_embed_table.weights_init = IsotropicGaussian(0.001)
hidden_layer.weights_init = IsotropicGaussian(0.01)
hidden_layer.biases_init = Constant(0.001)
output_layer.weights_init = IsotropicGaussian(0.001)
output_layer.biases_init = Constant(0.001)
client_embed_table.initialize()
+ stand_embed_table.initialize()
hidden_layer.initialize()
output_layer.initialize()
@@ -107,13 +116,14 @@ def main():
# Training
cg = ComputationGraph(cost)
+ params = VariableFilter(bricks=[Linear])(cg.parameters)
algorithm = GradientDescent(
cost=cost,
- # step_rule=AdaDelta(decay_rate=0.5),
- step_rule=Scale(learning_rate=learning_rate),
- params=cg.parameters)
+ step_rule=AdaDelta(decay_rate=0.5),
+ # step_rule=Scale(learning_rate=learning_rate),
+ params=params)
- extensions=[DataStreamMonitoring([cost], valid_stream,
+ extensions=[DataStreamMonitoring([cost, hcost], valid_stream,
prefix='valid',
every_n_batches=1000),
Printing(every_n_batches=1000),