commit 902a8dcb40b3da9492093edd5bda356240f29eb0
parent cdd050295c3c6df780bdc65088959d908b2cf2a5
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date: Mon, 27 Apr 2015 19:17:29 -0400
Add normalization
Diffstat:
2 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/data.py b/data.py
@@ -1,6 +1,7 @@
import ast, csv
import socket
import fuel
+import numpy
from enum import Enum
from fuel.datasets import Dataset
from fuel.streams import DataStream
@@ -13,6 +14,9 @@ else:
client_ids = {int(x): y+1 for y, x in enumerate(open(DATA_PATH+"/client_ids.txt"))}
+porto_center = numpy.array([[ -8.61612, 41.1573]], dtype='float32')
+data_std = numpy.sqrt(numpy.array([[ 0.00333233, 0.00549598]], dtype='float32'))
+
class CallType(Enum):
CENTRAL = 0
STAND = 1
diff --git a/model.py b/model.py
@@ -21,7 +21,7 @@ from fuel.transformers import Batch
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme
-from blocks.algorithms import GradientDescent, Scale, AdaDelta
+from blocks.algorithms import GradientDescent, Scale, AdaDelta, Momentum
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.extensions import Printing
@@ -45,13 +45,22 @@ n_end_pts = 5
dim_embed = 50
dim_hidden = 200
-learning_rate = 0.01
+learning_rate = 0.002
+momentum = 0.9
batch_size = 32
def main():
# The input and the targets
x_firstk = tensor.matrix('first_k')
+ n = x_firstk.shape[0]
+ x_firstk = (x_firstk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+ x_firstk = x_firstk.reshape((n, 2 * n_begin_end_pts))
+
x_lastk = tensor.matrix('last_k')
+ n = x_lastk.shape[0]
+ x_lastk = (x_lastk.reshape((n, n_begin_end_pts, 2)) - data.porto_center[None, None, :]) / data.data_std[None, None, :]
+ x_lastk = x_lastk.reshape((n, 2 * n_begin_end_pts))
+
x_client = tensor.lvector('origin_call')
x_stand = tensor.lvector('origin_stand')
y = tensor.matrix('destination')
@@ -75,9 +84,10 @@ def main():
# hidden = theano.printing.Print("hidden")(hidden)
outputs = output_layer.apply(hidden)
+ # De-normalize: rescale by data_std and shift by porto_center (inverse of the input normalization)
+ outputs = data.data_std * outputs + data.porto_center
+
# Calculate the cost
- # cost = (outputs - y).norm(2, axis=1).mean()
- # outputs = numpy.array([[ -8.621953, 41.162142]], dtype='float32') + 0 * outputs
cost = (outputs - y).norm(2, axis=1).mean()
cost.name = 'cost'
hcost = hdist.hdist(outputs, y).mean()
@@ -88,7 +98,7 @@ def main():
stand_embed_table.weights_init = IsotropicGaussian(0.001)
hidden_layer.weights_init = IsotropicGaussian(0.01)
hidden_layer.biases_init = Constant(0.001)
- output_layer.weights_init = IsotropicGaussian(0.001)
+ output_layer.weights_init = IsotropicGaussian(0.01)
output_layer.biases_init = Constant(0.001)
client_embed_table.initialize()
@@ -119,8 +129,8 @@ def main():
params = VariableFilter(bricks=[Linear])(cg.parameters)
algorithm = GradientDescent(
cost=cost,
- step_rule=AdaDelta(decay_rate=0.5),
- # step_rule=Scale(learning_rate=learning_rate),
+ # step_rule=AdaDelta(decay_rate=0.5),
+ step_rule=Momentum(learning_rate=learning_rate, momentum=momentum),
params=params)
extensions=[DataStreamMonitoring([cost, hcost], valid_stream,