commit 3f3ab2bfe3ebfa266d433012be1c89c722d63352
parent 32b078f28add3d22529e55aeac6674d924e9b510
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date: Thu, 2 Jul 2015 11:15:37 -0400
Unify parameters for joint_simple_mlp_tgtcls_111_cswdtx_bigger{,_dropout}
Diffstat:
5 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger.py
@@ -29,14 +29,14 @@ dim_embeddings = [
# Common network part
dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
-dim_hidden = [1000]
+dim_hidden = [5000]
# Destination prediction part
-dim_hidden_dest = [400]
+dim_hidden_dest = [1000]
dim_output_dest = dest_tgtcls.shape[0]
# Time prediction part
-dim_hidden_time = [400]
+dim_hidden_time = [500]
dim_output_time = len(time_tgtcls)
# Cost ratio between distance cost and time cost
@@ -46,8 +46,7 @@ embed_weights_init = IsotropicGaussian(0.01)
mlp_weights_init = IsotropicGaussian(0.1)
mlp_biases_init = Constant(0.01)
-learning_rate = 0.000001
-momentum = 0.99
+# use adadelta, so no learning_rate or momentum
batch_size = 200
valid_set = 'cuts/test_times_0'
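
Note: the removed learning_rate and momentum settings are replaced by AdaDelta, which adapts per-parameter step sizes and needs no hand-tuned schedule. A minimal sketch of how the step rule could be chosen in Blocks, assuming the trainer dispatches on whether the config defines a learning rate (the hasattr() test is illustrative, not this repo's actual train.py logic):

    from blocks.algorithms import AdaDelta, GradientDescent, Momentum

    if hasattr(config, 'learning_rate'):
        step_rule = Momentum(learning_rate=config.learning_rate,
                             momentum=config.momentum)
    else:
        step_rule = AdaDelta()  # adaptive per-parameter steps, nothing to tune

    # Older Blocks releases spell the keyword 'params' instead of 'parameters'.
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=step_rule)
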
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -46,11 +46,10 @@ embed_weights_init = IsotropicGaussian(0.01)
mlp_weights_init = IsotropicGaussian(0.1)
mlp_biases_init = Constant(0.01)
-# apply_dropout = True
-# dropout_p = 0.5
+apply_dropout = True
+dropout_p = 0.5
-learning_rate = 0.001
-momentum = 0.9
+# use adadelta, so no learning_rate or momentum
batch_size = 200
valid_set = 'cuts/test_times_0'
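
The dropout settings are now active instead of commented out. In Blocks, dropout is applied as a transformation of the computation graph rather than a property of the bricks; a minimal sketch, assuming the inputs of every Linear brick are the variables being dropped (the VariableFilter this repo actually uses may differ):

    from blocks.bricks import Linear
    from blocks.filter import VariableFilter
    from blocks.graph import ComputationGraph, apply_dropout
    from blocks.roles import INPUT

    cg = ComputationGraph([cost])
    if getattr(config, 'apply_dropout', False):
        # Drop the inputs of the Linear bricks with probability dropout_p.
        dropped = VariableFilter(bricks=[Linear], roles=[INPUT])(cg.variables)
        cg = apply_dropout(cg, dropped, config.dropout_p)
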
diff --git a/config/memory_network_1.py b/config/memory_network_1.py
@@ -31,6 +31,7 @@ candidate_encoder.dim_hidden = [100, 100, 100]
candidate_encoder.weights_init = IsotropicGaussian(0.01)
candidate_encoder.biases_init = Constant(0.001)
+normalize_representation = True
embed_weights_init = IsotropicGaussian(0.001)
diff --git a/model/memory_network.py b/model/memory_network.py
@@ -21,11 +21,11 @@ class Model(Initializable):
self.context_embedder = ContextEmbedder(config)
- self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden],
- dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden,
+ self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()],
+ dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size],
name='prefix_encoder')
- self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden],
- dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden,
+ self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden] + [config.representation_activation()],
+ dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden + [config.representation_size],
name='candidate_encoder')
self.softmax = Softmax()
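
Both encoders now end with an explicit representation layer: config.representation_activation (an activation brick class) and config.representation_size are appended after the rectified hidden layers, so the encoder output has a fixed width and activation regardless of dim_hidden. A standalone sketch with made-up sizes, using Tanh as a stand-in for the configured activation:

    from blocks.bricks import MLP, Rectifier, Tanh

    # Illustrative values; the real ones come from the config module.
    dim_input, dim_hidden, representation_size = 20, [100, 100, 100], 50

    encoder = MLP(activations=[Rectifier() for _ in dim_hidden] + [Tanh()],
                  dims=[dim_input] + dim_hidden + [representation_size],
                  name='prefix_encoder')
    # MLP requires len(dims) == len(activations) + 1, which both lists satisfy.
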
@@ -46,11 +46,15 @@ class Model(Initializable):
prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.prefix_extremities.items())
prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1)
prefix_representation = self.prefix_encoder.apply(prefix_inputs)
+ if self.config.normalize_representation:
+ prefix_representation = prefix_representation / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k] for k in self.context_embedder.inputs }))
candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.candidate_extremities.items())
candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1)
candidate_representation = self.candidate_encoder.apply(candidate_inputs)
+ if self.config.normalize_representation:
+ candidate_representation = candidate_representation / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))
similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
similarity = self.softmax.apply(similarity_score)
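
When normalize_representation is set, each row of both representations is rescaled to unit L2 norm, so the dot product in similarity_score is a cosine similarity in [-1, 1] before the softmax. A small numpy check of that identity (numpy standing in for the Theano expressions above):

    import numpy as np

    rng = np.random.RandomState(0)
    p = rng.randn(4, 8)  # prefix representations, (batch, dim)
    c = rng.randn(6, 8)  # candidate representations

    def l2_normalize(x):
        # Row-wise unit norm, same operation as the Theano code above.
        return x / np.sqrt((x ** 2).sum(axis=1, keepdims=True))

    scores = np.dot(l2_normalize(p), l2_normalize(c).T)  # (4, 6) cosines
    assert np.all(np.abs(scores) <= 1.0 + 1e-9)
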
diff --git a/train.py b/train.py
@@ -70,6 +70,7 @@ class SaveLoadParams(SimpleExtension):
with open(self.path, 'w') as f:
logger.info('Saving parameters to %s...'%self.path)
cPickle.dump(self.model.get_param_values(), f, protocol=cPickle.HIGHEST_PROTOCOL)
+ logger.info('Done saving.')
def do_load(self):
try:
@@ -153,8 +154,10 @@ if __name__ == "__main__":
Printing(every_n_batches=1000),
SaveLoadParams(dump_path, cg,
- before_training=config.load_model, # before training -> load params
+ before_training=True, # before training -> load params
every_n_batches=1000, # every N batches -> save params
+ after_epoch=True, # after epoch -> save params
+ after_training=True, # after training -> save params
),
]
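
SaveLoadParams is a SimpleExtension, so the trigger keywords passed here (before_training, every_n_batches, after_epoch, after_training) are handled by Blocks itself; the extension only dispatches in do(). A minimal sketch of that pattern, with the load/save bodies elided (the real class also takes the dump path and model, as in the hunk above):

    from blocks.extensions import SimpleExtension

    class SaveLoadParams(SimpleExtension):
        def __init__(self, path, model, **kwargs):
            super(SaveLoadParams, self).__init__(**kwargs)
            self.path, self.model = path, model

        def do(self, which_callback, *args):
            # Load once at startup, save on every other trigger.
            if which_callback == 'before_training':
                self.do_load()
            else:
                self.do_save()
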