taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git

commit 6cf80aff3e4dc57d13a33c2946bc0ae57cfae6b8
parent 5096e0cdae167122d07b09cd207a04f28ea5c3f5
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Thu,  2 Jul 2015 14:32:23 -0400

More determinism

Diffstat:
M data/transformers.py |  1 +
M model/mlp.py         |  5 +++++
M train.py             | 18 +++++++++++-------
3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/data/transformers.py b/data/transformers.py
@@ -10,6 +10,7 @@ from fuel.transformers import Batch, Mapping, SortMapping, Transformer, Unpack
 
 import data
 
+fuel.config.default_seed = 123
 
 def at_least_k(k, v, pad_at_begin, is_longitude):
     if len(v) == 0:
diff --git a/model/mlp.py b/model/mlp.py
@@ -1,5 +1,8 @@
 from theano import tensor
 
+import fuel
+import blocks
+
 from fuel.transformers import Batch, MultiProcessing
 from fuel.streams import DataStream
 from fuel.schemes import ConstantScheme, ShuffledExampleScheme
@@ -11,6 +14,8 @@ from data.hdf5 import TaxiDataset, TaxiStream
 from data.cut import TaxiTimeCutScheme
 from model import ContextEmbedder
 
+blocks.config.default_seed = 123
+fuel.config.default_seed = 123
 
 class FFMLP(Initializable):
     def __init__(self, config, output_layer=None, **kwargs):
diff --git a/train.py b/train.py
@@ -87,6 +87,9 @@ if __name__ == "__main__":
     else:
         step_rule = AdaDelta()
 
+    logger.info("Fuel seed: %d" % fuel.config.default_seed)
+    logger.info("Blocks seed: %d" % blocks.config.default_seed)
+
     params = cg.parameters
     algorithm = GradientDescent(
         cost=cost,
@@ -107,13 +110,14 @@
                              prefix='valid',
                              every_n_batches=1000),
         Printing(every_n_batches=1000),
-
-        # SaveLoadParams(dump_path, cg,
-        #                before_training=True,  # before training -> load params
-        #                every_n_batches=1000,  # every N batches -> save params
-        #                after_epoch=True,      # after epoch -> save params
-        #                after_training=True,   # after training -> save params
-        #                ),
+        # FinishAfter(every_n_batches=10),
+
+        SaveLoadParams(dump_path, cg,
+                       before_training=True,  # before training -> load params
+                       every_n_batches=1000,  # every N batches -> save params
+                       after_epoch=True,      # after epoch -> save params
+                       after_training=True,   # after training -> save params
+                       ),
 
         RunOnTest(model_name,
                   model,
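
Editor's note on the seeding pattern above (not part of the commit): both Fuel and
Blocks build their default NumPy RandomState from a library-wide
config.default_seed, so pinning that value before any stream or brick is
constructed is enough to make example shuffling and weight initialization
repeatable across runs. A minimal sketch of the idea follows; the toy dataset
and its values are purely illustrative, not taken from this repository.

import fuel
import blocks

# Pin both library-wide seeds before building any stream or brick,
# mirroring what the commit does at import time in model/mlp.py.
fuel.config.default_seed = 123
blocks.config.default_seed = 123

import numpy
from fuel.datasets import IndexableDataset
from fuel.streams import DataStream
from fuel.schemes import ShuffledExampleScheme

# With no explicit rng argument, ShuffledExampleScheme seeds its
# RandomState from fuel.config.default_seed, so the visiting order
# printed below is identical on every run.
dataset = IndexableDataset({'features': numpy.arange(8).reshape((8, 1))})
stream = DataStream(dataset,
                    iteration_scheme=ShuffledExampleScheme(dataset.num_examples))
print([x[0] for x, in stream.get_epoch_iterator()])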