taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 1e8da55c32746e7bf898717c032144b056256d3c
parent ccc92aaa1eb24fdf00afb254f56982b1bac406d1
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Tue, 14 Jul 2015 15:43:13 -0400

Add memory network config files (not sure how useful they are)

Diffstat:
A config/memory_network_2.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A config/memory_network_3.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+), 0 deletions(-)

diff --git a/config/memory_network_2.py b/config/memory_network_2.py @@ -0,0 +1,57 @@ +from blocks import roles +from blocks.bricks import Rectifier, Tanh, Logistic +from blocks.filter import VariableFilter +from blocks.initialization import IsotropicGaussian, Constant + +import data +from model.memory_network import Model, Stream + + +n_begin_end_pts = 10 # how many points we consider at the beginning and end of the known trajectory + +dim_embeddings = [ + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), + ('week_of_year', 52, 10), + ('day_of_week', 7, 10), + ('qhour_of_day', 24 * 4, 10), + ('day_type', 3, 10), +] + + +class MLPConfig(object): + __slots__ = ('dim_input', 'dim_hidden', 'dim_output', 'weights_init', 'biases_init') + +prefix_encoder = MLPConfig() +prefix_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) +prefix_encoder.dim_hidden = [1000, 1000] +prefix_encoder.weights_init = IsotropicGaussian(0.01) +prefix_encoder.biases_init = Constant(0.001) + +candidate_encoder = MLPConfig() +candidate_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) +candidate_encoder.dim_hidden = [1000, 1000] +candidate_encoder.weights_init = IsotropicGaussian(0.01) +candidate_encoder.biases_init = Constant(0.001) + +representation_size = 1000 +representation_activation = Tanh +normalize_representation = True + +embed_weights_init = IsotropicGaussian(0.001) + +dropout = 0.5 +dropout_inputs = VariableFilter(bricks=[Rectifier], name='output') + +noise = 0.01 +noise_inputs = VariableFilter(roles=[roles.PARAMETER]) + +batch_size = 512 + +valid_set = 'cuts/test_times_0' +max_splits = 1 +num_cuts = 1000 + +train_candidate_size = 10000 +valid_candidate_size = 20000 + diff --git a/config/memory_network_3.py b/config/memory_network_3.py @@ -0,0 +1,57 @@ +from blocks import roles +from blocks.bricks import Rectifier, Tanh, Logistic +from blocks.filter import VariableFilter 
+from blocks.initialization import IsotropicGaussian, Constant + +import data +from model.memory_network import Model, Stream + + +n_begin_end_pts = 10 # how many points we consider at the beginning and end of the known trajectory + +dim_embeddings = [ + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), + ('week_of_year', 52, 10), + ('day_of_week', 7, 10), + ('qhour_of_day', 24 * 4, 10), + ('day_type', 3, 10), +] + + +class MLPConfig(object): + __slots__ = ('dim_input', 'dim_hidden', 'dim_output', 'weights_init', 'biases_init') + +prefix_encoder = MLPConfig() +prefix_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) +prefix_encoder.dim_hidden = [200, 200, 200] +prefix_encoder.weights_init = IsotropicGaussian(0.01) +prefix_encoder.biases_init = Constant(0.001) + +candidate_encoder = MLPConfig() +candidate_encoder.dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings) +candidate_encoder.dim_hidden = [200, 200, 200] +candidate_encoder.weights_init = IsotropicGaussian(0.01) +candidate_encoder.biases_init = Constant(0.001) + +representation_size = 500 +representation_activation = Tanh +normalize_representation = True + +embed_weights_init = IsotropicGaussian(0.001) + +dropout = 0.5 +dropout_inputs = VariableFilter(bricks=[Rectifier], name='output') + +noise = 0.01 +noise_inputs = VariableFilter(roles=[roles.PARAMETER]) + +batch_size = 512 + +valid_set = 'cuts/test_times_0' +max_splits = 1 +num_cuts = 1000 + +train_candidate_size = 10000 +valid_candidate_size = 20000 +