taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit ccc92aaa1eb24fdf00afb254f56982b1bac406d1
parent 88ff927f2cec3249952b68224e1fc458033007a1
Author: Alex Auvolat <katchup@adnab.me>
Date:   Tue, 14 Jul 2015 12:59:30 -0400

Update transformers

Diffstat:
M data/transformers.py | 9 +++++++++
1 file changed, 9 insertions(+), 0 deletions(-)

diff --git a/data/transformers.py b/data/transformers.py
@@ -24,6 +24,8 @@ def at_least_k(k, v, pad_at_begin, is_longitude):
 
 
 class Select(Transformer):
+    produces_examples = True
+
     def __init__(self, data_stream, sources):
         super(Select, self).__init__(data_stream)
         self.ids = [data_stream.sources.index(source) for source in sources]
@@ -36,6 +38,8 @@ class Select(Transformer):
         return [data[id] for id in self.ids]
 
 class TaxiExcludeTrips(Transformer):
+    produces_examples = True
+
     def __init__(self, stream, exclude_list):
         super(TaxiExcludeTrips, self).__init__(stream)
         self.id_trip_id = stream.sources.index('trip_id')
@@ -48,6 +52,8 @@ class TaxiExcludeTrips(Transformer):
         return data
 
 class TaxiExcludeEmptyTrips(Transformer):
+    produces_examples = True
+
     def __init__(self, stream):
         super(TaxiExcludeEmptyTrips, self).__init__(stream)
         self.latitude = stream.sources.index('latitude')
@@ -59,8 +65,11 @@ class TaxiExcludeEmptyTrips(Transformer):
         return data
 
 class TaxiGenerateSplits(Transformer):
+    produces_examples = True
+
     def __init__(self, data_stream, max_splits=-1):
         super(TaxiGenerateSplits, self).__init__(data_stream)
+        self.sources = data_stream.sources + ('destination_latitude', 'destination_longitude', 'travel_time')
         self.max_splits = max_splits
         self.data = None
 