make_time_index.py (1337B)
#!/usr/bin/env python
# Build an SQLite index of the begin/end time of every trip in the training set

import contextlib
import os
import sys
import importlib

import h5py
import numpy

import data
from data.hdf5 import taxi_it

import sqlite3


def _collect_trip_times():
    """Return a list of (trip, begin, end) integer tuples for the 'train' trips.

    `trip` is the position of the trip in the `taxi_it('train')` iteration;
    trips without any latitude sample are skipped but still consume an index,
    so the stored ids stay aligned with the iterator positions.
    """
    times = []
    for i, line in enumerate(taxi_it('train')):
        time = line['timestamp']
        latitude = line['latitude']

        # A trip with no recorded point has no duration: leave it out.
        if len(latitude) == 0:
            continue

        # 15 time units elapse between two consecutive samples
        # (presumably seconds, matching the timestamp unit -- TODO confirm).
        duration = 15 * (len(latitude) - 1)

        times.append((i, int(time), int(time + duration)))
        # Progress feedback: show the entry just added every 1000th trip.
        if i % 1000 == 0:
            print(times[-1])
    return times


def _write_time_index(outpath, times):
    """Store `times` in a new `trip_times` table of the database at `outpath`."""
    # The connection's own context manager only commits or rolls back the
    # transaction; closing() is what actually releases the database handle.
    with contextlib.closing(sqlite3.connect(outpath)) as timedb:
        with timedb:
            c = timedb.cursor()
            c.execute('''
                CREATE TABLE trip_times
                (trip INTEGER, begin INTEGER, end INTEGER)
                ''')
            print("Adding data...")
            c.executemany('INSERT INTO trip_times(trip, begin, end) VALUES(?, ?, ?)', times)
            timedb.commit()
            print("Creating index...")
            c.execute('''CREATE INDEX trip_begin_index ON trip_times (begin)''')


def make_valid(outpath):
    """Create the trip time index database at `outpath`.

    Every non-empty trip of the training set is written as one row
    (trip, begin, end) of the table `trip_times`, and an index named
    `trip_begin_index` is created on the `begin` column.

    Args:
        outpath: path of the SQLite database file to write.

    Raises:
        sqlite3.OperationalError: if the database already contains a
            `trip_times` table (it is created unconditionally).
    """
    _write_time_index(outpath, _collect_trip_times())


if __name__ == '__main__':
    # At most one optional argument: the output database path.
    if len(sys.argv) > 2:
        sys.stderr.write('Usage: %s [outfile]\n' % sys.argv[0])
        sys.exit(1)
    outpath = os.path.join(data.path, 'time_index.db') if len(sys.argv) < 2 else sys.argv[1]
    make_valid(outpath)