maps.py (1464B)
"""Collect the latitude/longitude points of the 'train' rides and draw a density map.

Run as a script, this module first dumps every point yielded by
``taxi_it('train')`` into a pickled numpy array under ``data.path`` and then
renders a log-scaled 2D histogram of those points to a PNG file.
"""

import cPickle
import numpy as np
import matplotlib.pyplot as plt

import data
from data.hdf5 import taxi_it


def compute_number_coordinates():
    """Count the points of all rides yielded by ``taxi_it('train')``.

    The count is the sum of ``len(ride['latitude'])`` over the rides; it is
    printed and returned.
    """
    n_coordinates = 0
    for ride in taxi_it('train'):
        n_coordinates += len(ride['latitude'])
    print(n_coordinates)

    return n_coordinates


def extract_coordinates(n_coordinates=None):
    """Extract coordinates from the dataset and store them in a numpy array.

    Every (latitude, longitude) pair of every ride yielded by
    ``taxi_it('train')`` is written to one row of a float32 array, which is
    then pickled to ``data.path + "/coordinates_array.pkl"``.

    Parameters
    ----------
    n_coordinates : int or None
        Number of rows to allocate. When None, the dataset is scanned once
        with ``compute_number_coordinates`` to obtain it. If the value is
        smaller than the real number of points, an IndexError is raised while
        filling the array.
    """
    if n_coordinates is None:
        n_coordinates = compute_number_coordinates()

    coordinates = np.zeros((n_coordinates, 2), dtype="float32")

    c = 0
    for ride in taxi_it('train'):
        for point in zip(ride['latitude'], ride['longitude']):
            coordinates[c] = point
            c += 1

    print(c)

    # If the caller over-estimated n_coordinates, the tail of the array is
    # still all zeros; keep only the rows that were actually filled so the
    # saved file does not contain fake (0, 0) points.
    if c < n_coordinates:
        coordinates = coordinates[:c]

    # Use a context manager so the file is flushed and closed deterministically.
    with open(data.path + "/coordinates_array.pkl", "wb") as pkl_file:
        cPickle.dump(coordinates, pkl_file)


def draw_map(coordinates, xrg, yrg):
    """Render a log-scaled 2D histogram of ``coordinates`` to a PNG file.

    Parameters
    ----------
    coordinates : array indexable as ``coordinates[:, 0]`` / ``coordinates[:, 1]``
        Column 0 is binned along the first histogram axis, column 1 along the
        second.
    xrg, yrg : [min, max]
        Histogram range for column 0 and column 1 respectively; points outside
        are ignored by ``np.histogram2d``.

    The image is saved to ``data.path + "/analysis/xyhmap2.png"``.
    """
    print("Start drawing")
    plt.figure(figsize=(30, 30), dpi=100, facecolor='w', edgecolor='k')
    hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1],
                                  bins=2000, range=[xrg, yrg])

    # Empty bins give log(0) == -inf. That value is kept as before; only the
    # divide-by-zero RuntimeWarning numpy would emit is silenced.
    with np.errstate(divide="ignore"):
        log_hist = np.log(hist)

    plt.imshow(log_hist)
    plt.gca().invert_yaxis()
    plt.savefig(data.path + "/analysis/xyhmap2.png")


if __name__ == "__main__":
    # Passing an explicit count skips the extra counting pass over the dataset.
    # NOTE(review): presumably the known number of points in 'train' - confirm.
    extract_coordinates(n_coordinates=83409386)

    with open(data.path + "/coordinates_array.pkl", "rb") as pkl_file:
        coordinates = cPickle.load(pkl_file)
    xrg = [41.05, 41.25]
    yrg = [-8.75, -8.55]
    draw_map(coordinates, xrg, yrg)