commit 6a0b47a2fc7c4e800f14212ae81dbd56de17fa94
parent 676af1086b141a7803626b040e7da03526b95406
Author: AdeB <adbrebs@gmail.com>
Date: Sat, 25 Apr 2015 10:09:01 -0400
Data analysis updated for the new Dataset class. Coordinates are saved in a light numpy array for fast/light retrieval.
Diffstat:
4 files changed, 86 insertions(+), 29 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+.idea/*
+
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/alex/plots.py b/alex/plots.py
@@ -1,29 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy
-import cPickle
-import scipy
-
-print "Loading data..."
-with open("train_normal.pkl") as f: normal = cPickle.load(f)
-
-print "Extracting x and y"
-xes = [c[0] for l in normal for c in l[-1]]
-yes = [c[1] for l in normal for c in l[-1]]
-
-xrg = [-8.75, -8.55]
-yrg = [41.05, 41.25]
-
-print "Doing 1d histogram"
-#plt.clf(); plt.hist(xes, bins=1000, range=xrg); plt.savefig("xhist.pdf")
-#plt.clf(); plt.hist(yes, bins=1000, range=yrg); plt.savefig("yhist.pdf")
-
-print "Doing 2d histogram"
-#plt.clf(); plt.hist2d(xes, yes, bins=500, range=[xrg, yrg]); plt.savefig("xymap.pdf")
-
-hist, xx, yy = numpy.histogram2d(xes, yes, bins=2000, range=[xrg, yrg])
-
-import ipdb; ipdb.set_trace()
-
-plt.clf(); plt.imshow(numpy.log(hist)); plt.savefig("xyhmap.pdf")
-
-scipy.misc.imsave("xymap.png", numpy.log(hist))
diff --git a/data_analysis/maps.py b/data_analysis/maps.py
@@ -0,0 +1,55 @@
+import cPickle
+import scipy
+import numpy as np
+import matplotlib.pyplot as plt
+
+import data
+
+
+def compute_number_coordinates():
+ train_it = data.train_it()
+
+ # Count the number of coordinates
+ n_coordinates = 0
+ for ride in train_it:
+ n_coordinates += len(ride[-1])
+ print n_coordinates
+
+ return n_coordinates
+
+
+def extract_coordinates(n_coordinates=None):
+ """Extract coordinates from the dataset and store them in a numpy array"""
+
+ if n_coordinates is None:
+ n_coordinates = compute_number_coordinates()
+
+ coordinates = np.zeros((n_coordinates, 2), dtype="float32")
+ train_it = data.train_it()
+
+ c = 0
+ for ride in train_it:
+ for point in ride[-1]:
+ coordinates[c] = point
+ c += 1
+
+ cPickle.dump(coordinates, open(data.DATA_PATH + "/coordinates_array.pkl", "wb"))
+
+
+def draw_map(coordinates, xrg, yrg):
+
+ hist, xx, yy = np.histogram2d(coordinates[:, 0], coordinates[:, 1], bins=2000, range=[xrg, yrg])
+
+ plt.imshow(np.log(hist))
+ plt.savefig(data.DATA_PATH + "/analysis/xyhmap.pdf")
+
+ scipy.misc.imsave(data.DATA_PATH + "/analysis/xymap.png", np.log(hist))
+
+
+if __name__ == "__main__":
+ # extract_coordinates(n_coordinates=83360928)
+
+ coordinates = cPickle.load(open(data.DATA_PATH + "/coordinates_array.pkl", "rb"))
+ xrg = [-8.75, -8.55]
+ yrg = [41.05, 41.25]
+ draw_map(coordinates, xrg, yrg)
diff --git a/data_analysis/maps_old.py b/data_analysis/maps_old.py
@@ -0,0 +1,29 @@
+import matplotlib.pyplot as plt
+import numpy
+import cPickle
+import scipy
+
+print "Loading data..."
+with open("../train_normal.pkl") as f: normal = cPickle.load(f)
+
+print "Extracting x and y"
+xes = [c[0] for l in normal for c in l[-1]]
+yes = [c[1] for l in normal for c in l[-1]]
+
+xrg = [-8.75, -8.55]
+yrg = [41.05, 41.25]
+
+print "Doing 1d histogram"
+#plt.clf(); plt.hist(xes, bins=1000, range=xrg); plt.savefig("xhist.pdf")
+#plt.clf(); plt.hist(yes, bins=1000, range=yrg); plt.savefig("yhist.pdf")
+
+print "Doing 2d histogram"
+#plt.clf(); plt.hist2d(xes, yes, bins=500, range=[xrg, yrg]); plt.savefig("xymap.pdf")
+
+hist, xx, yy = numpy.histogram2d(xes, yes, bins=2000, range=[xrg, yrg])
+
+import ipdb; ipdb.set_trace()
+
+plt.clf(); plt.imshow(numpy.log(hist)); plt.savefig("xyhmap.pdf")
+
+scipy.misc.imsave("xymap.png", numpy.log(hist))