pvalue.py (1309B)
1 #!/usr/bin/env python 2 3 import os 4 import sys 5 6 import math 7 import numpy 8 9 import data 10 11 # Haversine distance calculation 12 # --------- -------- ----------- 13 14 rearth = 6371. 15 deg2rad = 3.141592653589793 / 180. 16 17 def hdist(a, b): 18 lat1 = a[:, 0] * deg2rad 19 lon1 = a[:, 1] * deg2rad 20 lat2 = b[:, 0] * deg2rad 21 lon2 = b[:, 1] * deg2rad 22 23 dlat = abs(lat1-lat2) 24 dlon = abs(lon1-lon2) 25 26 al = numpy.sin(dlat/2)**2 + numpy.cos(lat1) * numpy.cos(lat2) * (numpy.sin(dlon/2)**2) 27 d = numpy.arctan2(numpy.sqrt(al), numpy.sqrt(1.-al)) 28 29 hd = 2. * rearth * d 30 31 return hd 32 33 34 # Read the inputs 35 # ---- --- ------ 36 37 def readcsv(f): 38 return numpy.genfromtxt(f, delimiter=',', skip_header=1)[:, 1:3] 39 40 answer = readcsv(os.path.join(data.path, 'test_answer.csv')) 41 42 tables = [readcsv(f) for f in sys.argv if '.csv' in f] 43 etables = [hdist(t, answer) for t in tables] 44 45 # Calculate p-values 46 # --------- -------- 47 48 pvalue = numpy.zeros((len(tables), len(tables))) 49 50 for i, a in enumerate(etables): 51 for j, b in enumerate(etables): 52 if i == j: 53 continue 54 d = b - a 55 var = (numpy.mean((a - numpy.mean(a))**2) 56 + numpy.mean((b - numpy.mean(b))**2)) / 2. 57 pv = 1 - .5 * (1 + math.erf(numpy.mean(d) / numpy.sqrt(2 * var))) 58 pvalue[i, j] = pv 59 60 print pvalue