-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtocsv.py
52 lines (42 loc) · 1.83 KB
/
tocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import sys
import bparser
from helpers import *
# Usage: <script> <year>
# The single command-line argument selects the '<year>/directory/data_converted'
# input handed to bparser.parse.
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit("usage: %s <year>" % sys.argv[0])
year = sys.argv[1]

# bparser.parse returns four values; `died` is unpacked but not used in the
# code visible here.
lines, errors, broken, died = bparser.parse('%s/directory/data_converted' % year)
#lines, errors, broken, died = bparser.parse('test/front')

# Print a one-line summary of the parse, then every error and broken entry.
# print(...) with exactly one argument produces the same output on Python 2
# and Python 3, so the script no longer depends on the print statement.
print("good:%d errord:%d broken:%d" % (len(lines), len(errors), len(broken)))
for error in errors:
    print(error)
for broke in broken:
    print(broke)
# Output layout
# -------------
# Compressed files (<year>_boston_res.csv / <year>_nonboston_res.csv):
#   last,first,number,street,strsuffix,nh,filepath
# Full files (<year>_boston_complete.csv / <year>_nonboston_complete.csv):
#   last,first,number,street,strsuffix,nh,owner,b_number,b_street,b_strsuffix,
#   b_nh,b_owner,prof,business,married,widowed,spouse,filepath
#
# In the full files each optional column is written as "<key> <repr(value)>",
# or left empty when the entry does not have that key.
# NOTE(review): 'filepath' is itself listed in `keys`, so full rows carry the
# path twice (once as "filepath <repr>", once trimmed at the end of the row).
# Kept as-is; confirm whether that is intended.
# NOTE(review): fields are joined with bare commas and no quoting, so a value
# containing a comma will shift columns; the csv module would avoid that but
# would change the existing output format.
keys = ['owner', 'b_number', 'b_street', 'b_strsuffix', 'b_nh', 'b_owner',
        'prof', 'business', 'married', 'widowed', 'spouse', 'filepath']

# Number of leading characters dropped from every filepath before writing.
# NOTE(review): presumably strips a fixed directory prefix -- confirm.
FILEPATH_TRIM = 30

# A single `with` guarantees all four files are flushed and closed even if a
# malformed entry raises part-way through the loop.
with open("%s_boston_res.csv" % (year), 'w') as outfile_boston, \
        open("%s_nonboston_res.csv" % (year), 'w') as outfile_nonboston, \
        open("%s_boston_complete.csv" % (year), 'w') as outfile_boston_complete, \
        open("%s_nonboston_complete.csv" % (year), 'w') as outfile_nonboston_complete:
    for line in lines:
        # 'number' is optional; a missing number leaves an empty column.
        number = line['number'] if 'number' in line else ""
        row = "%s,%s,%s,%s,%s,%s" % (line['last'], line['first'], number,
                                     line['street'], line['strsuffix'],
                                     line['nh'])
        trimmed_path = line['filepath'][FILEPATH_TRIM:]

        # Route the entry once: neighbourhoods found in boston_nh go to the
        # Boston pair of files, everything else to the non-Boston pair.
        if line['nh'].lower() in boston_nh:
            res_out = outfile_boston
            complete_out = outfile_boston_complete
        else:
            res_out = outfile_nonboston
            complete_out = outfile_nonboston_complete

        # Compressed row. The comma before the path is written for BOTH
        # destinations; previously the non-Boston rows omitted it, fusing the
        # path into the preceding column.
        res_out.write(row + "," + trimmed_path + '\n')

        # Full row: append one column per key, empty when the key is absent
        # so that the columns stay aligned across rows.
        for key in keys:
            value = ""
            if key in line:
                value = key + " " + repr(line[key])
            row += "," + value
        complete_out.write(row + "," + trimmed_path + '\n')