|
| 1 | +""" |
| 2 | +Your task in this exercise has two steps: |
| 3 | +
|
| 4 | +- audit the OSMFILE and change the variable 'mapping' to reflect the changes needed to fix |
| 5 | + the unexpected street types to the appropriate ones in the expected list. |
| 6 | + You have to add mappings only for the actual problems you find in this OSMFILE, |
| 7 | + not a generalized solution, since that may and will depend on the particular area you are auditing. |
| 8 | +- write the update_name function, to actually fix the street name. |
| 9 | + The function takes a string with street name as an argument and should return the fixed name |
| 10 | + We have provided a simple test so that you see what exactly is expected |
| 11 | +""" |
| 12 | +import xml.etree.cElementTree as ET |
| 13 | +from collections import defaultdict |
| 14 | +import re |
| 15 | +import pprint |
| 16 | +import lxml.etree as etree |
| 17 | +import pickle |
| 18 | + |
| 19 | + |
| 20 | + |
# OpenStreetMap extract that audit() reads when the script is run.
OSMFILE = "madison.osm"

# Trailing run of non-whitespace characters that starts at a word boundary,
# e.g. "St." in "Main St.".
street_type_re = re.compile(r'\b\S+\.?$', re.IGNORECASE)
# Leading run of non-whitespace characters, e.g. "N." in "N. Main St".
cardinal_direction_re = re.compile(r'^\S+', re.IGNORECASE)

# Street types that check_street_type() does not report.
expected = [
    "Street",
    "Avenue",
    "Boulevard",
    "Drive",
    "Court",
    "Place",
    "Square",
    "Lane",
    "Road",
    "Trail",
    "Parkway",
    "Commons",
]

# Abbreviated street type -> spelled-out street type.
street_mapping = {
    'Ave': 'Avenue',
    'Cir': "Circle",
    'Dr': 'Drive',
    'Ln': 'Lane',
    'Ln.': 'Lane',
    'Ct': 'Court',
    'St': 'Street',
    'St.': 'Street',
    'street': 'Street',
    'Blvd': 'Boulevard',
    'Pkwy': 'Parkway',
    'Rd': 'Road',
    'Rd.': 'Road',
}

# Abbreviated cardinal direction -> spelled-out direction. Each letter is
# registered both bare ("E") and with a trailing period ("E.").
cardinal_mapping = {
    letter + suffix: direction
    for letter, direction in (
        ('E', 'East'),
        ('N', 'North'),
        ('S', 'South'),
        ('W', 'West'),
    )
    for suffix in ('', '.')
}

# Exact addr:street values that check_invalid_addr() reports.
problems = ["Newmarket Mews"]
| 58 | + |
| 59 | + |
| 60 | + |
def check_street_type(street_types, street_name):
    """Record street_name under its street type when that type is unexpected.

    The street type is whatever street_type_re matches in street_name.
    Names with no match, or whose type is listed in `expected`, are ignored.

    Args:
        street_types: mapping of street type -> set of street names; it is
            indexed with a possibly new key, so it must create missing
            entries itself (audit() uses defaultdict(set)).
        street_name: the street name to inspect.
    """
    found = street_type_re.search(street_name)
    if found is None:
        return
    suffix = found.group()
    if suffix in expected:
        return
    street_types[suffix].add(street_name)
| 68 | + |
| 69 | + |
| 70 | + |
| 71 | + |
| 72 | + |
def is_street_name(elem):
    """Return True when elem's 'k' attribute equals "addr:street".

    Raises KeyError if elem has no 'k' attribute.
    """
    key = elem.attrib['k']
    return key == "addr:street"
| 75 | + |
def is_postal_code(elem):
    """Return True when elem's 'k' attribute equals "addr:postcode".

    Raises KeyError if elem has no 'k' attribute.
    """
    key = elem.attrib['k']
    return key == "addr:postcode"
| 78 | + |
def is_city(elem):
    """Return True when elem's 'k' attribute equals "addr:city".

    Raises KeyError if elem has no 'k' attribute.
    """
    key = elem.attrib['k']
    return key == "addr:city"
| 81 | + |
| 82 | + |
def audit(osmfile):
    """Scan an OSM XML file and report the street names listed in `problems`.

    Every "tag" child of each "node" and "way" element is inspected; tags
    whose key is "addr:street" are passed to check_invalid_addr() together
    with the owning element.

    Args:
        osmfile: path of the OSM XML file to read.

    Returns:
        The zip/city tally, a defaultdict(int).
    """
    street_types = defaultdict(set)
    zip_or_city = defaultdict(int)

    # Binary mode lets the XML parser decode the file according to its own
    # declaration instead of the platform's default text encoding, and the
    # with-block closes the file even if parsing raises.
    with open(osmfile, "rb") as osm_file:
        # Listen for "end" events: on "start" the element's children are not
        # guaranteed to have been parsed yet, so elem.iter("tag") could miss
        # some or all of the tags.
        for _event, elem in ET.iterparse(osm_file, events=("end",)):
            if elem.tag not in ("node", "way"):
                continue
            for tag in elem.iter("tag"):
                if is_street_name(tag):
                    check_invalid_addr(street_types, tag.attrib['v'], elem)

    # NOTE(review): the loop above only fills street_types, while the tally
    # returned here is never written by the currently active code, so callers
    # get an empty mapping. Kept as-is to preserve the existing return value;
    # confirm which of the two collections is meant to be returned.
    return zip_or_city
| 104 | + |
| 105 | + |
def check_street_direction(street_directions, street_name):
    """Record street_name under its leading token when that is an abbreviation.

    The leading token is whatever cardinal_direction_re matches. The name is
    recorded only when that token is a key of cardinal_mapping.

    Args:
        street_directions: mapping of leading token -> set of street names;
            it is indexed with a possibly new key, so it must create missing
            entries itself (e.g. defaultdict(set)).
        street_name: the street name to inspect.
    """
    found = cardinal_direction_re.search(street_name)
    if not found:
        return
    prefix = found.group()
    if prefix in cardinal_mapping:
        street_directions[prefix].add(street_name)
| 112 | + |
| 113 | + |
def check_invalid_addr(dic, street_name, element):
    """Collect and print elements whose street name is listed in `problems`.

    When street_name is one of the known problem names, element is added to
    the set stored under that name in dic and its XML serialization is
    printed. Other names are ignored.

    Args:
        dic: mapping of street name -> set of elements; it is indexed with a
            possibly new key, so it must create missing entries itself.
        street_name: value of the addr:street tag being checked.
        element: the XML element that carries the tag.
    """
    if street_name not in problems:
        return
    dic[street_name].add(element)
    serialized = ET.tostring(element, encoding='utf8', method='xml')
    print(serialized)
| 119 | + #print(element.dom.toprettyxml(indent = ' ')) |
| 120 | + |
def check_zip_city(dic, zip_or_city):
    """Add one to the count stored in dic for zip_or_city.

    Args:
        dic: mapping of value -> occurrence count. The current count is read
            before it is written, so a mapping without a default for missing
            keys raises KeyError on a value it has not seen.
        zip_or_city: the postal code or city name to count.
    """
    dic[zip_or_city] = dic[zip_or_city] + 1
| 123 | + |
| 124 | + |
| 125 | + |
| 126 | + |
| 127 | + |
| 128 | + |
def update_name(name, mapping, regex):
    """Return name with the token found by regex replaced via mapping.

    The first match of regex in name is looked up in mapping. If it is a key,
    that matched span is replaced by the mapped value; otherwise name is
    returned unchanged.

    Args:
        name: the street name to fix.
        mapping: dict of token -> replacement text (lookup is case-sensitive).
        regex: compiled pattern that locates the token inside name.

    Returns:
        The corrected name, or name itself when regex does not match or the
        matched token has no entry in mapping.
    """
    match = regex.search(name)
    if match is None:
        # Nothing to fix (e.g. an empty name); previously this raised
        # AttributeError on the missing match object.
        return name
    token = match.group()
    if token not in mapping:
        return name
    # Splice the replacement over the matched span only, so the replacement
    # text is inserted literally and no other part of the name is rewritten.
    return name[:match.start()] + mapping[token] + name[match.end():]
| 136 | + |
| 137 | + |
def test():
    """Run the audit on OSMFILE and pretty-print the mapping it returns."""
    audit_result = audit(OSMFILE)
    pprint.pprint(dict(audit_result))
    # Disabled follow-up step, kept for reference: for every name collected
    # by the audit, apply update_name() first with cardinal_mapping and
    # cardinal_direction_re, then with street_mapping and street_type_re,
    # and print "name => better_name".
| 149 | + |
# Run the audit only when this file is executed as a script, not on import.
if __name__ == '__main__':
    test()
0 commit comments