diff --git a/README.md b/README.md new file mode 100644 index 0000000..f73fc15 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +# omconvert + +## What is omm? + +omm is a line-based text format for OSM data that uses '>', '<' and '=' as delimiters. It is similar in spirit to CSV: one object per line. + +Each line follows this schema: + +### First part -- Basic Attributes: + +``` + +[type]>[id]>[visible]>[version]>[changeset]>[timestamp]>[user]>[uid]> + +``` + +Where: + +type: type of object, represented as an integer value. + + 0 for node + + 1 for way + + 2 for relation + +visible: whether the object is visible (that is, not deleted). + + 0 for false + + 1 for true + +### Second part -- Coordinates/Nodes/Members of object + +For nodes: + +``` + +[Lat]<[Lon]> + +``` + +For ways: + +``` + +[nd ref]<[nd ref]<[nd ref]...<[nd ref]> + +``` + +For relations: + +``` + +[member type]=[member id]=[member role]<[member type]=[member id]=[member role]<[member type]=[member id]=[member role]<...<[member type]=[member id]=[member role]> + +``` + +### Third part -- tagging + +``` + +[tag key]=[tag value]<[tag key]=[tag value]<...<[tag key]=[tag value] + +``` + +## What is omc? + +omc is the changeset version of omm. It is derived from omm, with minor modifications. + +To denote the modification action, a number followed by a colon is written on its own line before the objects it applies to. + +For example, `<create>` would be 3: + + `<modify>` would be 4: + + `<delete>` would be 5: + +## Why omm and omc + +It strikes a balance between compressed form and XML form. + +### Compared to osm & osc + +- Compact ( around ⅓ of the size ) + +### Compared to o5m & o5c & obf + +- Human readable + +- High compression ratio through different methods + +## How to install + +Prerequisites: Python 3 interpreter with standard library + +No building is required. Just move the Python scripts to your destination folder and you can run them! + +## License + +It is released under the GNU LGPLv3 license. 
"""Convert between OSM XML (.osm / .osc) and the compact omm / omc row format.

This block holds the code of four scripts -- omtools.py, s2m.py, m2s.py and
omconvert.py -- one section per original file.

Row layout (one object per line, fields separated by '>'):

    type>id>visible>version>changeset>timestamp>user>uid>body>tags

* type: 0 node, 1 way, 2 relation
* visible: 0 false, 1 true
* timestamp: whole seconds since the Unix epoch, UTC
* body: 'lat<lon' (node), 'ref<ref<...' (way) or
  'type=ref=role<type=ref=role<...' (relation)
* tags: 'key=value<key=value<...'

User names, member roles, tag keys and tag values are escaped so that they
can never contain a delimiter.  In omc files a line '3: ', '4: ' or '5: '
opens a create, modify or delete section respectively.
"""

import os
import sys
import xml.etree.ElementTree as ET
from calendar import timegm
from datetime import datetime, timezone
from time import time

TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

_TYPE_CODES = {'node': '0', 'way': '1', 'relation': '2'}
_TYPE_NAMES = {code: name for name, code in _TYPE_CODES.items()}
_ACTION_CODES = {'create': 3, 'modify': 4, 'delete': 5}
_ACTION_NAMES = {code: name for name, code in _ACTION_CODES.items()}
_ACTION_MARKERS = {str(code) + ':': code for code in _ACTION_NAMES}

# '&' must be escaped first (and restored last) so that text which already
# contains something like '&lt;' survives a round trip unchanged.
_ESCAPES = (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('=', '&eq;'))

_OSM_ROOT = (
    ('version', '0.6'),
    ('generator', 'omconvert 0.1'),
    ('copyright', 'OpenStreetMap and contributors'),
    ('attribution', 'http://www.openstreetmap.org/copyright'),
    ('license', 'http://opendatacommons.org/licenses/odbl/1-0/'),
)
_OSC_ROOT = (
    ('version', '0.6'),
    ('generator', 'omconvert 0.1'),
)


# --------------------------------------------------------------------------
# omtools.py
# --------------------------------------------------------------------------

def indent(elem, level=0):
    """Add whitespace to *elem* and its descendants, in place, for readable XML.

    Children are placed on their own lines, one space deeper per nesting
    level.  Existing non-blank text and tails are left untouched.
    """
    pad = '\n' + level * ' '
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = pad + ' '
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
        child = None
        for child in elem:
            indent(child, level + 1)
        # The last child is followed by the parent's closing tag, so it is
        # dedented back to the parent's level.
        if not child.tail or not child.tail.strip():
            child.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        elem.tail = pad


def escape(input):
    """Return *input* with the omm delimiters replaced by entities.

    str.replace returns a new string; the result has to be kept, otherwise
    the delimiters stay in the text and corrupt the row.
    """
    for raw, entity in _ESCAPES:
        input = input.replace(raw, entity)
    return input


def unescape(input):
    """Return *input* with the entities written by escape() turned back."""
    for raw, entity in reversed(_ESCAPES):
        input = input.replace(entity, raw)
    return input


def _fail(message):
    """Abort the conversion with *message* (raises SystemExit)."""
    raise SystemExit(message)


def _require_int(value, label, signed=False):
    """Return *value* unchanged if it is a decimal integer string, else abort.

    A leading '-' is accepted only when *signed* is true.  None and the
    empty string are rejected.
    """
    digits = value
    if signed and value and value.startswith('-'):
        digits = value[1:]
    if not digits or not digits.isdigit():
        _fail(label + ' contains non-digit characters.')
    return value


# --------------------------------------------------------------------------
# s2m.py -- OSM XML to omm / omc
# --------------------------------------------------------------------------

def s2m(elem):
    """Serialise one parsed OSM element as an omm row.

    elem: an ElementTree element whose tag is 'node', 'way' or 'relation'.
    Returns the row as a str ending in a newline.  It always has ten
    '>'-separated fields, so a row without tags ends in '>'.
    Raises SystemExit when the element is of an unknown type or a required
    attribute is missing or malformed.
    """
    code = _TYPE_CODES.get(elem.tag)
    if code is None:
        _fail('Object type not recognised.')
    attrib = elem.attrib

    visible = '0' if attrib.get('visible') == 'false' else '1'
    # Negative ids are accepted here because m2s accepts them too.
    obj_id = _require_int(attrib.get('id'), 'ID', signed=True)
    version = _require_int(attrib.get('version'), 'Version number')
    changeset = _require_int(attrib.get('changeset'), 'Changeset number')
    try:
        parsed = datetime.strptime(attrib.get('timestamp'), TIMESTAMP_FORMAT)
    except (TypeError, ValueError):
        _fail('Timestamp is missing or malformed.')
    # The trailing 'Z' means UTC, so the conversion must not depend on the
    # time zone of the machine running the script.
    stamp = str(timegm(parsed.timetuple()))
    user = escape(attrib.get('user', ''))
    uid = _require_int(attrib.get('uid'), 'User ID')

    if elem.tag == 'node':
        body = attrib.get('lat', '') + '<' + attrib.get('lon', '')
    elif elem.tag == 'way':
        body = '<'.join(nd.get('ref', '') for nd in elem.findall('nd'))
    else:
        entries = []
        for member in elem.findall('member'):
            member_code = _TYPE_CODES.get(member.get('type'))
            if member_code is None:
                _fail('Member type not recognised.')
            entries.append(member_code + '=' + member.get('ref', '') + '='
                           + escape(member.get('role', '')))
        body = '<'.join(entries)

    tags = '<'.join(
        escape(tag.get('k', '')) + '=' + escape(tag.get('v', ''))
        for tag in elem.findall('tag')
    )
    fields = (code, obj_id, visible, version, changeset, stamp, user, uid,
              body, tags)
    return '>'.join(fields) + '\n'


def osc2omc(inputfile, outputfile):
    """Append the objects of the OSM change file *inputfile* to *outputfile*.

    Each create / modify / delete section is introduced by its marker line
    ('3: ', '4: ' or '5: ').  Prints the elapsed time when done.
    """
    start = time()
    with open(outputfile, 'a', encoding='utf-8') as output:
        context = iter(ET.iterparse(inputfile, events=('start', 'end')))
        event, root = next(context)
        for event, elem in context:
            if event == 'start':
                action = _ACTION_CODES.get(elem.tag)
                if action is not None:
                    output.write(str(action) + ': \n')
            elif elem.tag in _TYPE_CODES:
                output.write(s2m(elem))
                # Drop the object's children and attributes right away.
                elem.clear()
            elif elem.tag in _ACTION_CODES:
                # A whole section is done: release everything parsed so far.
                root.clear()
    print('Elapsed Time: ' + str(time() - start) + 's')


def osm2omm(inputfile, outputfile):
    """Append the objects of the OSM file *inputfile* to *outputfile* as rows.

    Prints the elapsed time when done.
    """
    start = time()
    with open(outputfile, 'a', encoding='utf-8') as output:
        context = iter(ET.iterparse(inputfile, events=('start', 'end')))
        event, root = next(context)
        for event, elem in context:
            if event == 'end' and elem.tag in _TYPE_CODES:
                output.write(s2m(elem))
                # Objects are direct children of the root; clearing it keeps
                # memory use flat on large files.
                root.clear()
    print('Elapsed time: ' + str(time() - start) + 's')


# --------------------------------------------------------------------------
# m2s.py -- omm / omc to OSM XML
# --------------------------------------------------------------------------

def m2s(line):
    """Convert one omm row to an XML fragment.

    line: a row as written by s2m(); a trailing newline is ignored.
    Returns the serialised element as UTF-8 bytes, ending in a newline.
    Raises SystemExit when a field is missing or malformed.
    """
    fields = line.rstrip('\r\n').split('>')
    if len(fields) < 9:
        _fail('Row has too few fields.')

    kind = _TYPE_NAMES.get(fields[0])
    if kind is None:
        _fail('Object type not recognised.')
    root = ET.Element(kind)
    root.set('id', _require_int(fields[1], 'ID', signed=True))
    if fields[2] not in ('0', '1'):
        _fail('Unknown visibility.')
    root.set('visible', 'true' if fields[2] == '1' else 'false')
    root.set('version', _require_int(fields[3], 'Version number'))
    root.set('changeset', _require_int(fields[4], 'Changeset number'))
    seconds = int(_require_int(fields[5], 'Timestamp'))
    root.set('timestamp',
             datetime.fromtimestamp(seconds, tz=timezone.utc)
             .strftime(TIMESTAMP_FORMAT))
    root.set('user', unescape(fields[6]))
    root.set('uid', _require_int(fields[7], 'User ID'))

    body = fields[8]
    if kind == 'node':
        lat, _, lon = body.partition('<')
        if lat:
            root.set('lat', lat)
        if lon:
            root.set('lon', lon)
    elif kind == 'way':
        for ref in body.split('<'):
            if ref:
                ET.SubElement(root, 'nd', ref=ref)
    else:
        for member in body.split('<'):
            if not member:
                continue
            try:
                member_code, ref, role = member.split('=', 2)
            except ValueError:
                _fail('Relation member is malformed.')
            member_kind = _TYPE_NAMES.get(member_code)
            if member_kind is None:
                _fail('Member type not recognised.')
            ET.SubElement(root, 'member', type=member_kind, ref=ref,
                          role=unescape(role))

    # Rows written by s2m() carry the tags in field 9.  Joining everything
    # from there on also accepts rows without that field and older node rows
    # that have an empty extra field in front of the tags.
    for entry in ''.join(fields[9:]).split('<'):
        if entry:
            key, _, value = entry.partition('=')
            ET.SubElement(root, 'tag', k=unescape(key), v=unescape(value))

    indent(root)
    root.tail = '\n'  # every fragment ends its own line, children or not
    return ET.tostring(root, encoding='utf-8', method='xml')


def endlast(value, out=''):
    """Return the closing tag line for action code *value*.

    value: 3 (create), 4 (modify) or 5 (delete).
    Returns a str such as '</create>\\n'; *out* for any other value.
    """
    name = _ACTION_NAMES.get(value)
    if name is None:
        return out
    return '</' + name + '>\n'


def _open_document(path, root_tag, attributes):
    """Open *path* positioned so that children of *root_tag* can be appended.

    A missing or empty file gets the opening root tag built from
    *attributes* (a sequence of (name, value) pairs).  An existing document
    has its closing root tag removed so that more children can follow.
    Returns a binary file object; the caller writes the closing tag and
    closes the file.
    """
    closing = ('</' + root_tag + '>').encode('utf-8')
    try:
        handle = open(path, 'r+b')
    except FileNotFoundError:
        handle = open(path, 'w+b')
    try:
        handle.seek(0, os.SEEK_END)
        size = handle.tell()
        if size == 0:
            root = ET.Element(root_tag)
            for key, value in attributes:
                root.set(key, value)
            markup = ET.tostring(root, encoding='utf-8',
                                 short_empty_elements=False)
            # '<osm ...></osm>' -> '<osm ...>'
            handle.write(markup[:-len(closing)] + b'\n')
        else:
            # Only the end of the file is inspected; it may be very large.
            window = min(size, len(closing) + 16)
            handle.seek(size - window)
            cut = handle.read(window).rfind(closing)
            handle.seek(size if cut == -1 else size - window + cut)
            handle.truncate()
    except Exception:
        handle.close()
        raise
    return handle


def omc2osc(inputfile, outputfile):
    """Convert the omc file *inputfile* to OSM change XML in *outputfile*.

    Objects are appended when *outputfile* already holds a document.
    Prints the elapsed time when done.
    """
    start = time()
    current = None  # action code of the section that is currently open
    with open(inputfile, 'r', encoding='utf-8') as rows:
        with _open_document(outputfile, 'osmChange', _OSC_ROOT) as output:
            for line in rows:
                marker = line.strip()
                if not marker:
                    continue
                # The whole line has to be the marker: a substring test
                # would also fire on data rows that contain e.g. '3: '.
                action = _ACTION_MARKERS.get(marker)
                if action is None:
                    output.write(m2s(line))
                    continue
                output.write(endlast(current).encode('utf-8'))
                opening = '<' + _ACTION_NAMES[action] + '>\n'
                output.write(opening.encode('utf-8'))
                current = action
            output.write(endlast(current).encode('utf-8'))
            output.write(b'</osmChange>\n')
    print('Elapsed Time: ' + str(time() - start) + 's')


def omm2osm(inputfile, outputfile):
    """Convert the omm file *inputfile* to OSM XML in *outputfile*.

    Objects are appended when *outputfile* already holds a document.
    Prints the elapsed time when done.
    """
    start = time()
    with open(inputfile, 'r', encoding='utf-8') as rows:
        with _open_document(outputfile, 'osm', _OSM_ROOT) as output:
            for line in rows:
                if line.strip():
                    output.write(m2s(line))
            output.write(b'</osm>\n')
    print('Elapsed Time: ' + str(time() - start) + 's')


# --------------------------------------------------------------------------
# omconvert.py -- command line entry point
# --------------------------------------------------------------------------

_CONVERTERS = (
    ('.osm', '.omm', osm2omm),
    ('.omm', '.osm', omm2osm),
    ('.osc', '.omc', osc2omc),
    ('.omc', '.osc', omc2osc),
)


def main(argv):
    """Run the converter selected by the file names in *argv*.

    argv: [program, input file, output file].
    Returns 0 on success and 2 when the arguments cannot be used.
    """
    if len(argv) != 3:
        print('Invalid number of arguments.')
        print('First argument for input file name.')
        print('Second argument for output file name.')
        # Stop here: argv[1] / argv[2] may not exist.
        return 2
    source, target = argv[1], argv[2]
    for source_ext, target_ext, convert in _CONVERTERS:
        if source_ext in source and target_ext in target:
            convert(source, target)
            return 0
    print('Invalid file type or file extension.')
    print('Input: .osm or .omm (OSM), or .osc or .omc(OSM Change)')
    print('Output: .osm or .omm (OSM), or .osc or .omc(OSM Change)')
    print('Currently reading from standard input or printing to standard output is not supported.')
    return 2


if __name__ == '__main__':
    sys.exit(main(sys.argv))