-
Notifications
You must be signed in to change notification settings - Fork 2
/
combine_nests.py
71 lines (59 loc) · 2.04 KB
/
combine_nests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import glob
import os
import re
import sys
from zipfile import ZIP_DEFLATED
from zipfile import ZipFile
import geopandas
import pandas as pd
import tools
def get_site(path):
    """Extract the site name from a processed-nests file path.

    Only the final path component is inspected. It must start with
    ``<site>_<digits>`` and later contain ``_processed_nests``. The ``<site>``
    part is matched greedily with ``\\w+``, so it may itself contain
    underscores and digits (``Jerrod_03_24_2020_processed_nests.shp`` yields
    ``Jerrod_03_24``).

    Args:
        path: Path (or bare file name) of a processed-nests file.

    Returns:
        The site portion captured from the file name.

    Raises:
        ValueError: If the file name does not follow the expected pattern.
    """
    file_name = os.path.basename(path)
    match = re.match(r"(\w+)_\d+.*_processed_nests", file_name)
    if match is None:
        # Fail with an explicit message rather than an AttributeError on None.
        raise ValueError(
            f"Cannot determine site from file name {file_name!r}: expected "
            "'<site>_<digits>..._processed_nests'"
        )
    return match.group(1)
def load_shapefile(x):
    """Read a nest shapefile, enforce its column types, and tag it with its site.

    Args:
        x: Path to the shapefile. It is passed to ``geopandas.read_file`` and
            to ``get_site``.

    Returns:
        The loaded frame with the listed columns cast to fixed types and an
        added ``site`` column holding the value returned by ``get_site(x)``.
    """
    # Shapefiles with no rows don't seem to keep the types they were written
    # with once they are saved and read back, so cast every column explicitly.
    expected_types = dict((
        ('nest_id', 'int'),
        ('Site', 'str'),
        ('Year', 'str'),
        ('xmean', 'float'),
        ('ymean', 'float'),
        ('first_obs', 'str'),
        ('last_obs', 'str'),
        ('num_obs', 'int'),
        ('species', 'str'),
        ('sum_top1', 'float'),
        ('num_top1', 'int'),
        ('bird_match', 'str'),
    ))
    nests = geopandas.read_file(x).astype(expected_types)
    nests["site"] = get_site(x)
    return nests
def combine(paths):
    """Take prediction shapefiles and wrap them into a single GeoDataFrame.

    Each path is loaded with ``load_shapefile``. A path that fails to load is
    reported on stdout and skipped, so one bad file does not abort the run.

    Args:
        paths: Iterable of shapefile paths.

    Returns:
        A ``geopandas.GeoDataFrame`` holding the rows of every shapefile that
        loaded, re-indexed from zero and using the CRS of the first loaded
        file.

    Raises:
        ValueError: If none of the paths could be loaded (including when
            ``paths`` is empty).
    """
    shapefiles = []
    for x in paths:
        try:
            shapefiles.append(load_shapefile(x))
        except Exception as err:
            # Best-effort skip, but do not swallow KeyboardInterrupt/SystemExit
            # and do show why the file was rejected.
            print(f"Mistructured file path: {x}. File not added to processed_nests.shp ({err!r})")

    if not shapefiles:
        # pd.concat would raise a generic ValueError here; be explicit instead.
        raise ValueError("No nest shapefiles could be loaded; nothing to combine")

    combined = pd.concat(shapefiles, ignore_index=True)
    summary = geopandas.GeoDataFrame(combined, crs=shapefiles[0].crs)
    return summary
if __name__ == "__main__":
    # Script entry point: combine the shapefiles given on the command line,
    # write the result as a shapefile, then replace it with a zip archive.
    working_dir = tools.get_working_dir()
    output_path = f"{working_dir}/everwatch-workflow/App/Zooniverse/data/"
    nest_files = sys.argv[1:]

    # write output to zooniverse app
    df = combine(nest_files)
    df.to_file(os.path.join(output_path, "nest_detections_processed.shp"))

    # Zip the shapefile for storage efficiency
    component_files = [
        os.path.join(output_path, f"nest_detections_processed.{ext}")
        for ext in ['cpg', 'dbf', 'prj', 'shp', 'shx']
    ]
    archive_path = os.path.join(output_path, "nest_detections_processed.zip")
    with ZipFile(archive_path, 'w', ZIP_DEFLATED) as archive:
        for focal_file in component_files:
            archive.write(focal_file, arcname=os.path.basename(focal_file))

    # Remove the loose files only after the archive has been closed, so a
    # failure while zipping never leaves the data both unzipped and deleted.
    for focal_file in component_files:
        os.remove(focal_file)