+ """Extract training data from NEON AOP data."""
import geopandas as gpd
import cv2
import os
import re
import numpy as np
import pandas as pd
- import matplotlib.pyplot as plt
- from shapely.geometry import Point
import rasterio
from deepforest import main
from deepforest import utilities
def extract_training_data(vst_data,
                          geo_data_frame,
-                         year, dpID='DP3.30010.001',
-                         savepath='/content',
-                         site='DELA'):
+                         year,
+                         dpID='DP3.30010.001',
+                         savepath='/content',
+                         site='DELA'):
    """
-     Extracting training data with geo_data_frame and image predictions.
+     Extract training data with geo_data_frame and image predictions.

    Parameters
    ------------
@@ -37,12 +37,13 @@ def extract_training_data(vst_data,
            int(x[:-1]))).astype(str)
        geo_data_frame = gpd.GeoDataFrame(vst_data, geometry=geometry, crs=epsg_codes.iloc[0])

-         extract_training_data(vst_data=vst_data, geo_data_frame=geo_data_frame, year='2018', dpID='DP3.30010.001',
-                               savepath='/content', site='DELA')
+         extract_training_data(vst_data=vst_data, geo_data_frame=geo_data_frame, year='2018',
+                               dpID='DP3.30010.001', savepath='/content', site='DELA')
    """
    retrieve_aop_data(vst_data, year, dpID, savepath)
    site_level_data = vst_data[vst_data.plotID.str.contains(site)]
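+     # Tile keys: floor easting/northing to the 1 km grid, matching NEON's
+     # 1 km x 1 km AOP tile naming (illustrative key: "247000_3664000").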
-     get_tiles = ((site_level_data.easting / 1000).astype(int) * 1000).astype(str) + "_" + ((site_level_data.northing / 1000).astype(int) * 1000).astype(str)
+     get_tiles = (((site_level_data.easting / 1000).astype(int) * 1000).astype(str) + "_" +
+                  ((site_level_data.northing / 1000).astype(int) * 1000).astype(str))
    print("get_tiles")
    print(get_tiles.unique())
@@ -85,46 +86,87 @@ def extract_training_data(vst_data,
            easting = row.easting
            northing = row.northing

-             x_min = int(affine[2] + 10 / affine[0] - easting)
-             y_min = int(affine[5] + 10 / affine[0] - northing)
-             x_max = int(affine[2] - 10 / affine[0] - easting)
-             y_max = int(affine[5] - 10 / affine[0] - northing)
+             x_min = int(affine[2] + 10 / affine[0] - easting)
+             y_min = int(affine[5] + 10 / affine[0] - northing)
+             x_max = int(affine[2] - 10 / affine[0] - easting)
+             y_max = int(affine[5] - 10 / affine[0] - northing)
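+             # The bounds above are derived from the tile's affine transform
+             # (origin affine[2]/affine[5], pixel size affine[0]) and are
+             # intended to span roughly 10 m on each side of the stem location.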
-             section_file = os.path.join(output_folder, f"section_{x_min}_{y_min}_{x_max}_{y_max}.tif")
+             file_name = f"section_{x_min}_{y_min}_{x_max}_{y_max}.tif"
+             section_file = os.path.join(output_folder, file_name)

            if section_file not in section_files:
-                 section = image[y_max:y_min, x_max:x_min, :]
-                 print("Section shape:", section.shape)
+                 section = image[y_max:y_min, x_max:x_min, :]
+                 print("Section shape:", section.shape)
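+                 # y_max < y_min and x_max < x_min by construction, so both
+                 # slices run low-to-high; numpy indexes rows (y) before columns (x).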
-                 section_meta = src.meta.copy()
-                 section_meta['width'], section_meta['height'] = (affine[2] + x_min) - (affine[2] + x_max), (affine[5] + y_min) - (affine[5] + y_max)
-                 section_meta['transform'] = rasterio.Affine(affine[0], 0, (affine[2] - x_min), 0, affine[4], (affine[5] - y_min))
+                 section_meta = src.meta.copy()
+                 section_meta['width'] = (affine[2] + x_min) - (affine[2] + x_max)
+                 section_meta['height'] = (affine[5] + y_min) - (affine[5] + y_max)
+                 section_meta['transform'] = rasterio.Affine(
+                     affine[0], 0, (affine[2] - x_min), 0, affine[4],
+                     (affine[5] - y_min))
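+                 # Width/height reduce to x_min - x_max and y_min - y_max pixels;
+                 # the new Affine is meant to re-anchor the crop's upper-left corner.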
-                 section_np = np.moveaxis(section, -1, 0)
+                 # rasterio writes band-first arrays (bands, rows, cols), so move
+                 # the channel axis from last to first.
+                 section_np = np.moveaxis(section, -1, 0)

-                 with rasterio.open(section_file, 'w', **section_meta) as dst:
-                     dst.write(section_np)
-                     section_affine = dst.transform
+                 with rasterio.open(section_file, 'w', **section_meta) as dst:
+                     dst.write(section_np)
+                     section_affine = dst.transform

-                 section_files[section_file] = section_affine
-
-                 print("Crop affine: ")
-                 print(section_affine)
-
-                 print("Expected file path:", section_file)
+                 section_files[section_file] = section_affine

+                 print("Crop affine: ")
+                 print(section_affine)

+             print("Expected file path:", section_file)
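+             # DeepForest predicts boxes in image pixel coordinates;
+             # boxes_to_shapefile projects them using the crop's georeferencing.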
            prediction = model.predict_image(path=section_file)

-             gdf = utilities.boxes_to_shapefile(prediction, root_dir=os.path.dirname(section_file), projected=True)
+             gdf = utilities.boxes_to_shapefile(
+                 prediction,
+                 root_dir=os.path.dirname(section_file),
+                 projected=True)

            all_predictions.append(gdf)

    all_predictions_df = pd.concat(all_predictions)
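
+     # Stash the predicted boxes: sjoin keeps the left (point) geometry, so the
+     # box polygons are restored as the geometry column after the join.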
+     all_predictions_df['temp_geo'] = all_predictions_df['geometry']
+
    merged_data = gpd.sjoin(geo_data_frame, all_predictions_df, how="inner", op="within")
+     # Note: geopandas >= 0.10 renames sjoin's 'op' argument to 'predicate'.
+     merged_data.drop(columns=['geometry'], inplace=True)
+     merged_data.rename(columns={'temp_geo': 'geometry'}, inplace=True)
+     # Rank canopy positions so more exposed crowns sort higher; NaN
+     # (unrecorded) ranks lowest.
+     canopy_position_mapping = {
+         np.nan: 0,
+         'Full shade': 1,
+         'Mostly shaded': 2,
+         'Partially shaded': 3,
+         'Full sun': 4,
+         'Open grown': 5
+     }
+
+     predictions = merged_data
+
+     predictions_copy = predictions.copy()
+
+     cp = 'canopyPosition'
+
+     predictions_copy[cp] = predictions_copy[cp].replace(canopy_position_mapping)
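+
+     # Several stems can fall within the same predicted box; keep a single
+     # record per box, preferring the dominant stem.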
+     duplicate_mask = predictions_copy.duplicated(subset=['xmin', 'ymin', 'xmax', 'ymax'],
+                                                  keep=False)
+
+     duplicate_entries = predictions[duplicate_mask]
+
+     print(duplicate_entries)
+
+     # Sort on the numeric canopy ranks (predictions_copy, not the raw strings)
+     # so the tallest, most exposed stem comes first within each box.
+     predictions_sorted = predictions_copy.sort_values(by=['height', cp, 'stemDiameter'],
+                                                       ascending=[False, False, False])
+
+     duplicates_mask = predictions_sorted.duplicated(
+         subset=['xmin', 'ymin', 'xmax', 'ymax'], keep='first')

+     clean_predictions = predictions_sorted[~duplicates_mask]

-     return merged_data
+     return clean_predictions