From 864f78b7f916e69fbf5a8d270cbeddf37c4b8d30 Mon Sep 17 00:00:00 2001 From: Luke Bergmann Date: Wed, 9 Dec 2020 20:40:29 -0800 Subject: [PATCH] Add initial ptobler library code --- helloworld/ptobler/__init__.py | 35 +++ helloworld/ptobler/analysis.py | 263 ++++++++++++++++++ helloworld/ptobler/inversedomain.py | 156 +++++++++++ helloworld/ptobler/pointdata.py | 367 +++++++++++++++++++++++++ helloworld/ptobler/project.py | 403 ++++++++++++++++++++++++++++ 5 files changed, 1224 insertions(+) create mode 100644 helloworld/ptobler/__init__.py create mode 100644 helloworld/ptobler/analysis.py create mode 100644 helloworld/ptobler/inversedomain.py create mode 100644 helloworld/ptobler/pointdata.py create mode 100644 helloworld/ptobler/project.py diff --git a/helloworld/ptobler/__init__.py b/helloworld/ptobler/__init__.py new file mode 100644 index 0000000..37a9342 --- /dev/null +++ b/helloworld/ptobler/__init__.py @@ -0,0 +1,35 @@ +from .pointdata import random_points +from .pointdata import random_points_df +from .pointdata import tag_with_ID_and_dir +from .pointdata import add_offset_points +from .pointdata import make_and_combine_offsets +from .pointdata import make_random_points +from .pointdata import make_lattice +from .pointdata import add_points +from .pointdata import get_file_name + +from .inversedomain import get_forward_projection +from .inversedomain import get_back_projection +from .inversedomain import get_convex_hull +from .inversedomain import get_alpha_shape +from .inversedomain import back_project +from .inversedomain import estimate_inverse_domain +from .inversedomain import estimate_inverse_cuts + +from .project import empirically_project +from .project import get_barycentric_coordinates +from .project import naive_interpolation +from .project import linear_interpolation +from .project import add_wrap +from .project import get_proj4string +from .project import proj4_project +from .project import make_1x3_gdf +from .project import make_1x3_clipped_gdf +# from .project import empirical_projection_calculate_domain +# from .project import load_empirical_projection + +from .analysis import add_projected_coord_columns +from .analysis import perform_analysis_and_add_columns +from .analysis import handle_discontinuities +from .analysis import summary_analysis_of_files +from .analysis import get_empty_analysis_dataframe diff --git a/helloworld/ptobler/analysis.py b/helloworld/ptobler/analysis.py new file mode 100644 index 0000000..bae42a4 --- /dev/null +++ b/helloworld/ptobler/analysis.py @@ -0,0 +1,263 @@ +import numpy as np +import copy + +import pandas + + +def get_empty_analysis_dataframe(): + return pandas.DataFrame(columns=[ + 'full_filename', + 'is_empirical_proj', + 'data_filename', 'proj_filename', + 'data_num_points', 'proj_num_points', + 'data_spatial_distribution', 'proj_spatial_distribution', + 'proj_name', + 'h_mean', 'h_stddev', + 'k_mean', 'k_stddev', + 's_mean', 's_stddev', + 'omega_mean', 'omega_stddev' + ], data=[]) + + +def add_projected_coord_columns(pts): + """ + Reshapes projected dataframe with offset points for analysis + + Mainly for internal use. Unpacks the x, y associated with the various + E, W, N, S, offsets and makes xW, yW, xE, yE, etc columns that will + used in the analysis + + Arguments + --------- + pts: a dataframe with lon, lat, x, y, ID and dir attributes, as + follows: + ID` = unique ID for each 'parent' point, shared with + its 4 shifted 'child' points + dir` = a code where: . 
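+            (so each parent contributes five rows: illustratively, the rows
+            for ID 7 with dir '.', 'W', 'E', 'S', 'N' collapse after reshaping
+            into one row carrying lonW, latW, xW, yW ... xN, yN alongside the
+            parent's own lon, lat, x, y). In this coding '.'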
indicates the source point; and + W, E, S, N indicates displacement from parent + with same ID + lon, lat = the longitude and latitude values + x, y = the projected coordinates + Returns + ------- + reshaped dataframe with columns for lon, lat, x, y at each of the + offset directions (16 additional columns in all) + """ + results = copy.copy(pts) + results = results[results.dir == '.'] + for dirn in ['W', 'E', 'S', 'N']: + displaced_coord = pts[pts.dir == dirn][['ID', 'lon', 'lat', 'x', 'y']] + results = results.merge(displaced_coord, on='ID', suffixes=('', dirn)) + return results + + +def handle_discontinuities(row, verbosity=0, suppress=0): + """ + Detects unusually rapid change in the partial differences of the supplied data + + Attempts to check to see if there is an interruption in the map projection + that runs *through* a set of offsetted points. + + Assume that deviation of a factor of 100 in either of the following pairs of distances (absolute values) + means an interruption: + - between x-xW and xE-x, or + - between yN-y and y-yS + + If an interruption is detected, turn the distortion measures for the row into NaNs and flag the row. + + Arguments + --------- + row: a single row of data from the analysis table + verbosity: 0 will print minimal information for user, >0 will show too much information + suppress: suppress any results where the error_flag < 1/suppress or > suppress, default 0 + causes no suppression of results + Returns + ------- + a row with suspect results tagged as above + """ + ZERO_TOL = 10 ** -7 + + if (abs(np.float64(row.x) - row.xW) < ZERO_TOL) and (abs(row.xE - row.x) < ZERO_TOL): + dxRatioEW = 1.0 + else: + dxRatioEW = abs(row.xE - row.x) / abs(np.float64(row.x) - row.xW) + + if (abs(np.float64(row.x) - row.xS) < ZERO_TOL) and (abs(row.xN - row.x) < ZERO_TOL): + dxRatioNS = 1.0 + else: + dxRatioNS = abs(row.xN - row.x) / abs(np.float64(row.x) - row.xS) + + if (abs(np.float64(row.y) - row.yW) < ZERO_TOL) and (abs(row.yE - row.y) < ZERO_TOL): + dyRatioEW = 1.0 + else: + dyRatioEW = abs(row.yE - row.y) / abs(np.float64(row.y) - row.yW) + + if (abs(np.float64(row.y) - row.yS) < ZERO_TOL) and (abs(row.yN - row.y) < ZERO_TOL): + dyRatioNS = 1.0 + else: + dyRatioNS = abs(row.yN - row.y) / abs(np.float64(row.y) - row.yS) + + offset_ratios = np.array([dxRatioEW, dxRatioNS, dyRatioEW, dyRatioNS]) + row['error_flag'] = np.max(offset_ratios) # 'projection_interruption_likely' + + if not suppress == 0: + if any(np.isnan(offset_ratios)) or any(offset_ratios <= (1/ suppress)) or any(offset_ratios >= suppress): + row['cos_lat'] = float('NaN') + row['sin_theta'] = float('NaN') + row['a_dash'] = float('NaN') + row['b_dash'] = float('NaN') + row['h'] = float('NaN') + row['k'] = float('NaN') + row['s'] = float('NaN') + row['omega'] = float('NaN') + if verbosity > 0: + print("Discountinuity around: ", int(row.lon), ",", int(row.lat)) + return row + + +def perform_analysis_and_add_columns(results, drop_debugging_info=False, verbosity=0, suppress=0): + """ + Performs analysis to calculate the Tissot distortion metrics + + Also carries out the discontinuity detection using handle_discontinuities + + Arguments + --------- + results: the results dataframe being built + drop_debugging_info: bool, if True throwaway detailed data when + done, if False retain information (default False) + verbosity: 0: minimal information printed to console >0: too much information + suppress: threshold for handling discontinuities (default 0 will + cause no suppression) + Returns + ------- + analysis 
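+    results DataFrame: the input augmented with h, k, s, omega, a, b
+    and error_flag columns.
+
+    Example (illustrative, assuming pts is a hypothetical offsets frame):
+
+        res = add_projected_coord_columns(pts)
+        res = perform_analysis_and_add_columns(res, suppress=100)
+        res[['h', 'k', 's', 'omega']].describe()
+
+    As a sanity check on the formulas: plate carree (x = R*lon, y = R*lat,
+    angles in radians) gives h = 1, k = 1/cos(lat), s = k and
+    omega = 2*arcsin((k-1)/(k+1)), about 38.9 degrees at latitude 60.
+    Values far from these for equirectangular input would point to a bug.
+    The function returns the augmented analysis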
results DataFrame + """ + print('Basic analysis...') + results['error_flag'] = "" + + # Partial derivatives of x, y against lon, lat + dx_dlon = (results.xE - results.xW) / np.radians(results.lonE - results.lonW) + dx_dlat = (results.xN - results.xS) / np.radians(results.latN - results.latS) + dy_dlon = (results.yE - results.yW) / np.radians(results.lonE - results.lonW) + dy_dlat = (results.yN - results.yS) / np.radians(results.latN - results.latS) + + if not drop_debugging_info: + results['dx_dlon'] = dx_dlon + results['dx_dlat'] = dx_dlat + results['dy_dlon'] = dy_dlon + results['dy_dlat'] = dy_dlat + + # Earth radius per spherical EPSG 4047 + R = 6371007 + # latitudinal cosine correction + cos_lat = np.cos(np.radians(results.lat)) + + if not drop_debugging_info: + results['cos_lat'] = cos_lat + + # scale factors in latitudinal and longitudinal directions + h = np.sqrt(np.square(dx_dlat) + np.square(dy_dlat)) / R + k = np.sqrt(np.square(dx_dlon) + np.square(dy_dlon)) / (R * cos_lat) + + # area scale factor, s + sin_theta = (dy_dlat * dx_dlon - dx_dlat * dy_dlon) / (R * R * h * k * cos_lat) + s = h * k * sin_theta + + if not drop_debugging_info: + results['sin_theta'] = sin_theta + + # maximum angular distortion, omega + a_dash = np.sqrt(np.square(h) + np.square(k) + 2 * s) + b_dash = np.sqrt(np.square(h) + np.square(k) - 2 * s) + omega = np.degrees(2 * np.arcsin(b_dash / a_dash)) + + if not drop_debugging_info: + results['a_dash'] = a_dash + results['b_dash'] = b_dash + + # add the results to the data table + results['h'] = h + results['k'] = k + results['s'] = s + results['omega'] = omega + results['a'] = (a_dash + b_dash) / 2 + results['b'] = (a_dash - b_dash) / 2 + + # This needs to be after all the calculations are done -> + # Check for discontinuities in the projection. + print('Discontinuity checking...') + results = results.apply(handle_discontinuities, axis=1, verbosity=verbosity, suppress=suppress) + + # if we're not debugging, drop the columns we no longer need + if drop_debugging_info: + results = results[['ID', 'lon', 'lat', 'x', 'y', 'h', 'k', 's', 'omega', 'error_flag']] + + return results + + +def summary_analysis_of_files(filelist, folder, results_df=None, verbosity=0, suppress=0): + """ + Analyses distortion for files in the list, writing results to specified folder + + Optionally adds to a summary dataframe, if provided, otherwise returns summary of + this set of analyses + + Arguments + --------- + filelist: list of files to analyse - must include offsets and lon, + lat, ID, dir, x, y atttributes + folder: folder to write results to + results_df: a dataframe to append summary results to + verbosity: 0: prints minimal information to console >0 too much information! 
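+        (both verbosity and suppress are handed straight through to
+        perform_analysis_and_add_columns for each file)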
+ suppress: handling of diagnosed discontinuities default 0 will cause no suppression + :return: + the summary results dataframe + """ + if results_df is None: + results_df = get_empty_analysis_dataframe() + for f in filelist: + source = pandas.read_csv(f) + res = add_projected_coord_columns(source) + print('Running analysis on :', f) + res = perform_analysis_and_add_columns(res, drop_debugging_info=False, + verbosity=verbosity, suppress=suppress) + basename = f.rsplit('/', 1)[-1].split('.')[0] + res.to_csv(folder + '/' + basename + '-analysis.csv', index=False) + + # Fill out new row in the results_df dataframe + new_row = results_df.shape[0] + 1 + filename = f.rsplit('/', 1)[-1] + results_df.at[new_row, 'full_filename'] = filename + + if '__using__' in filename: + results_df.at[new_row, 'is_empirical_proj'] = 'True' + results_df.at[new_row, 'data_filename'] = filename.split('__')[0] + results_df.at[new_row, 'proj_filename'] = filename.split('__')[-1].split('.')[0] + results_df.at[new_row, 'proj_num_points'] = int(results_df.at[new_row, 'proj_filename'].split('-')[1]) + results_df.at[new_row, 'proj_spatial_distribution'] = results_df.at[new_row, 'proj_filename'].split('-')[0] + results_df.at[new_row, 'proj_name'] = results_df.at[new_row, 'proj_filename'].split('-')[-1] + else: + results_df.at[new_row, 'is_empirical_proj'] = 'False' + results_df.at[new_row, 'data_filename'] = ("-".join(filename.split('.')[0].split('-')[:-2])) + results_df.at[new_row, 'proj_filename'] = "" + results_df.at[new_row, 'proj_num_points'] = np.NaN + results_df.at[new_row, 'proj_spatial_distribution'] = "" + results_df.at[new_row, 'proj_name'] = filename.split('.')[0].split('-')[-1] + + results_df.at[new_row, 'data_num_points'] = int(results_df.at[new_row, 'data_filename'].split('-')[1]) + results_df.at[new_row, 'data_spatial_distribution'] = results_df.at[new_row, 'data_filename'].split('-')[0] + + results_df.at[new_row, 'h_mean'] = res['h'].mean(skipna=True) + results_df.at[new_row, 'h_stddev'] = res['h'].std(skipna=True) + results_df.at[new_row, 'k_mean'] = res['k'].mean(skipna=True) + results_df.at[new_row, 'k_stddev'] = res['k'].std(skipna=True) + results_df.at[new_row, 'a_mean'] = res['a'].mean(skipna=True) + results_df.at[new_row, 'a_stddev'] = res['a'].std(skipna=True) + results_df.at[new_row, 'b_mean'] = res['b'].mean(skipna=True) + results_df.at[new_row, 'b_stddev'] = res['b'].std(skipna=True) + results_df.at[new_row, 's_mean'] = res['s'].mean(skipna=True) + results_df.at[new_row, 's_stddev'] = res['s'].std(skipna=True) + results_df.at[new_row, 'omega_mean'] = res['omega'].mean(skipna=True) + results_df.at[new_row, 'omega_stddev'] = res['omega'].std(skipna=True) + return results_df diff --git a/helloworld/ptobler/inversedomain.py b/helloworld/ptobler/inversedomain.py new file mode 100644 index 0000000..9d59720 --- /dev/null +++ b/helloworld/ptobler/inversedomain.py @@ -0,0 +1,156 @@ +import numpy +import scipy.spatial + +import geopandas +from shapely.geometry import Polygon +from pysal.lib.cg.alpha_shapes import alpha_shape_auto + +import numba + + +def get_forward_projection(df): + """ + Returns a dictionary of (lon, lat) -> (x, y) + + Dataframe is assumed to have x. 
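+    y, lon, lat columns. A quick illustrative use (hypothetical values):
+
+        import pandas
+        df = pandas.DataFrame({'lon': [10.0], 'lat': [5.0],
+                               'x': [1.5], 'y': [0.7]})
+        get_forward_projection(df)[(10.0, 5.0)]   # -> (1.5, 0.7)
+
+    get_back_projection builds the inverse dictionary from the same x,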
y, lon, lat columns + + Arguments + --------- + df: a pandas.DataFrame with x, y, lon, lat columns + Returns + ------- + a dictionary with keys (lon, lat) values (x, y) + """ + return dict(zip(zip(df.lon, df.lat), zip(df.x, df.y))) + + +def get_back_projection(df): + """ + Returns a dictionary of (x,y) -> (lon, lat) + + Dataframe is assumed to have x. y, lon, lat columns + + Arguments + --------- + df: a pandas.DataFrame with x, y, lon, lat columns + Returns + ------- + a dictionary with keys (x, y) values (lon, lat) + """ + return dict(zip(zip(df.x, df.y), zip(df.lon, df.lat))) + + +def get_convex_hull(df, xvar='x', yvar='y'): + """ + Returns convex hull of specified attributes in df as a list of (x,y) tuples + + Note: use scipy.spatial.ConvexHull because shapely convex_hull appears unreliable + see https://github.com/Toblerity/Shapely/issues/541 due to an upstream issue + in GEOS + + Arguments + --------- + df: a pandas.DataFrame that has columns named in xvar, yvar + xvar: name of the column containing eastings (default 'x') + yvar: name of the column containing northings (default 'y') + Returns + ------- + a list of (x, y) tuples + """ + points = numpy.array(df[[xvar, yvar]]) + hull = scipy.spatial.ConvexHull(points) + return [tuple(points[i]) for i in hull.vertices] + + +def get_alpha_shape(df, xvar='x', yvar='y'): + """ + Returns optimized alpha shape of specified attributes in df as a list of (x,y) tuples + + Arguments + --------- + df: a pandas.DataFrame that has columns named in xvar, yvar + xvar: name of the column containing eastings (default 'x') + yvar: name of the column containing northings (default 'y') + Returns + ------- + a list of (x, y) tuples + """ + points = numpy.array([(xy[0], xy[1]) for xy in zip(df[xvar], df[yvar])]) + return [xy for xy in alpha_shape_auto(points).boundary.coords] + + +def back_project(points, Q): + """ + Returns a list of tuples of points projected by lookup in dictionary Q + + Arguments + --------- + points: a list of (x, y) tuples + Q: a dictionary with keys including the (x, y) tuples and values + their corresponding (lon, lat) tuples + Returns + ------- + a list of (lon, lat) tuples + """ + return [Q[xy] for xy in points] + + +def estimate_inverse_cuts(df, alpha_shape=False, method='CG', as_list=False): + """ + Returns an estimate of the cuts or holes in the domain + + Currently only estimation by computational geometry (convex hull or alpha shape) + is supported + + Arguments + --------- + df: pandas.DataFrame with x, y, lon, lat columns + alpha_shape: bool, if True returns an alphashape, if False a convex hull + (default True) + method: a string indicate the method to use. 
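+        (illustrative call, for a hypothetical projected frame df:
+        estimate_inverse_cuts(df, alpha_shape=True, as_list=True) back-projects
+        the alpha-shape outline of the x, y points and returns it as a list of
+        (lon, lat) tuples);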
'CG' is the only supported + method (computational geometry) at present (default 'CG') + as_list: bool if True returns list of 2-tuples, False returns a Polygon + (default False) + Returns + ------- + a geopandas.GeoDataFrame or list of 2-tuples + """ + if method == 'CG': + if alpha_shape: + poly = back_project(get_alpha_shape(df), get_back_projection(df)) + else: + poly = back_project(get_convex_hull(df), get_back_projection(df)) + if as_list: + return poly + else: + poly = Polygon(poly) + gs = geopandas.GeoSeries([poly], crs='+proj=longlat +R=6371007 +no_defs') + return geopandas.GeoDataFrame(geometry=gs) + + return None + + +def estimate_inverse_domain(df, alpha_shape=False, method='CG'): + """ + Returns an estimate of the domain of the projection + + Currently only estimation by computational geometry is supported + + Arguments + --------- + df: pandas.DataFrame with x, y, lon, lat columns + alpha_shape: bool if True estimates holes in the domain as an + alpha shape, if False uses convex hull (default False) + method: string indicating method. Currently only computational + geometry is supported (default 'CG') + Returns + a geopandas.GeoDataFrame (with holes) + """ + if method == 'CG': + cuts = estimate_inverse_cuts(df, alpha_shape, as_list=True) + exterior = get_convex_hull(df, xvar='lon', yvar='lat') + poly = Polygon(exterior, [cuts]) + gs = geopandas.GeoSeries([poly], crs='+proj=longlat +R=6371007 +no_defs') + return geopandas.GeoDataFrame(geometry=gs) + + return None diff --git a/helloworld/ptobler/pointdata.py b/helloworld/ptobler/pointdata.py new file mode 100644 index 0000000..6aca59e --- /dev/null +++ b/helloworld/ptobler/pointdata.py @@ -0,0 +1,367 @@ +""" +Module to manage making point datasets for ptobler PROJECTIONS work +""" + +import random +import math +import copy +import os + +import numpy as np +import pandas + + +def random_point(): + """ + Returns a lon lat point evenly distributed on a sphere + + Returns + ------- + a 2-tuple (lon, lat) in degrees + """ + lon = 2 * math.pi * random.random() - math.pi + lat = math.acos(2 * random.random() - 1) - math.pi/2 + return math.degrees(lon), math.degrees(lat) + + +def random_points(npoints=1000): + """ + Returns a list of lon lat points evenly distributed on a sphere + + Arguments + --------- + npoints: integer number of points required (default 1000) + Returns + ------- + a list of 2-tuples (lon, lat) in degrees + """ + return [random_point() for _ in range(npoints)] + + +def random_points_df(npoints=1000): + """ + Returns a pandas.DataFrame of lon lat points evenly distributed on a sphere + + Arguments + --------- + npoints: integer number of points required (default 1000) + Returns + ------- + a pandas.DataFrame with npoints (lon, lat) columns + """ + pts = random_points(npoints) + points = pandas.DataFrame({'lon': [p[0] for p in pts], 'lat': [p[1] for p in pts]}) + return points + + +def tag_with_ID_and_dir(df, direction='.'): + """ + Part of making offsets to existing points in a pandas.DataFrame. Adds dir and ID attributes to a pandas.DataFrame. + + Arguments + --------- + df: a pandas.DataFrame + direction: a string for the dir attribute (default '.') + Returns + ------- + The tagged pandas.DataFrame + """ + df['dir'] = direction + df['ID'] = list(range(df.shape[0])) + return df + + +def add_offset_points(df, offset=(0, 0), direction='.'): + """ + Returns an offset copy of the supplied pandas.DataFrame + + The copy is displaced by the specified (x, y) offset. 
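+    For instance (illustrative), add_offset_points(df, (0, 0.001), 'N')
+    returns a copy shifted 0.001 degrees north with every dir set to 'N'.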
Offset points retain + any attributes of the original, but the 'dir' attribute is set to the + supplied direction value. + + Arguments + --------- + df: a pandas.DataFrame + offset: a 2-tuple of the x, y translation (default (0, 0)) + direction: a string for the dir attribute, e.g.if offset is (0,1) + 'N' would be an appropriate value (default '.') + Returns + ------- + The tagged pandas.DataFrame + """ + df2 = copy.copy(df) + df2.lon += offset[0] + df2.lat += offset[1] + df2.dir = direction + return df2 + + +def make_and_combine_offsets(df, d=0.001): + """ + Returns a new pandas.DataFrame with duplicate 4x offset points + + The returned DataFrame contains the original points along with four + N/E/S/W offsets by a distance d. Generally the points are expected to be in + lon lat format, but any units are fine. New points will have the same ID as + their parent points, and the dir tag will be a character from NESW as appropriate. + + Arguments + --------- + df: a pandas.DataFrame, which should have ID and dir attributes + d: the distance offset to apply in each of the four directions (default 0.001) + Returns + ------- + A pandas.DataFrame containing the original points and four + offset copies + """ + # make a bunch of offset tuples left a bit, right a bit, down a bit, up a bit + offsets = [(-d, 0), (d, 0), (0, -d), (0, d)] + offset_copies = [add_offset_points(df, offset=o, direction=s) for o, s in zip(offsets, ['W', 'E', 'S', 'N'])] + df2 = copy.copy(df) + # merge in the offset copies + for o in offset_copies: + df2 = pandas.concat([df2, o], sort=True, ignore_index=True) + return df2 + + +def add_points(df=None, lons=(-180, -180, 180, 180), lats=(-90, 90, 90, -90)): + """ + Adds the requested set of points to the supplied pandas.DataFrame (which can be None) + + Added points are inserted at the beginning of the DataFrame for more convenient + inspection, but their ID numbers follow those in the supplied DataFrame. If df is + None, only the specified points will be in the returned DataFrame. + + Arguments + --------- + df: a DataFrame to add points to + lons: list of the longitudes of points to add (default [-180, -180, 180, 180]) + lats: list of the latitudes of points to add (default [-90, 90, 90, -90]) + Returns + a pandas.DataFrame with ID, dir, lon, lat + """ + pts = pandas.DataFrame({'lon': lons, 'lat': lats}) + if df is None: + df = pts + df = tag_with_ID_and_dir(df) + else: + if 'ID' in df.columns: + n1 = max(df.ID) + n2 = pts.shape[0] + pts['ID'] = [x for x in range(n1 + 1, n1 + n2 + 1)] + pts['dir'] = '.' 
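+            # Illustrative note: IDs continue from the existing maximum, so if
+            # df already holds IDs 0..99, four injected corner points get IDs
+            # 100..103 before being prepended to the frame.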
+ df = pandas.concat([pts, df], sort=True, ignore_index=True) + else: + df = pandas.concat([pts, df], sort=True, ignore_index=True) + df = tag_with_ID_and_dir(df) + return df[['ID', 'dir', 'lon', 'lat']] + + +def make_random_points(df=None, num_points=1000): + """ + Returns a point dataset as a pandas.DataFrame + + Arguments + --------- + df: a base dataset to augment, default None, when a random point + dataset will be generated + num_points: number of random points to generate if needed (default 1000) + Returns + ------- + a pandas.DataFrame, with ID, dir, lon, lat attributes + """ + if df is None: + df = random_points_df(npoints=num_points) + else: + df = pandas.concat([df, random_points_df(npoints=num_points)], ignore_index=True, sort=True) + + # tag the input data with ID and shift attributes + df = tag_with_ID_and_dir(df) + return df[['ID', 'dir', 'lon', 'lat']] + + +def make_lattice(df=None, lon_min=-180, lon_max=180, lon_num=25, lat_min=-90, lat_max=90, lat_num=13): + """ + Makes a regularly spaced lattice of lon lat coordinates as a pandas.DataFrame + + Returned DataFrame has lon lat attributes, and optionally includes points + from a provided GeoDataFrame of points (this is redundant, and it is + recommended to use x = make_lattice, followed by add_points(x ...) to do this instead + + Arguments + --------- + df: a pandas.DataFrame of lon lat Points to add lattice + points to (default None) + lon_min: minimum longitude (inclusive) (default -180) + lon_max: maximum longitude (inclusive) (default 180) + lon_num: number of meridians (default 25) + lat_min: minimum latitude (inclusive) (default -90) + lat_max: maximum longitude (inclusive) (default 90) + lat_num: number of parallels (default 13) + Returns + ------- + a pandas.DataFrame with ID dir lon lat + """ + mypoints = [] + for lon in np.linspace(lon_min, lon_max, int(lon_num)): + for lat in np.linspace(lat_min, lat_max, int(lat_num)): + mypoints.append([lon, lat]) + + gratpoints_df = pandas.DataFrame({'lon': [ll[0] for ll in mypoints], + 'lat': [ll[1] for ll in mypoints]}) + if df is not None: + gratpoints_df = pandas.concat([df, gratpoints_df], sort=True, ignore_index=True) + + return tag_with_ID_and_dir(gratpoints_df) + + +def get_file_name(basename=None, grid_reqd=False, npoints=1000, + offsets_reqd=False, bbox_reqd=False, idx=0): + """ + Returns a filesname suitable for subsequent processing + + Generally called by the make_point_datasets function + + Arguments + --------- + basename: default None + grid_reqd: default False + npoints: default 1000 + offsets_reqd: default False + bbox_reqd: defaule False + idx: default 0 + Returns + ------- + filename string -n-<|bb>-idx + examples: + lll-329-offsets-bb-00.csv + rnd-1000-no-offsets-00.csv + ... 
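+        (illustrative check of the pattern: get_file_name(None, True, 329,
+        True, True, 0) gives 'lll-329-offsets-bb-00'; callers such as
+        make_point_datasets append '.csv')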
+    """
+    if basename is not None:
+        filename = basename + '-'
+    else:
+        filename = "lll-" if grid_reqd else "rnd-"
+    filename += str(npoints)
+    filename += '-offsets' if offsets_reqd else "-no-offsets"
+    filename += "-bb" if bbox_reqd else ""
+    return filename + '-' + ('0' + str(idx))[-2:]
+
+
+def make_point_datasets(folder, basename=None, nfiles=1, num_points=1000,
+                        grid_reqd=False, grid_spec=(-180, 180, 25, -90, 90, 13),
+                        offsets_reqd=False, offset_d=0.001,
+                        bbox_reqd=False, bbbounds=(-180, -90, -180, 90, 180, 90, 180, -90),
+                        write=True):
+    """
+    Makes point datasets according to the requested options
+
+    Setting write=False returns an example DataFrame; setting it True writes the
+    requested files and returns a list of the filenames written (including the folder)
+
+    Arguments
+    ---------
+    folder: (Required) folder name for the output files
+    basename: a basename if desired, default None will produce auto-generated name(s)
+    nfiles: number of files to write to this specification, indexed 00, 01, 02, etc.
+    num_points: number of points in the random version (default 1000)
+    grid_reqd: True will produce a regular lattice of points according to grid_spec
+    grid_spec: a list (lon_min, lon_max, lon_n, lat_min, lat_max, lat_n) with min and max
+        in each direction, and the number of meridians/parallels required. Default of
+        (-180, 180, 25, -90, 90, 13) is a 15 degree spaced lattice
+    offsets_reqd: True will produce the original points and 4 offset copies, each with
+        the same ID as the original points, and a code NESW indicating the direction
+        of offset (default False)
+    offset_d: distance (in angular degrees) to offset (default 0.001)
+    bbox_reqd: True will add points according to bbbounds; these will be offset too, if
+        offsets are requested (default False)
+    bbbounds: points to add as a bounding box as (lon1, lat1, lon2, lat2, ...). Default
+        (-180, -90, -180, 90, 180, 90, 180, -90). Note that any number of points can
+        be 'injected' in this way, for example, the outline of a particular place might
+        be requested.
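+        (illustrative: bbbounds=(-120, 30, -120, 50, -100, 50, -100, 30) would
+        inject the corners of a rectangle over western North America instead
+        of the whole-world default)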
+ write: if True, files are written to folder, and a list of their names is returned, + if False no file is written and an example is returned for inspection + Returns + ------- + if write True then a list of written filenames, if False an example pandas.DataFrame + """ + if not os.path.exists(folder): + os.mkdir(folder) + + nfiles = nfiles if write else 1 + filenames = [] + for i in range(nfiles): + if bbox_reqd: + lons = [bbbounds[i] for i in range(0, len(bbbounds), 2)] + lats = [bbbounds[i] for i in range(1, len(bbbounds), 2)] + gdf = add_points(None, lons, lats) + else: + gdf = None + + if grid_reqd: + gdf = make_lattice(gdf, *grid_spec) + else: + gdf = make_random_points(gdf, num_points=num_points) + + if offsets_reqd: + gdf = make_and_combine_offsets(gdf, d=offset_d) + + n = max(gdf.ID) + 1 + fname = folder + '/' + get_file_name(basename, grid_reqd, n, offsets_reqd, bbox_reqd, i) + '.csv' + if write: + gdf.to_csv(fname, index=False) + filenames.append(fname) + if write: + return filenames + else: + return gdf + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description='Make random lon-lat points data') + + parser.add_argument('-n', type=int, + default=500, dest='NUM_POINTS', + help='the number of points required (default 500)') + parser.add_argument('--nfiles', type=int, + default=1, dest='NUM_FILES', + help='the number of files to make (default 1)') + parser.add_argument('--dest', type=str, + default='temp_data/input', dest='OUTPUT_FOLDER', + help='the output folder, which must already exist (default temp_data/input') + parser.add_argument('--fname', type=str, + default='', dest='BASENAME', + help="basename for files, if not set defaults to 'rnd' or 'lll'") + parser.add_argument('-o', dest='OFFSETS_REQUIRED', action='store_true', + help='offset points required, if dist other than 0.001 is required specify with --dist') + parser.add_argument('--dist', type=float, dest='OFFSET_DIST', + default=0.001, + help='offset distance in degrees (default 0.001)') + parser.add_argument('--bb', dest='BBOX_REQUIRED', action='store_true', + help="indicates that four 'corners' of the domain are required (defaults to world)") + parser.add_argument('--bbox', type=float, nargs=8, dest='BBOX', + metavar=('lon1', 'lat1', 'lon2', 'lat2', 'lon3', 'lat3', 'lon4', 'lat4'), + default=[-180, -90, -180, 90, 180, 90, 180, -90], + help='list of 8 values specifying 4 lon lat pairs') + parser.add_argument('--ll', dest='REG_GRID_REQUIRED', action='store_true', + help='indicates that a regular grid lattice is required') + parser.add_argument('--ll-spacing', type=float, nargs=6, + metavar=('lon_min', 'lon_max', 'num_meridians', 'lat_min', 'lat_max', 'num_parallels'), + dest='LATTICE_SPEC', + default=[-180, 180, 25, -90, 90, 13], + help='list of 8 values specifying 4 lon lat pairs') + + args = parser.parse_args() + + make_point_datasets(folder=args.OUTPUT_FOLDER, + basename=args.BASENAME, + nfiles=args.NUM_FILES, + num_points=args.NUM_POINTS, + grid_reqd=args.REG_GRID_REQUIRED, + grid_spec=args.LATTICE_SPEC, + offsets_reqd=args.OFFSETS_REQUIRED, + offset_d=args.OFFSET_DIST, + bbox_reqd=args.BBOX_REQUIRED, + bbbounds=args.BBOX) diff --git a/helloworld/ptobler/project.py b/helloworld/ptobler/project.py new file mode 100644 index 0000000..04fdf0d --- /dev/null +++ b/helloworld/ptobler/project.py @@ -0,0 +1,403 @@ +import scipy +import scipy.interpolate +import scipy.spatial +import pandas +import geopandas +import shapely +import shapely.geometry +import numpy + +import 
ptobler.inversedomain as ptobid +import ptobler.pointdata as ptobpd + + +SPHERE = '+proj=longlat +R=6371007 +no_defs' +PROJECTIONS = { + 'briesemeister': '+proj=ob_tran +o_proj=hammer +o_lat_p=45 +o_lon_p=-10 +lon_0=0 +R=6371007 +no_defs', + 'mollweide': '+proj=moll +lon_0=0 +x_0=0 +y_0=0 +units=m +R=6371007 +no_defs', +} + + +# def empirical_projection_calculate_domain(df): +# """ +# Returns a tuple of +# +# Arguments +# --------- +# df: a DataFrame with lon, lat +# Returns +# ------- +# +# """ +# projpoints_longlat = geopandas.GeoDataFrame(df, geometry= geopandas.points_from_xy(df.lon, df.lat)) +# domain_longlat = geopandas.GeoSeries( +# shapely.geometry.MultiPoint(geopandas.points_from_xy(df.lon, df.lat)).convex_hull) +# return projpoints_longlat, domain_longlat + + +def empirically_project(proj_df, points_to_interpolate, discontinuities=None, estimate_cut=True, + method='naive', wrap=None, verbose=False): + """ + The top level projection function - delegates the work to specific functions for different methods + + Only 'linear' and 'naive' methods currently implemented + + Arguments + --------- + proj_df: a dataframe with the lon, lat, x, y columns + points_to_interpolate: a DataFrame with lon, lat atrributes of locations where + estimated x, y is required + discontinuities: a geopandas.GeoDataFrame Polygon dataset where interpolation is + unsafe. If None (the default) uses ptobler.inversedomain.estimate_inverse_cuts + estimate_cut: if True and no discontinuities provided one will be estimated, if + False no cut will be applied, default True + method: string indicating method to use. Currently supports + 'naive' (the default) for linear interpolation without discontinuities; + 'linear' for linear interpolation with discontinuities; + wrap: float amount in degrees to wrap the empirical projection at poles + and dateline (default None) + Returns + ------- + a DataFrame with lon, lat, x, y columns + """ + if method == 'linear': + print('Linear interpolation starting...') + if estimate_cut and discontinuities is None: + print('Estimating cut regions...') + discontinuities = ptobid.estimate_inverse_cuts(proj_df) + proj_df = add_wrap(proj_df, wrap=wrap) + return linear_interpolation(proj_df[['lon', 'lat']], + proj_df[['x', 'y']], + discontinuities, + points_to_interpolate, + verbose=verbose) + + elif method == 'naive': + return naive_interpolation(proj_df, points_to_interpolate) + + else: + return None + + +def naive_interpolation(proj_df, points_to_interpolate): + """ + Performs a naive linear interpolation (no allowance for interruptions) + + Arguments + --------- + proj_df: dataframe with lon, lat, x, y + points_to_interpolate: a dataframe with lon, lat attributes where an estimated + x, y is required + Returns + ------- + the points to interpolated DataFrame populated with interpolated x, y values + """ + proj_x = scipy.interpolate.griddata( + proj_df[['lon', 'lat']], # points + proj_df['x'], # values + points_to_interpolate[['lon', 'lat']], # (grid_x, grid_y) + method='linear') + proj_y = scipy.interpolate.griddata( + proj_df[['lon', 'lat']], # points + proj_df['y'], # values + points_to_interpolate[['lon', 'lat']], # (grid_x, grid_y) + method='linear') + points_to_interpolate['x'] = proj_x + points_to_interpolate['y'] = proj_y + if len(numpy.bincount(numpy.isnan(proj_x))) > 1: + print("Warning: Projections returned NaN.") + return points_to_interpolate + + +def add_wrap(df, wrap=None): + if wrap is None: + return df + # # north + # subset = df[df.lat > (90 - wrap)][:] + # subset.lat = 
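+    # Editorial note: the active east/west blocks below duplicate points lying
+    # within `wrap` degrees of the dateline, shifted by -360 or +360, so that
+    # interpolation near +/-180 sees neighbours on both sides. The disabled
+    # north/south blocks would likewise mirror points across each pole: lat ->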
180 - subset.lat + # subset.lon = subset.lon % 360 - 180 + # if 'ID' in subset.columns: + # subset.ID += subset.ID + subset.shape[0] + # df = pandas.concat([df, subset], sort=True, ignore_index=False) + # + # # south + # subset = df[df.lat < (-90 + wrap)][:] + # subset.lat = -180 - subset.lat + # subset.lon = subset.lon % 360 - 180 + # if 'ID' in subset.columns: + # subset.ID += subset.ID + subset.shape[0] + # df = pandas.concat([df, subset], sort=True, ignore_index=False) + + # east + subset = df[df.lon > (180 - wrap)][:] + subset.lon = subset.lon - 360 + if 'ID' in subset.columns: + subset.ID += subset.ID + subset.shape[0] + df = pandas.concat([df, subset], sort=True, ignore_index=False) + + # west + subset = df[df.lon < (-180 + wrap)][:] + subset.lon = subset.lon + 360 + if 'ID' in subset.columns: + subset.ID += subset.ID + subset.shape[0] + + return pandas.concat([df, subset], sort=True, ignore_index=False) + + +def get_barycentric_coordinates(transform_matrix, point_coords): + """ + NO CLUE WHAT THIS DOES!!! + + Arguments + --------- + transform_matrix: ?? + point_coords: the coordinates to be transformed + Returns + ------- + numpy array of ?? + """ + b = transform_matrix[:2].dot(point_coords - transform_matrix[2]) + return numpy.append(b, 1-b.sum(axis=0)) + + +def linear_interpolation(control_pt_ll, control_pt_xy, + interp_discontinuities=None, pts_to_interpolate_ll=None, verbose=False): + """ + Performs linear interpolation in a triangulation of the supplied control points + + Arguments + --------- + control_pt_ll: pandas.DataFrame with lon, lat coords for which x, y values are known + control_pt_xy: x, y coordinates in the projected space of the lon, lats + interp_discontinuities: a Polygon geopandas.GeoDataFrame delineating discontinuity regions + in the projection. Defaults to an empty geopandas.GeoDataFrame if None is supplied + pts_to_interpolate_ll: lon, lat coordinates at which projected coordinates are required. Defaults + to a coarse lon lat grid at 15 degree spacing if None is supplied + :return: + """ + print('Performing interpolation...') + # if interp_discontinuities is None: + # interp_discontinuities = geopandas.GeoDataFrame(geometry=[], crs=SPHERE) + + if pts_to_interpolate_ll is None: + pts_to_interpolate_ll = ptobpd.make_lattice(lon_num=25, lat_num=13) + + pts_to_interp = pts_to_interpolate_ll[['lon', 'lat']].to_numpy(copy=True) + + points_array = control_pt_ll[['lon', 'lat']].to_numpy(copy=True) + tri = scipy.spatial.Delaunay(points_array) + + delaunay_triangles_gdf = geopandas.GeoDataFrame( + geometry=[shapely.geometry.Polygon(points_array[s]) for s in tri.simplices], + crs=SPHERE) + + if interp_discontinuities is not None: + simplexes_intersecting_discontinuities_gdf = geopandas.sjoin( + delaunay_triangles_gdf, + interp_discontinuities, + op='intersects') + + simplexes_intersecting_discontinuities_gdf.rename( + columns={'index_right': 'intersecting_domain_edge_segment'}, + inplace=True) + + bad_simplexes = simplexes_intersecting_discontinuities_gdf['geometry'] #.drop_duplicates() + bad_simplex_indices = bad_simplexes.index + + triangles_found = tri.find_simplex(pts_to_interp) + + results_array = [] + bad_points = [] + for p_num in range(pts_to_interp.shape[0]): + if (triangles_found[p_num].size == 1) and (triangles_found[p_num] < 0): + results_array.append([numpy.NaN, numpy.NaN]) + if verbose: + print("Triangle not found for: ", pts_to_interp[p_num]) + else: + # for one to many transformations, this needs to be modified... perhaps it is an array? 
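+            # Editorial note: tri.transform[t] packs an affine map for simplex
+            # t: rows [:2] hold the inverse edge matrix and row [2] an anchor
+            # vertex, so b = T[:2] @ (p - T[2]) yields two barycentric
+            # coordinates and the third is 1 - b.sum(). This is what
+            # get_barycentric_coordinates computes; the vertex x, y values are
+            # then blended with those weights (piecewise-linear interpolation).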
+ curr_triangle = triangles_found[p_num] + p = pts_to_interp[p_num] + + if interp_discontinuities is not None and curr_triangle in bad_simplex_indices: + bad_points.append(p.tolist()) + results_array.append([numpy.NaN, numpy.NaN]) + else: + bc = get_barycentric_coordinates(tri.transform[curr_triangle], p) + curr_triangle_values = control_pt_xy.iloc[tri.simplices[curr_triangle]][{'x', 'y'}] + interpolated_x = curr_triangle_values['x'].to_numpy(copy=True).dot(bc) + interpolated_y = curr_triangle_values['y'].to_numpy(copy=True).dot(bc) + results_array.append([interpolated_x, interpolated_y]) + + if len(bad_points) > 0: + if verbose: + print("Unable to interpolate points because in bad simplex: ", bad_points) + else: + print("Unable to interpolate: %d" % (len(bad_points))) + + pts_to_interpolate_ll['x'] = [xy[0] for xy in results_array] + pts_to_interpolate_ll['y'] = [xy[1] for xy in results_array] + return pts_to_interpolate_ll + + +def get_xy_from_pts(pts): + """ + Utility function, returns x, y values as two lists from a geopandas.GeoSeries of Points + + Arguments + --------- + pts: a Point Geoseries + Returns + ------- + a 2-tuple of lists of the x and y coordinates + """ + return [p.coords[0][0] for p in pts], [p.coords[0][1] for p in pts] + + +def make_points(pts): + """ + Returns a geopandas.GeoSeries of Points from a list of 2-tuples of x, y coordinates + + Arguments + --------- + pts: a list of 2-tuples of (x, y) coordinates + Returns + ------- + a Point GeoSeries + """ + return geopandas.GeoSeries(shapely.geometry.Point(p[0], p[1]) for p in pts) + + +def get_proj4string(shortform=None): + """ + + :param shortform: + :return: + """ + if shortform is None: + return PROJECTIONS.keys() + else: + return PROJECTIONS[shortform] + + +def proj4_project(f, folder, proj, write=False): + """ + Arguments + --------- + proj: a tuple (shortform name, proj4 string) + """ + basename = f.rsplit('/', 1)[-1].split('.')[0] + pts = pandas.read_csv(f) + pts = geopandas.GeoDataFrame(pts, geometry=make_points(zip(pts.lon, pts.lat))) + pts.crs = SPHERE + pts = pts.to_crs(proj[1]) + pts['x'], pts['y'] = get_xy_from_pts(pts.geometry.envelope) + ptsdf = pts.drop(['geometry'], axis=1) + if write: + fname = folder + '/' + basename + '-p4-' + proj[0] + '.csv' + ptsdf.to_csv(fname, index=False) + return fname + else: + return ptsdf + +if __name__ == '__main__': + + ## Code attempting to test the intersection of the delaunay triangulation + ## with a cuts geodataframe causing apparent problems + emp_proj_df = geopandas.read_file( + '../temp_data/distancebearing/empirical_projections/dgg-7292-no-offsets-handmade-distancebearing.csv' + ) + to_interp = geopandas.read_file( + '../temp_data/unprojected_data/world_better.csv' + ) + + clon = -119.70 + clat = 34.416667 + cut_polys = geopandas.GeoSeries([shapely.geometry.Point(clon, clat), + shapely.geometry.Point(clon + 360, -clat)]).buffer(1).unary_union + + cuts = geopandas.GeoDataFrame(geometry=geopandas.GeoSeries([cut_polys])) + cuts.crs = SPHERE + + linear_interpolation(emp_proj_df[['lon', 'lat']], + emp_proj_df[['x', 'y']], + interp_discontinuities=cuts, + pts_to_interpolate_ll=to_interp[['lon', 'lat']]) + + + + +def make_1x3_gdf(gdf): + """ + Makes a 1 x 3 duplicate of the input (assumed whole world, lon, lat) input data + + The coordinates extended beyond -180/+180 lon and -90/+90 lat are transformed + appropriately such that they are 'mirrored' in the dateline and/or poles. 
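+    (Illustratively, a point at lon -170 reappears at lon 190 in the copy
+    translated by +360, and at lon -530 in the copy translated by -360.)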
While + lon, lat values are changed the x, y coordinates accompany the transformed lon, + lat values unchanged. + + Arguments + --------- + gdf: a geopandas.GeoDataFrame to extend, with lon, lat coordinates + Returns + ------- + an extended geopandas.GeoDataFrame + """ + gdf_1x3 = pandas.concat([ + geopandas.GeoDataFrame(gdf.copy(), geometry=geopandas.GeoSeries.translate(gdf.copy(), xoff=-360.0)), + gdf.copy(), + geopandas.GeoDataFrame(gdf.copy(),geometry=geopandas.GeoSeries.translate(gdf.copy(), xoff=360.0)) + ], ignore_index=True) + + # gdf_3x3 = pandas.concat( + # [ + # geopandas.GeoDataFrame( + # gdf_row.copy(), + # geometry=geopandas.GeoSeries.translate( + # geopandas.GeoSeries.scale( + # gdf_row.copy(), + # xfact=1,yfact=-1,origin=(0,0)), + # xoff=180.0, + # yoff=-180.0) + # ), + # gdf_row.copy(), + # geopandas.GeoDataFrame( + # gdf_row.copy(), + # geometry=geopandas.GeoSeries.translate( + # geopandas.GeoSeries.scale( + # gdf_row.copy(), + # xfact=1,yfact=-1,origin=(0,0)), + # xoff=180.0, + # yoff=180.0) + # ) + # ], + # ignore_index=True + # ) + gdf_1x3.crs = gdf.crs + # gdf_1x3['lon'] = gdf_1x3.geometry.apply(lambda x: x.coords[0][0]) + # gdf_1x3['lat'] = gdf_1x3.geometry.apply(lambda x: x.coords[0][1]) + return gdf_1x3 + + +def make_1x3_clipped_gdf(gdf, lon_min=-200, lon_max=200): + """ + Clips the supplied geopandas.GeoDataFrame to the supplied extents + + Arguments + --------- + gdf: the geopandas.GeoDataFrame to clip + lon_min: minimum longitude (default -200) + lon_max: maximum longitude (default 200) + Returns + ------- + a clipped geopandas.GeoDataFrame + """ + bbox = shapely.geometry.Polygon([(lon_min, -90), (lon_min, 90), (lon_max, 90), (lon_max, -90)]) + bbox = geopandas.GeoDataFrame(geometry=geopandas.GeoSeries([bbox])) + bbox.crs = gdf.crs + return geopandas.overlay(make_1x3_gdf(gdf), bbox, how='intersection') + # return gdf[ + # (gdf['lon'] > lon_min) & + # (gdf['lon'] < lon_max) + # ] + +
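
A minimal usage sketch of how the three modules compose (illustrative only:
folder layout taken from the CLI defaults in pointdata.py, assumed to exist,
and run from the helloworld directory so the ptobler package is importable):

    import ptobler
    from ptobler.pointdata import make_point_datasets
    from ptobler.project import proj4_project, get_proj4string

    # 1. random points plus N/E/S/W offset children, written as CSV
    files = make_point_datasets('temp_data/input', num_points=1000,
                                offsets_reqd=True)

    # 2. project each file with a known proj4 definition (Mollweide here)
    proj = ('mollweide', get_proj4string('mollweide'))
    projected = [proj4_project(f, 'temp_data/projected', proj, write=True)
                 for f in files]

    # 3. Tissot distortion analysis and per-file summary statistics
    summary = ptobler.summary_analysis_of_files(projected, 'temp_data/analysis')
    print(summary[['full_filename', 'h_mean', 'k_mean', 'omega_mean']])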