diff --git a/gemgis/utils.py b/gemgis/utils.py index 43884219..4bc5f0d7 100644 --- a/gemgis/utils.py +++ b/gemgis/utils.py @@ -37,9 +37,11 @@ __all__ = [series, crs] -def to_section_dict(gdf: gpd.geodataframe.GeoDataFrame, - section_column: str = 'section_name', - resolution: List[int] = None) -> dict: +def to_section_dict( + gdf: gpd.geodataframe.GeoDataFrame, + section_column: str = "section_name", + resolution: List[int] = None, +) -> dict: """Converting custom sections stored in Shape files to GemPy section_dicts Parameters @@ -85,49 +87,73 @@ def to_section_dict(gdf: gpd.geodataframe.GeoDataFrame, # Checking if gdf is of type GeoDataFrame if not isinstance(gdf, gpd.geodataframe.GeoDataFrame): - raise TypeError('gdf must be of type GeoDataFrame') + raise TypeError("gdf must be of type GeoDataFrame") # Checking if the section_column is of type string if not isinstance(section_column, str): - raise TypeError('Name for section_column must be of type string') + raise TypeError("Name for section_column must be of type string") # Checking if resolution is of type list if not isinstance(resolution, (list, type(None))): - raise TypeError('Resolution must be of type list') + raise TypeError("Resolution must be of type list") # Setting resolution if resolution is None: resolution = [100, 80] # Checking if X and Y values are in column - if not {'X', 'Y'}.issubset(gdf.columns): + if not {"X", "Y"}.issubset(gdf.columns): gdf = vector.extract_xy(gdf) # Checking the length of the resolution list if len(resolution) != 2: - raise ValueError('resolution list must be of length two') + raise ValueError("resolution list must be of length two") # Extracting Section names section_names = gdf[section_column].unique() # Create section dicts for Point Shape Files if all(gdf.geom_type == "Point"): - section_dict = {i: ([gdf[gdf[section_column] == i].X.iloc[0], gdf[gdf[section_column] == i].Y.iloc[0]], - [gdf[gdf[section_column] == i].X.iloc[1], gdf[gdf[section_column] == i].Y.iloc[1]], - resolution) for i in section_names} + section_dict = { + i: ( + [ + gdf[gdf[section_column] == i].X.iloc[0], + gdf[gdf[section_column] == i].Y.iloc[0], + ], + [ + gdf[gdf[section_column] == i].X.iloc[1], + gdf[gdf[section_column] == i].Y.iloc[1], + ], + resolution, + ) + for i in section_names + } # Create section dicts for Line Shape Files else: - section_dict = {i: ([gdf[gdf[section_column] == i].X.iloc[0], gdf[gdf[section_column] == i].Y.iloc[0]], - [gdf[gdf[section_column] == i].X.iloc[1], gdf[gdf[section_column] == i].Y.iloc[1]], - resolution) for i in section_names} + section_dict = { + i: ( + [ + gdf[gdf[section_column] == i].X.iloc[0], + gdf[gdf[section_column] == i].Y.iloc[0], + ], + [ + gdf[gdf[section_column] == i].X.iloc[1], + gdf[gdf[section_column] == i].Y.iloc[1], + ], + resolution, + ) + for i in section_names + } return section_dict -def convert_to_gempy_df(gdf: gpd.geodataframe.GeoDataFrame, - dem: Union[rasterio.io.DatasetReader, np.ndarray] = None, - extent: List[Union[float, int]] = None) -> pd.DataFrame: +def convert_to_gempy_df( + gdf: gpd.geodataframe.GeoDataFrame, + dem: Union[rasterio.io.DatasetReader, np.ndarray] = None, + extent: List[Union[float, int]] = None, +) -> pd.DataFrame: """Converting a GeoDataFrame into a Pandas DataFrame ready to be read in for GemPy Parameters @@ -190,80 +216,81 @@ def convert_to_gempy_df(gdf: gpd.geodataframe.GeoDataFrame, # Checking if gdf is of type GeoDataFrame if not isinstance(gdf, gpd.geodataframe.GeoDataFrame): - raise TypeError('gdf must be of type GeoDataFrame') 
+ raise TypeError("gdf must be of type GeoDataFrame") # Checking that the dem is a np.ndarray if not isinstance(dem, (np.ndarray, rasterio.io.DatasetReader, type(None))): - raise TypeError('DEM must be a numpy.ndarray or rasterio object') + raise TypeError("DEM must be a numpy.ndarray or rasterio object") # Checking if extent is a list if not isinstance(extent, (list, type(None))): - raise TypeError('Extent must be of type list') + raise TypeError("Extent must be of type list") # Extracting Coordinates from gdf - if not {'X', 'Y', 'Z'}.issubset(gdf.columns): + if not {"X", "Y", "Z"}.issubset(gdf.columns): # Extracting coordinates from array if isinstance(dem, np.ndarray): if not isinstance(extent, type(None)): - gdf = vector.extract_xyz(gdf=gdf, - dem=dem, - extent=extent) + gdf = vector.extract_xyz(gdf=gdf, dem=dem, extent=extent) else: - raise FileNotFoundError('Extent not provided') + raise FileNotFoundError("Extent not provided") # Extracting coordinates from raster elif isinstance(dem, rasterio.io.DatasetReader): - gdf = vector.extract_xyz(gdf=gdf, - dem=dem) + gdf = vector.extract_xyz(gdf=gdf, dem=dem) else: - raise FileNotFoundError('DEM not provided') + raise FileNotFoundError("DEM not provided") # Checking if the formation column is in the gdf and setting type - if 'formation' not in gdf: - raise ValueError('Formation names not defined') + if "formation" not in gdf: + raise ValueError("Formation names not defined") else: - gdf['formation'] = gdf['formation'].astype(str) + gdf["formation"] = gdf["formation"].astype(str) # Checking if the dip column is in the gdf and setting type - if 'dip' in gdf: - gdf['dip'] = gdf['dip'].astype(float) + if "dip" in gdf: + gdf["dip"] = gdf["dip"].astype(float) # Checking if the azimuth column is in the gdf and setting type - if 'azimuth' in gdf: - gdf['azimuth'] = gdf['azimuth'].astype(float) + if "azimuth" in gdf: + gdf["azimuth"] = gdf["azimuth"].astype(float) # Checking if DataFrame is an orientation or interfaces df - if 'dip' in gdf: + if "dip" in gdf: - if (gdf['dip'] > 90).any(): - raise ValueError('dip values exceed 90 degrees') - if 'azimuth' not in gdf: - raise ValueError('azimuth values not defined') - if (gdf['azimuth'] > 360).any(): - raise ValueError('azimuth values exceed 360 degrees') + if (gdf["dip"] > 90).any(): + raise ValueError("dip values exceed 90 degrees") + if "azimuth" not in gdf: + raise ValueError("azimuth values not defined") + if (gdf["azimuth"] > 360).any(): + raise ValueError("azimuth values exceed 360 degrees") # Create orientations dataframe - if 'polarity' not in gdf: - df = pd.DataFrame(gdf[['X', 'Y', 'Z', 'formation', 'dip', 'azimuth']]) - df['polarity'] = 1 + if "polarity" not in gdf: + df = pd.DataFrame(gdf[["X", "Y", "Z", "formation", "dip", "azimuth"]]) + df["polarity"] = 1 return df else: - df = pd.DataFrame(gdf[['X', 'Y', 'Z', 'formation', 'dip', 'azimuth', 'polarity']]) + df = pd.DataFrame( + gdf[["X", "Y", "Z", "formation", "dip", "azimuth", "polarity"]] + ) return df else: # Create interfaces dataframe - df = pd.DataFrame(gdf[['X', 'Y', 'Z', 'formation']]) + df = pd.DataFrame(gdf[["X", "Y", "Z", "formation"]]) return df -def set_extent(minx: Union[int, float] = 0, - maxx: Union[int, float] = 0, - miny: Union[int, float] = 0, - maxy: Union[int, float] = 0, - minz: Union[int, float] = 0, - maxz: Union[int, float] = 0, - gdf: gpd.geodataframe.GeoDataFrame = None) -> List[Union[int, float]]: +def set_extent( + minx: Union[int, float] = 0, + maxx: Union[int, float] = 0, + miny: Union[int, float] = 0, + 
maxy: Union[int, float] = 0,
+    minz: Union[int, float] = 0,
+    maxz: Union[int, float] = 0,
+    gdf: gpd.geodataframe.GeoDataFrame = None,
+) -> List[Union[int, float]]:
    """Setting the extent for a model

    Parameters
@@ -311,11 +338,13 @@ def set_extent(minx: Union[int, float] = 0,

    # Checking that the GeoDataFrame is a gdf or of type None
    if not isinstance(gdf, (type(None), gpd.geodataframe.GeoDataFrame)):
-        raise TypeError('gdf must be of type GeoDataFrame')
+        raise TypeError("gdf must be of type GeoDataFrame")

    # Checking if bounds are of type int or float
-    if not all(isinstance(i, (int, float)) for i in [minx, maxx, miny, maxy, minz, maxz]):
-        raise TypeError('bounds must be of type int or float')
+    if not all(
+        isinstance(i, (int, float)) for i in [minx, maxx, miny, maxy, minz, maxz]
+    ):
+        raise TypeError("bounds must be of type int or float")

    # Checking if the gdf is of type None
    if isinstance(gdf, type(None)):
@@ -334,15 +363,17 @@

    # Create extent from gdf of geom_type point or linestring
    else:
        bounds = gdf.bounds
-        extent = [round(bounds.minx.min(), 2), round(bounds.maxx.max(), 2), round(bounds.miny.min(), 2),
-                  round(bounds.maxy.max(), 2)]
+        extent = [
+            round(bounds.minx.min(), 2),
+            round(bounds.maxx.max(), 2),
+            round(bounds.miny.min(), 2),
+            round(bounds.maxy.max(), 2),
+        ]

    return extent


-def set_resolution(x: int,
-                   y: int,
-                   z: int) -> List[int]:
+def set_resolution(x: int, y: int, z: int) -> List[int]:
    """Setting the resolution for a model

    Parameters
@@ -378,15 +409,15 @@

    # Checking if x is of type int
    if not isinstance(x, int):
-        raise TypeError('X must be of type int')
+        raise TypeError("X must be of type int")

    # Checking if y is of type int
    if not isinstance(y, int):
-        raise TypeError('Y must be of type int')
+        raise TypeError("Y must be of type int")

    # Checking if z is of type int
    if not isinstance(z, int):
-        raise TypeError('Z must be of type int')
+        raise TypeError("Z must be of type int")

    # Create list of resolution values
    resolution = [x, y, z]
@@ -394,12 +425,14 @@

    return resolution


-def read_csv_as_gdf(path: str,
-                    crs: Union[str, pyproj.crs.crs.CRS],
-                    x: str = 'X',
-                    y: str = 'Y',
-                    z: str = None,
-                    delimiter: str = ',') -> gpd.geodataframe.GeoDataFrame:
+def read_csv_as_gdf(
+    path: str,
+    crs: Union[str, pyproj.crs.crs.CRS],
+    x: str = "X",
+    y: str = "Y",
+    z: str = None,
+    delimiter: str = ",",
+) -> gpd.geodataframe.GeoDataFrame:
    """Reading CSV files as GeoDataFrame

    Parameters
@@ -449,7 +482,7 @@

    # Checking that the path is of type string
    if not isinstance(path, str):
-        raise TypeError('Path must be provided as string')
+        raise TypeError("Path must be provided as string")

    # Getting the absolute path
    path = os.path.abspath(path=path)
@@ -460,45 +493,43 @@

    # Checking that the file exists
    if not os.path.exists(path):
-        raise FileNotFoundError('File not found')
+        raise FileNotFoundError("File not found")

    # Checking that the x column is of type string
    if not isinstance(x, str):
-        raise TypeError('X column name must be provided as string')
+        raise TypeError("X column name must be provided as string")

    # Checking that the y column is of type string
    if not isinstance(y, str):
-        raise TypeError('Y column name must be provided as string')
+        raise TypeError("Y column name must be provided as string")

    # Checking that the z column is of type string
    if not isinstance(z, (str, type(None))):
-        raise TypeError('Z column 
name must be provided as string') + raise TypeError("Z column name must be provided as string") # Checking that the crs is provided as string if not isinstance(crs, (str, pyproj.crs.crs.CRS)): - raise TypeError('CRS must be provided as string or pyproj CRS object') + raise TypeError("CRS must be provided as string or pyproj CRS object") # Checking that the delimiter is of type string if not isinstance(delimiter, str): - raise TypeError('Delimiter must be of type string') + raise TypeError("Delimiter must be of type string") # Loading the csv file - df = pd.read_csv(filepath_or_buffer=path, - sep=delimiter) + df = pd.read_csv(filepath_or_buffer=path, sep=delimiter) # Checking that the file loaded is a DataFrame if not isinstance(df, pd.DataFrame): - raise TypeError('df must be of type DataFrame') + raise TypeError("df must be of type DataFrame") # Create GeoDataFrame if (x in df) and (y in df): - gdf = gpd.GeoDataFrame(data=df, - geometry=gpd.points_from_xy(x=df[x], - y=df[y], - crs=crs)) + gdf = gpd.GeoDataFrame( + data=df, geometry=gpd.points_from_xy(x=df[x], y=df[y], crs=crs) + ) else: - raise ValueError('X and/or Y columns could not be found') + raise ValueError("X and/or Y columns could not be found") return gdf @@ -541,9 +572,9 @@ def show_number_of_data_points(geo_model): """ # Trying to import gempy but returning error if gempy is not installed - #try: + # try: # import gempy as gp - #except ModuleNotFoundError: + # except ModuleNotFoundError: # raise ModuleNotFoundError( # 'GemPy package is not installed. Use pip install gempy to install the latest version') @@ -553,26 +584,32 @@ def show_number_of_data_points(geo_model): # Store values of number of interfaces and orientations in list for i in geo_model.surfaces.df.surface.unique(): - length = len(geo_model.surface_points.df[geo_model.surface_points.df['surface'] == i]) + length = len( + geo_model.surface_points.df[geo_model.surface_points.df["surface"] == i] + ) no_int.append(length) - length = len(geo_model.orientations.df[geo_model.orientations.df['surface'] == i]) + length = len( + geo_model.orientations.df[geo_model.orientations.df["surface"] == i] + ) no_ori.append(length) # Copying GeoDataFrame gdf = geo_model.surfaces.df.copy(deep=True) # Add columns to geo_model surface table - gdf['No. of Interfaces'] = no_int - gdf['No. of Orientations'] = no_ori + gdf["No. of Interfaces"] = no_int + gdf["No. 
of Orientations"] = no_ori return gdf -def getfeatures(extent: Union[List[Union[int, float]], type(None)], - crs_raster: Union[str, dict], - crs_bbox: Union[str, dict], - bbox: shapely.geometry.polygon.Polygon = None) -> list: +def getfeatures( + extent: Union[List[Union[int, float]], type(None)], + crs_raster: Union[str, dict], + crs_bbox: Union[str, dict], + bbox: shapely.geometry.polygon.Polygon = None, +) -> list: """Creating a list containing a dict with keys and values to clip a raster Parameters @@ -603,23 +640,23 @@ def getfeatures(extent: Union[List[Union[int, float]], type(None)], # Checking if extent is of type list if not isinstance(extent, (list, type(None))): - raise TypeError('Extent must be of type list') + raise TypeError("Extent must be of type list") # Checking if bounds are of type int or float if not all(isinstance(n, (int, float)) for n in extent): - raise TypeError('Bounds must be of type int or float') + raise TypeError("Bounds must be of type int or float") # Checking if the raster crs is of type string or dict if not isinstance(crs_raster, (str, dict, rasterio.crs.CRS)): - raise TypeError('Raster CRS must be of type dict or string') + raise TypeError("Raster CRS must be of type dict or string") # Checking if the bbox crs is of type string or dict if not isinstance(crs_bbox, (str, dict, rasterio.crs.CRS)): - raise TypeError('Bbox CRS must be of type dict or string') + raise TypeError("Bbox CRS must be of type dict or string") # Checking if the bbox is of type none or a shapely polygon if not isinstance(bbox, (shapely.geometry.polygon.Polygon, type(None))): - raise TypeError('Bbox must be a shapely polygon') + raise TypeError("Bbox must be a shapely polygon") # Create bbox if bbox is not provided if isinstance(bbox, type(None)): @@ -628,7 +665,7 @@ def getfeatures(extent: Union[List[Union[int, float]], type(None)], # Checking if the bbox is a shapely box if not isinstance(bbox, shapely.geometry.polygon.Polygon): - raise TypeError('Bbox is not of type shapely box') + raise TypeError("Bbox is not of type shapely box") # Converting to dict if isinstance(crs_raster, rasterio.crs.CRS): @@ -639,17 +676,17 @@ def getfeatures(extent: Union[List[Union[int, float]], type(None)], # Converting raster crs to dict if isinstance(crs_raster, str): - crs_raster = {'init': crs_raster} + crs_raster = {"init": crs_raster} # Converting bbox raster to dict if isinstance(crs_bbox, str): - crs_bbox = {'init': crs_bbox} + crs_bbox = {"init": crs_bbox} # Creating GeoDataFrame - gdf = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=crs_bbox) + gdf = gpd.GeoDataFrame({"geometry": bbox}, index=[0], crs=crs_bbox) gdf = gdf.to_crs(crs=crs_raster) - data = [json.loads(gdf.to_json())['features'][0]['geometry']] + data = [json.loads(gdf.to_json())["features"][0]["geometry"]] return data @@ -657,6 +694,7 @@ def getfeatures(extent: Union[List[Union[int, float]], type(None)], # Parsing QGIS Style Files ######################## + def parse_categorized_qml(qml_name: str) -> tuple: """Parsing a QGIS style file to retrieve surface color values @@ -714,11 +752,12 @@ def parse_categorized_qml(qml_name: str) -> tuple: import xmltodict except ModuleNotFoundError: raise ModuleNotFoundError( - 'xmltodict package is not installed. Use pip install xmltodict to install the latest version') + "xmltodict package is not installed. 
Use pip install xmltodict to install the latest version" + ) # Checking if the path was provided as string if not isinstance(qml_name, str): - raise TypeError('Path must be of type string') + raise TypeError("Path must be of type string") # Getting the absolute path path = os.path.abspath(path=qml_name) @@ -729,7 +768,7 @@ def parse_categorized_qml(qml_name: str) -> tuple: # Checking that the file exists if not os.path.exists(path): - raise FileNotFoundError('File not found') + raise FileNotFoundError("File not found") # Opening the file with open(qml_name, "rb") as f: @@ -740,15 +779,13 @@ def parse_categorized_qml(qml_name: str) -> tuple: # Extracting symbols symbols = { - symbol["@name"]: { - prop["@k"]: prop["@v"] for prop in symbol["layer"]["prop"] - } + symbol["@name"]: {prop["@k"]: prop["@v"] for prop in symbol["layer"]["prop"]} for symbol in qml["qgis"]["renderer-v2"]["symbols"]["symbol"] } # Extracting styles classes = { - category['@value']: symbols[category['@symbol']] + category["@value"]: symbols[category["@symbol"]] for category in qml["qgis"]["renderer-v2"]["categories"]["category"] } @@ -816,7 +853,7 @@ def build_style_dict(classes: dict) -> dict: # Checking if classes is of type dict if not isinstance(classes, dict): - raise TypeError('Classes must be of type dict') + raise TypeError("Classes must be of type dict") # Create empty styles dict styles_dict = {} @@ -832,14 +869,13 @@ def build_style_dict(classes: dict) -> dict: "opacity": opacity / 255, "weight": float(style["outline_width"]), "fillColor": f"#{fillColor[0]:02x}{fillColor[1]:02x}{fillColor[2]:02x}", - "fillOpacity": fill_opacity / 255 + "fillOpacity": fill_opacity / 255, } return styles_dict -def load_surface_colors(path: str, - gdf: gpd.geodataframe.GeoDataFrame) -> List[str]: +def load_surface_colors(path: str, gdf: gpd.geodataframe.GeoDataFrame) -> List[str]: """Loading surface colors from a QML file and storing the color values as list to be displayed with GeoPandas plots Parameters @@ -883,7 +919,7 @@ def load_surface_colors(path: str, # Checking that the path is of type str if not isinstance(path, str): - raise TypeError('path must be provided as string') + raise TypeError("path must be provided as string") # Getting the absolute path path = os.path.abspath(path=path) @@ -894,11 +930,11 @@ def load_surface_colors(path: str, # Checking that the file exists if not os.path.exists(path): - raise FileNotFoundError('File not found') + raise FileNotFoundError("File not found") # Checking that the gdf is of type GeoDataFrame if not isinstance(gdf, gpd.geodataframe.GeoDataFrame): - raise TypeError('object must be of type GeoDataFrame') + raise TypeError("object must be of type GeoDataFrame") # Parse qml column, classes = parse_categorized_qml(qml_name=path) @@ -919,7 +955,7 @@ def load_surface_colors(path: str, gdf_copy = gdf_copy.groupby([column], as_index=False).last() # Create list of remaining colors - cols = gdf_copy['Color'].to_list() + cols = gdf_copy["Color"].to_list() return cols @@ -961,7 +997,7 @@ def create_surface_color_dict(path: str) -> dict: # Checking that the path is of type str if not isinstance(path, str): - raise TypeError('path must be provided as string') + raise TypeError("path must be provided as string") # Getting the absolute path path = os.path.abspath(path=path) @@ -972,7 +1008,7 @@ def create_surface_color_dict(path: str) -> dict: # Checking that the file exists if not os.path.exists(path): - raise FileNotFoundError('File not found') + raise FileNotFoundError("File not found") # Parse 
qml columns, classes = parse_categorized_qml(qml_name=path) @@ -1030,11 +1066,12 @@ def get_location_coordinate(name: str): import geopy except ModuleNotFoundError: raise ModuleNotFoundError( - 'GeoPy package is not installed. Use pip install geopy to install the latest version') + "GeoPy package is not installed. Use pip install geopy to install the latest version" + ) # Checking that the location name is of type string if not isinstance(name, str): - raise TypeError('Location name must be of type string') + raise TypeError("Location name must be of type string") # Create geocoder for OpenStreetMap data geolocator = geopy.geocoders.Nominatim(user_agent=name) @@ -1045,8 +1082,9 @@ def get_location_coordinate(name: str): return coordinates -def transform_location_coordinate(coordinates, - crs: Union[str, pyproj.crs.crs.CRS]) -> dict: +def transform_location_coordinate( + coordinates, crs: Union[str, pyproj.crs.crs.CRS] +) -> dict: """Transforming coordinates of GeoPy Location Parameters @@ -1097,18 +1135,19 @@ def transform_location_coordinate(coordinates, import geopy except ModuleNotFoundError: raise ModuleNotFoundError( - 'GeoPy package is not installed. Use pip install geopy to install the latest version') + "GeoPy package is not installed. Use pip install geopy to install the latest version" + ) # Checking that coordinates object is a GeoPy location object if not isinstance(coordinates, geopy.location.Location): - raise TypeError('The location must be provided as GeoPy Location object') + raise TypeError("The location must be provided as GeoPy Location object") # Checking that the target crs is provided as string if not isinstance(crs, str): - raise TypeError('Target CRS must be of type string') + raise TypeError("Target CRS must be of type string") # Setting source and target projection - transformer = pyproj.Transformer.from_crs('EPSG:4326', crs) + transformer = pyproj.Transformer.from_crs("EPSG:4326", crs) # Transforming coordinate systems long, lat = transformer.transform(coordinates.latitude, coordinates.longitude) @@ -1164,21 +1203,27 @@ def create_polygon_from_location(coordinates) -> shapely.geometry.polygon.Polygo import geopy except ModuleNotFoundError: raise ModuleNotFoundError( - 'GeoPy package is not installed. Use pip install geopy to install the latest version') + "GeoPy package is not installed. Use pip install geopy to install the latest version" + ) # Checking that coordinates object is a GeoPy location object if not isinstance(coordinates, geopy.location.Location): - raise TypeError('The location must be provided as GeoPy Location object') + raise TypeError("The location must be provided as GeoPy Location object") # Create polygon from boundingbox - polygon = box(float(coordinates.raw['boundingbox'][0]), float(coordinates.raw['boundingbox'][2]), - float(coordinates.raw['boundingbox'][1]), float(coordinates.raw['boundingbox'][3])) + polygon = box( + float(coordinates.raw["boundingbox"][0]), + float(coordinates.raw["boundingbox"][2]), + float(coordinates.raw["boundingbox"][1]), + float(coordinates.raw["boundingbox"][3]), + ) return polygon -def get_locations(names: Union[list, str], - crs: Union[str, pyproj.crs.crs.CRS] = 'EPSG:4326') -> dict: +def get_locations( + names: Union[list, str], crs: Union[str, pyproj.crs.crs.CRS] = "EPSG:4326" +) -> dict: """Obtaining coordinates for one city or a list of given cities. 
A CRS other than 'EPSG:4326' can be passed to transform the coordinates @@ -1224,35 +1269,43 @@ def get_locations(names: Union[list, str], # Checking that the location names are provided as list of strings or as string for one location if not isinstance(names, (list, str)): - raise TypeError('Names must be provided as list of strings') + raise TypeError("Names must be provided as list of strings") # Checking that the target CRS is provided as string if not isinstance(crs, str): - raise TypeError('Target CRS must be of type string') + raise TypeError("Target CRS must be of type string") if isinstance(names, list): # Create list of GeoPy locations coordinates_list = [get_location_coordinate(name=i) for i in names] # Transform CRS and create result_dict - if crs != 'EPSG:4326': - dict_list = [transform_location_coordinate(coordinates=i, - crs=crs) for i in coordinates_list] + if crs != "EPSG:4326": + dict_list = [ + transform_location_coordinate(coordinates=i, crs=crs) + for i in coordinates_list + ] result_dict = {k: v for d in dict_list for k, v in d.items()} else: - result_dict = {coordinates_list[i].address: (coordinates_list[i].longitude, - coordinates_list[i].latitude) - for i in range(len(coordinates_list))} + result_dict = { + coordinates_list[i].address: ( + coordinates_list[i].longitude, + coordinates_list[i].latitude, + ) + for i in range(len(coordinates_list)) + } else: # Create GeoPy Object coordinates = get_location_coordinate(name=names) - if crs != 'EPSG:4326': - result_dict = transform_location_coordinate(coordinates=coordinates, - crs=crs) + if crs != "EPSG:4326": + result_dict = transform_location_coordinate( + coordinates=coordinates, crs=crs + ) else: - result_dict = {coordinates.address: (coordinates.longitude, - coordinates.latitude)} + result_dict = { + coordinates.address: (coordinates.longitude, coordinates.latitude) + } return result_dict @@ -1299,21 +1352,21 @@ def convert_location_dict_to_gdf(location_dict: dict) -> gpd.geodataframe.GeoDat # Checking that the input data is of type dict if not isinstance(location_dict, dict): - raise TypeError('Input data must be provided as dict') + raise TypeError("Input data must be provided as dict") # Creating GeoDataFrame gdf = gpd.GeoDataFrame(data=location_dict).T.reset_index() # Assigning column names - gdf.columns = ['City', 'X', 'Y'] + gdf.columns = ["City", "X", "Y"] # Split city names to only show the name of the city - gdf['City'] = [i.split(',')[0] for i in gdf['City'].to_list()] + gdf["City"] = [i.split(",")[0] for i in gdf["City"].to_list()] # Recreate GeoDataFrame and set coordinates as geometry objects - gdf = gpd.GeoDataFrame(data=gdf, - geometry=gpd.points_from_xy(x=gdf['X'], - y=gdf['Y'])) + gdf = gpd.GeoDataFrame( + data=gdf, geometry=gpd.points_from_xy(x=gdf["X"], y=gdf["Y"]) + ) return gdf @@ -1322,8 +1375,7 @@ def convert_location_dict_to_gdf(location_dict: dict) -> gpd.geodataframe.GeoDat ############################ -def assign_properties(lith_block: np.ndarray, - property_dict: dict) -> np.ndarray: +def assign_properties(lith_block: np.ndarray, property_dict: dict) -> np.ndarray: """Replacing lith block IDs with physical properties Parameters @@ -1366,11 +1418,11 @@ def assign_properties(lith_block: np.ndarray, # Checking that the lith block is a NumPy array if not isinstance(lith_block, np.ndarray): - raise TypeError('Lith block must be a NumPy Array') + raise TypeError("Lith block must be a NumPy Array") # Checking that the properties dict is a dict if not isinstance(property_dict, dict): - raise 
TypeError('Properties must be provided as dict')
+        raise TypeError("Properties must be provided as dict")

    # Store shape
    shape = lith_block.shape
@@ -1449,26 +1501,27 @@
        from sklearn.neighbors import NearestNeighbors
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
-            'Scikit Learn package is not installed. Use pip install scikit-learn to install the latest version')
+            "Scikit Learn package is not installed. Use pip install scikit-learn to install the latest version"
+        )

    # Checking that the point data set x is of type np.ndarray
    if not isinstance(x, np.ndarray):
-        raise TypeError('Point data set must be of type np.ndarray')
+        raise TypeError("Point data set must be of type np.ndarray")

    # Checking that the shape of the array is correct
    if x.shape[1] != 2:
-        raise ValueError('Only coordinate pairs are allowed')
+        raise ValueError("Only coordinate pairs are allowed")

    # Checking that point y is of type np.ndarray
    if not isinstance(y, np.ndarray):
-        raise TypeError('Point data set must be of type np.ndarray')
+        raise TypeError("Point data set must be of type np.ndarray")

    # Checking that the shape of the array is correct
    if y.shape != (2,):
-        raise ValueError('y point must be of shape (2,)')
+        raise ValueError("y point must be of shape (2,)")

    # Finding the nearest neighbor with ball_tree algorithm
-    nbrs = NearestNeighbors(n_neighbors=1, algorithm='ball_tree').fit(y.reshape(1, -1))
+    nbrs = NearestNeighbors(n_neighbors=1, algorithm="ball_tree").fit(y.reshape(1, -1))

    # Calculating the distances and indices to find the nearest neighbor
    distances, indices = nbrs.kneighbors(x)
@@ -1479,9 +1532,11 @@

    return index


-def calculate_number_of_isopoints(gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame],
-                                  increment: Union[float, int],
-                                  zcol: str = 'Z') -> int:
+def calculate_number_of_isopoints(
+    gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame],
+    increment: Union[float, int],
+    zcol: str = "Z",
+) -> int:
    """Calculating the number of isopoints to further interpolate strike lines

    Parameters
@@ -1531,19 +1586,21 @@ def calculate_number_of_isopoints(gdf: Union[gpd.geodataframe.GeoDataFrame, pd.D

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, (gpd.geodataframe.GeoDataFrame, pd.DataFrame)):
-        raise TypeError('gdf must be of type GeoDataFrame')
+        raise TypeError("gdf must be of type GeoDataFrame")

    # Checking if the increment is of type float or int
    if not isinstance(increment, (float, int)):
-        raise TypeError('The increment must be provided as float or int')
+        raise TypeError("The increment must be provided as float or int")

    # Checking that the name of the Z column is provided as string
    if not isinstance(zcol, str):
-        raise TypeError('Z column name must be provided as string')
+        raise TypeError("Z column name must be provided as string")

    # Checking that the Z column is in the GeoDataFrame
    if zcol not in gdf:
-        raise ValueError('Provide name of Z column as kwarg as Z column could not be recognized')
+        raise ValueError(
+            "Provide name of Z column as kwarg as Z column could not be recognized"
+        )

    # Creating a list with the unique heights of the GeoDataFrame
    heights = gdf[zcol].sort_values().unique().tolist()
@@ -1554,11 +1611,13 @@

    return number


-def calculate_lines(gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame],
-                    increment: Union[float, int],
-                    xcol: str = 'X',
-                    ycol: str = 'Y',
-                    zcol: str = 'Z') -> gpd.geodataframe.GeoDataFrame:
+def calculate_lines(
+    gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame],
+    increment: Union[float, int],
+    xcol: str = "X",
+    ycol: str = "Y",
+    zcol: str = "Z",
+) -> gpd.geodataframe.GeoDataFrame:
    """Function to interpolate strike lines

    Parameters
@@ -1606,27 +1665,27 @@ def calculate_lines(gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame],

    # Checking if gdf is of type GeoDataFrame
    if not isinstance(gdf, (gpd.geodataframe.GeoDataFrame, pd.DataFrame)):
-        raise TypeError('gdf must be of type GeoDataFrame')
+        raise TypeError("gdf must be of type GeoDataFrame")

    # Checking that all geometries are valid
    if not all(gdf.geometry.is_valid):
-        raise ValueError('Not all Shapely Objects are valid objects')
+        raise ValueError("Not all Shapely Objects are valid objects")

    # Checking if the increment is of type float or int
    if not isinstance(increment, (float, int)):
-        raise TypeError('The increment must be provided as float or int')
+        raise TypeError("The increment must be provided as float or int")

    # Checking that xcol is of type string
    if not isinstance(xcol, str):
-        raise TypeError('X column name must be of type string')
+        raise TypeError("X column name must be of type string")

    # Checking that ycol is of type string
    if not isinstance(ycol, str):
-        raise TypeError('Y column name must be of type string')
+        raise TypeError("Y column name must be of type string")

    # Checking that zcol is of type string
    if not isinstance(zcol, str):
-        raise TypeError('Z column name must be of type string')
+        raise TypeError("Z column name must be of type string")

    # Checking that the columns are in the GeoDataFrame
    # if not {xcol, ycol, zcol}.issubset(gdf.columns):
@@ -1649,21 +1708,27 @@

    # Calculating vertices of lines
    for i in range(len(gdf[gdf[zcol] == minval])):
        # Getting index for nearest neighbor
-        index = get_nearest_neighbor(np.array(gdf[gdf[zcol] == minval][[xcol, ycol]].values.tolist()),
-                                     np.array([gdf[gdf[zcol] == minval][xcol].values.tolist()[i],
-                                               gdf[gdf[zcol] == minval][ycol].values.tolist()[i]]))
+        index = get_nearest_neighbor(
+            np.array(gdf[gdf[zcol] == minval][[xcol, ycol]].values.tolist()),
+            np.array(
+                [
+                    gdf[gdf[zcol] == minval][xcol].values.tolist()[i],
+                    gdf[gdf[zcol] == minval][ycol].values.tolist()[i],
+                ]
+            ),
+        )

        # Creating x and y points from existing gdf
-        x1 = gdf[gdf['Z'] == minval][xcol].tolist()[i]
-        y1 = gdf[gdf['Z'] == minval][ycol].tolist()[i]
-        x2 = gdf[gdf['Z'] == maxval][xcol].tolist()[index]
-        y2 = gdf[gdf['Z'] == maxval][ycol].tolist()[index]
+        x1 = gdf[gdf[zcol] == minval][xcol].tolist()[i]
+        y1 = gdf[gdf[zcol] == minval][ycol].tolist()[i]
+        x2 = gdf[gdf[zcol] == maxval][xcol].tolist()[index]
+        y2 = gdf[gdf[zcol] == maxval][ycol].tolist()[index]

        # Calculating vertices of lines
        for j in range(num):
            # Calculating vertices
-            pointx = ((j + 1) / (num + 1) * x2 + (1 - (j + 1) / (num + 1)) * x1)
-            pointy = ((j + 1) / (num + 1) * y2 + (1 - (j + 1) / (num + 1)) * y1)
+            pointx = (j + 1) / (num + 1) * x2 + (1 - (j + 1) / (num + 1)) * x1
+            pointy = (j + 1) / (num + 1) * y2 + (1 - (j + 1) / (num + 1)) * y1

            # Append vertices to list
            pointsx.append(pointx)
@@ -1676,8 +1741,9 @@

    # Create linestring from point lists
    for i in range(0, int(len(pointsx) / 2)):
        # Creating linestrings
-        ls = LineString([Point(pointsx[i], pointsy[i]),
-                         Point(pointsx[i + 
num], pointsy[i + num])]) + ls = LineString( + [Point(pointsx[i], pointsy[i]), Point(pointsx[i + num], pointsy[i + num])] + ) # Appending line strings ls_list.append(ls) heights.append(minval + i * increment + increment) @@ -1687,25 +1753,27 @@ def calculate_lines(gdf: Union[gpd.geodataframe.GeoDataFrame, pd.DataFrame], lines = gpd.GeoDataFrame(gpd.GeoSeries(ls_list), crs=gdf.crs) # Setting geometry column of GeoDataFrame - lines['geometry'] = ls_list + lines["geometry"] = ls_list # Extracting X and Y coordinate and deleting first entry lines = vector.extract_xy(lines) del lines[0] # Adding formation and height information to GeoDataFrame - lines['formation'] = gdf['formation'].unique().tolist()[0] - lines['Z'] = heights - lines['id'] = heights + lines["formation"] = gdf["formation"].unique().tolist()[0] + lines["Z"] = heights + lines["id"] = heights return lines -def interpolate_strike_lines(gdf: gpd.geodataframe.GeoDataFrame, - increment: Union[float, int], - xcol: str = 'X', - ycol: str = 'Y', - zcol: str = 'Z') -> gpd.geodataframe.GeoDataFrame: +def interpolate_strike_lines( + gdf: gpd.geodataframe.GeoDataFrame, + increment: Union[float, int], + xcol: str = "X", + ycol: str = "Y", + zcol: str = "Z", +) -> gpd.geodataframe.GeoDataFrame: """Interpolating strike lines to calculate orientations Parameters @@ -1738,70 +1806,88 @@ def interpolate_strike_lines(gdf: gpd.geodataframe.GeoDataFrame, # Checking if gdf is of type GeoDataFrame if not isinstance(gdf, gpd.geodataframe.GeoDataFrame): - raise TypeError('gdf must be of type GeoDataFrame') + raise TypeError("gdf must be of type GeoDataFrame") # Checking if the increment is of type float or int if not isinstance(increment, (float, int)): - raise TypeError('The increment must be provided as float or int') + raise TypeError("The increment must be provided as float or int") # Checking that xcol is of type string if not isinstance(xcol, str): - raise TypeError('X column name must be of type string') + raise TypeError("X column name must be of type string") # Checking that ycol is of type string if not isinstance(ycol, str): - raise TypeError('Y column name must be of type string') + raise TypeError("Y column name must be of type string") # Checking that zcol is of type string if not isinstance(zcol, str): - raise TypeError('Z column name must be of type string') + raise TypeError("Z column name must be of type string") # Create empty GeoDataFrame gdf_out = gpd.GeoDataFrame() # Extract vertices from gdf - gdf = vector.extract_xy(gdf, drop_id=False, reset_index=False).sort_values(by='id') + gdf = vector.extract_xy(gdf, drop_id=False, reset_index=False).sort_values(by="id") # Interpolate strike lines - for i in range(len(gdf['id'].unique().tolist()) - 1): + for i in range(len(gdf["id"].unique().tolist()) - 1): # Calculate distance between two strike lines in the original gdf - diff = gdf.loc[gdf.index.unique().values.tolist()[i]][zcol].values.tolist()[0] - \ - gdf.loc[gdf.index.unique().values.tolist()[i + 1]][zcol].values.tolist()[0] + diff = ( + gdf.loc[gdf.index.unique().values.tolist()[i]][zcol].values.tolist()[0] + - gdf.loc[gdf.index.unique().values.tolist()[i + 1]][zcol].values.tolist()[ + 0 + ] + ) # If the distance is larger than the increment, interpolate strike lines if np.abs(diff) > increment: gdf_strike = pd.concat( - [gdf.loc[gdf.index.unique().values.tolist()[i]], gdf.loc[gdf.index.unique().values.tolist()[i + 1]]]) + [ + gdf.loc[gdf.index.unique().values.tolist()[i]], + gdf.loc[gdf.index.unique().values.tolist()[i + 1]], + ] + ) 
            # Calculate strike lines
            lines = calculate_lines(gdf_strike, increment)

            # Append interpolated lines to gdf that will be returned
            gdf_new = pd.concat(
-                [gdf.loc[gdf.index.unique().values.tolist()[i]], lines,
-                 gdf.loc[gdf.index.unique().values.tolist()[i + 1]]])
+                [
+                    gdf.loc[gdf.index.unique().values.tolist()[i]],
+                    lines,
+                    gdf.loc[gdf.index.unique().values.tolist()[i + 1]],
+                ]
+            )
            gdf_out = gdf_out.append(gdf_new, ignore_index=True)

        # If the distance is equal to the increment, append line to the gdf that will be returned
        else:
            gdf_new = pd.concat(
-                [gdf.loc[gdf.index.unique().values.tolist()[i]], gdf.loc[gdf.index.unique().values.tolist()[i + 1]]])
+                [
+                    gdf.loc[gdf.index.unique().values.tolist()[i]],
+                    gdf.loc[gdf.index.unique().values.tolist()[i + 1]],
+                ]
+            )
            gdf_out = gdf_out.append(gdf_new, ignore_index=True)

    # Drop duplicates
-    gdf_out = gdf_out.sort_values(by=['Y']).drop_duplicates('geometry')
+    gdf_out = gdf_out.sort_values(by=["Y"]).drop_duplicates("geometry")

    # Redefine ID column with interpolated strike lines
-    gdf_out['id'] = np.arange(1, len(gdf_out['id'].values.tolist()) + 1).tolist()
+    gdf_out["id"] = np.arange(1, len(gdf_out["id"].values.tolist()) + 1).tolist()

    return gdf_out


-def convert_to_petrel_points_with_attributes(mesh: pv.core.pointset.PolyData,
-                                             path: str,
-                                             crs: Union[str, pyproj.crs.crs.CRS, type(None)] = None,
-                                             target_crs: Union[str, pyproj.crs.crs.CRS, type(None)] = None):
+def convert_to_petrel_points_with_attributes(
+    mesh: pv.core.pointset.PolyData,
+    path: str,
+    crs: Union[str, pyproj.crs.crs.CRS, type(None)] = None,
+    target_crs: Union[str, pyproj.crs.crs.CRS, type(None)] = None,
+):
    """Function to convert vertices of a PyVista Mesh to Petrel Points with Attributes

    Parameters
@@ -1826,22 +1912,26 @@

    # Checking that the mesh is a PyVista PolyData object
    if not isinstance(mesh, pv.core.pointset.PolyData):
-        raise TypeError('Mesh must be provided as PyVista PolyData object')
+        raise TypeError("Mesh must be provided as PyVista PolyData object")

    # Checking that the CRS is provided as proper type
    if not isinstance(crs, (str, pyproj.crs.crs.CRS, type(None))):
-        raise TypeError('CRS must be provided as string or pyproj CRS object')
+        raise TypeError("CRS must be provided as string or pyproj CRS object")

    # Checking that the target CRS is provided as proper type
    if not isinstance(target_crs, (str, pyproj.crs.crs.CRS, type(None))):
-        raise TypeError('CRS must be provided as string or pyproj CRS object')
+        raise TypeError("CRS must be provided as string or pyproj CRS object")

    # Selecting vertices
    vertices = np.array(mesh.points)

    # Creating GeoDataFrame from vertices
-    gdf = gpd.GeoDataFrame(geometry=gpd.points_from_xy(vertices[:, 0], vertices[:, 1]), data=vertices,
-                           columns=['X', 'Y', 'Z'], crs=crs)
+    gdf = gpd.GeoDataFrame(
+        geometry=gpd.points_from_xy(vertices[:, 0], vertices[:, 1]),
+        data=vertices,
+        columns=["X", "Y", "Z"],
+        crs=crs,
+    )

    # Reprojecting data and extracting X and Y coordinates
    if target_crs and target_crs != crs:
@@ -1849,19 +1939,19 @@
        gdf = vector.extract_xy(gdf=gdf)

    # Dropping Geometry Column
-    df = gdf.drop('geometry', axis=1)
+    df = gdf.drop("geometry", axis=1)

-    df.to_csv(fname=path,
-              index=False,
-              sep='\t')
+    df.to_csv(path_or_buf=path, index=False, sep="\t")

-    print('CSV-File successfully saved')
+    print("CSV-File successfully saved")


-def ray_trace_one_surface(surface: Union[pv.core.pointset.PolyData, pv.core.pointset.UnstructuredGrid],
-                          origin: Union[np.ndarray, list],
-                          end_point: Union[np.ndarray, list],
-                          first_point: bool = False) -> tuple:
+def ray_trace_one_surface(
+    surface: Union[pv.core.pointset.PolyData, pv.core.pointset.UnstructuredGrid],
+    origin: Union[np.ndarray, list],
+    end_point: Union[np.ndarray, list],
+    first_point: bool = False,
+) -> tuple:
    """Function to return the depth of one surface in one well using PyVista ray tracing

    Parameters
@@ -1890,25 +1980,29 @@ def ray_trace_one_surface(surface: Union[pv.core.pointset.PolyData, pv.core.poin
    """

    # Checking that the provided surface is of type PolyData or UnstructuredGrid
-    if not isinstance(surface, (pv.core.pointset.PolyData, pv.core.pointset.UnstructuredGrid)):
-        raise TypeError('Surface must be provided as PolyData or UnstructuredGrid')
+    if not isinstance(
+        surface, (pv.core.pointset.PolyData, pv.core.pointset.UnstructuredGrid)
+    ):
+        raise TypeError("Surface must be provided as PolyData or UnstructuredGrid")

    # Converting UnstructuredGrid to PolyData
    if isinstance(surface, pv.core.pointset.UnstructuredGrid):
        surface = surface.extract_surface()

    # Extracting the intersection between a PolyData set and a mesh
-    intersection_points, intersection_cells = surface.ray_trace(origin=origin,
-                                                                end_point=end_point,
-                                                                first_point=first_point)
+    intersection_points, intersection_cells = surface.ray_trace(
+        origin=origin, end_point=end_point, first_point=first_point
+    )

    return intersection_points, intersection_cells


-def ray_trace_multiple_surfaces(surfaces: list,
-                                borehole_top: Union[np.ndarray, list],
-                                borehole_bottom: Union[np.ndarray, list],
-                                first_point: bool = False) -> list:
+def ray_trace_multiple_surfaces(
+    surfaces: list,
+    borehole_top: Union[np.ndarray, list],
+    borehole_bottom: Union[np.ndarray, list],
+    first_point: bool = False,
+) -> list:
    """Function to return the depth of multiple surfaces in one well using PyVista ray tracing

    Parameters
@@ -1937,18 +2031,25 @@
    """

    # Extracting multiple intersections from meshes
-    intersections = [ray_trace_one_surface(surface=surface,
-                                           origin=borehole_top,
-                                           end_point=borehole_bottom,
-                                           first_point=first_point) for surface in surfaces]
+    intersections = [
+        ray_trace_one_surface(
+            surface=surface,
+            origin=borehole_top,
+            end_point=borehole_bottom,
+            first_point=first_point,
+        )
+        for surface in surfaces
+    ]

    return intersections


-def create_virtual_profile(names_surfaces: list,
-                           surfaces: list,
-                           borehole: pv.core.pointset.PolyData,
-                           first_point: bool = False) -> pd.DataFrame:
+def create_virtual_profile(
+    names_surfaces: list,
+    surfaces: list,
+    borehole: pv.core.pointset.PolyData,
+    first_point: bool = False,
+) -> pd.DataFrame:
    """Function to filter and sort the resulting well tops

    Parameters
@@ -1977,13 +2078,21 @@
    """

    # Creating well segments
-    well_segments = [pv.Line(borehole.points[i], borehole.points[i + 1]) for i in range(len(borehole.points) - 1)]
+    well_segments = [
+        pv.Line(borehole.points[i], borehole.points[i + 1])
+        for i in range(len(borehole.points) - 1)
+    ]

    # Extracting well tops
-    well_tops = [ray_trace_multiple_surfaces(surfaces=surfaces,
-                                             borehole_top=segment.points[0],
-                                             borehole_bottom=segment.points[1],
-                                             first_point=first_point) for segment in well_segments]
+    well_tops = [
+        ray_trace_multiple_surfaces(
+            surfaces=surfaces,
+            borehole_top=segment.points[0],
+            borehole_bottom=segment.points[1],
+            first_point=first_point,
+        )
+        for segment in well_segments
+    ]

    # Flatten list
    well_tops = [item for sublist in well_tops for item in sublist]
@@ -2023,14 +2132,18 @@

    # well_dict = dict(sorted(well_dict.items(), key=lambda item: item[1], reverse=True))

    # Creating DataFrame
-    df = pd.DataFrame(list(zip(list_surfaces_filtered, z_values)), columns=['Surface', 'Z'])
+    df = pd.DataFrame(
+        list(zip(list_surfaces_filtered, z_values)), columns=["Surface", "Z"]
+    )

    return df


-def extract_zmap_data(surface: pv.core.pointset.PolyData,
-                      cell_width: int,
-                      nodata: Union[float, int] = -9999):
+def extract_zmap_data(
+    surface: pv.core.pointset.PolyData,
+    cell_width: int,
+    nodata: Union[float, int] = -9999,
+):
    """Function to extract a meshgrid of values from a PyVista mesh

    Parameters
@@ -2071,29 +2184,40 @@
    y = np.arange(extent[2] + 0.5 * cell_width, extent[3], cell_width)

    # Calculating the intersections
-    intersections = [ray_trace_one_surface(surface=surface,
-                                           origin=[x_value, y_value, extent[4]],
-                                           end_point=[x_value, y_value, extent[5]],
-                                           first_point=True) for x_value in x for y_value in y]
+    intersections = [
+        ray_trace_one_surface(
+            surface=surface,
+            origin=[x_value, y_value, extent[4]],
+            end_point=[x_value, y_value, extent[5]],
+            first_point=True,
+        )
+        for x_value in x
+        for y_value in y
+    ]

    # Extracting the height values
-    z_values = np.flipud(np.array([z[0][2] if len(z[0]) == 3 else nodata for z in intersections]).reshape(x_no_cells,
-                                                                                                          y_no_cells).T)
+    z_values = np.flipud(
+        np.array([z[0][2] if len(z[0]) == 3 else nodata for z in intersections])
+        .reshape(x_no_cells, y_no_cells)
+        .T
+    )

    return z_values


-def create_zmap_grid(surface: pv.core.pointset.PolyData,
-                     cell_width: int,
-                     comments: str = '',
-                     name: str = 'ZMAP_Grid',
-                     z_type: str = 'GRID',
-                     nodes_per_line: int = 5,
-                     field_width: int = 15,
-                     nodata: Union[int, float] = -9999.00000,
-                     nodata2: Union[int, float, str] = '',
-                     decimal_places: int = 5,
-                     start_column: int = 1):
+def create_zmap_grid(
+    surface: pv.core.pointset.PolyData,
+    cell_width: int,
+    comments: str = "",
+    name: str = "ZMAP_Grid",
+    z_type: str = "GRID",
+    nodes_per_line: int = 5,
+    field_width: int = 15,
+    nodata: Union[int, float] = -9999.00000,
+    nodata2: Union[int, float, str] = "",
+    decimal_places: int = 5,
+    start_column: int = 1,
+):
    """Function to write data to ZMAP Grid. This code is heavily inspired by https://github.com/abduhbm/zmapio

    Parameters
@@ -2143,9 +2267,7 @@
    """

    # Extracting z_values
-    z_values = extract_zmap_data(surface=surface,
-                                 cell_width=cell_width,
-                                 nodata=nodata)
+    z_values = extract_zmap_data(surface=surface, cell_width=cell_width, nodata=nodata)

    # Defining extent
    extent = surface.bounds
@@ -2157,16 +2279,20 @@

    # Defining auxiliary function
    def chunks(x, n):
        for i in range(0, len(x), n):
-            yield x[i: i + n]
+            yield x[i : i + n]

    # Create list of lines with first comments
-    lines = ['!', '! This ZMAP Grid was created using the GemGIS Package',
-             '! See https://github.com/cgre-aachen/gemgis for more information', '!']
+    lines = [
+        "!",
+        "! This ZMAP Grid was created using the GemGIS Package",
+        "! See https://github.com/cgre-aachen/gemgis for more information",
+        "!",
+    ]

    # Appending comments to lines
    for comment in comments:
-        lines.append('! ' + comment)
-        lines.append('!')
+        lines.append("! " + comment)
+        lines.append("!")

    # Appending header information to lines
    lines.append("@{}, {}, {}".format(name, z_type, nodes_per_line))
@@ -2185,12 +2311,7 @@ def chunks(x, n):

    # Appending header information to lines
    lines.append(
        "{}, {}, {}, {}, {}, {}".format(
-            no_rows,
-            no_cols,
-            extent[0],
-            extent[1],
-            extent[2],
-            extent[3]
+            no_rows, no_cols, extent[0], extent[1], extent[2], extent[3]
        )
    )
@@ -2203,15 +2324,21 @@ def chunks(x, n):

        for j in chunks(i, nodes_per_line):
            j_fmt = "0.{}f".format(decimal_places)
            j_fmt = "{0:" + j_fmt + "}"
-            j = [j_fmt.format(float(x)) if x is not np.nan else j_fmt.format(float(nodata)) for x in j]
+            j = [
+                (
+                    j_fmt.format(float(x))
+                    if x is not np.nan
+                    else j_fmt.format(float(nodata))
+                )
+                for x in j
+            ]
            line = "{:>" + "{}".format(field_width) + "}"
            lines.append("".join([line] * len(j)).format(*tuple(j)))

    return lines


-def save_zmap_grid(zmap_grid: list,
-                   path: str = 'ZMAP_Grid.dat'):
+def save_zmap_grid(zmap_grid: list, path: str = "ZMAP_Grid.dat"):
    """Function to save ZMAP Grid information to file

    Parameters
@@ -2228,20 +2355,22 @@
    """

    # Writing the ZMAP Grid to file
-    with open(path, 'w') as f:
+    with open(path, "w") as f:
        f.write("\n".join(zmap_grid))

-    print('ZMAP Grid successfully saved to file')
+    print("ZMAP Grid successfully saved to file")


-def rotate_gempy_input_data(extent: Union[np.ndarray, shapely.geometry.Polygon, gpd.geodataframe.GeoDataFrame],
-                            interfaces: Union[pd.DataFrame, gpd.geodataframe.GeoDataFrame],
-                            orientations: Union[pd.DataFrame, gpd.geodataframe.GeoDataFrame],
-                            zmin: Union[float, int] = None,
-                            zmax: Union[float, int] = None,
-                            rotate_reverse_direction: bool = False,
-                            return_extent_gdf: bool = False,
-                            manual_rotation_angle: Union[float, int] = None):
+def rotate_gempy_input_data(
+    extent: Union[np.ndarray, shapely.geometry.Polygon, gpd.geodataframe.GeoDataFrame],
+    interfaces: Union[pd.DataFrame, gpd.geodataframe.GeoDataFrame],
+    orientations: Union[pd.DataFrame, gpd.geodataframe.GeoDataFrame],
+    zmin: Union[float, int] = None,
+    zmax: Union[float, int] = None,
+    rotate_reverse_direction: bool = False,
+    return_extent_gdf: bool = False,
+    manual_rotation_angle: Union[float, int] = None,
+):
    """Function to rotate the GemPy Input Data horizontally or vertically

    Parameters
@@ -2288,142 +2417,210 @@
    """

    # Checking that the extent is of type list, Shapely Polygon, or GeoDataFrame
-    if not isinstance(extent, (np.ndarray, shapely.geometry.Polygon, gpd.geodataframe.GeoDataFrame)):
-        raise TypeError('The extent must be provided as NumPy array, Shapely Polygon oder GeoDataFrame')
+    if not isinstance(
+        extent, (np.ndarray, shapely.geometry.Polygon, gpd.geodataframe.GeoDataFrame)
+    ):
+        raise TypeError(
+            "The extent must be provided as NumPy array, Shapely Polygon or GeoDataFrame"
+        )

    # Checking the number of coordinates of the extent and convert extent to Shapely Polygon
    if isinstance(extent, np.ndarray):
        if len(extent) != 4:
-            raise ValueError('Please only provide four corner coordinates as extent')
+            raise ValueError("Please only provide four corner coordinates as extent")

        extent_polygon = Polygon(extent)

    elif isinstance(extent, shapely.geometry.Polygon):
-        if not (len(list(extent.exterior.coords)) != 4) or (len(list(extent.exterior.coords)) != 5):
-            raise ValueError('Please only provide a polygon with four corner coordinates as extent')
+        if len(list(extent.exterior.coords)) != 5:
+            raise ValueError(
+                "Please only provide a polygon with four corner coordinates as extent"
+            )

        extent_polygon = extent

    else:
-        if len(list(extent.iloc[0]['geometry'].exterior.coords)) != 5:
-            raise ValueError('Please only provide a polygon with four corner coordinates as extent')
+        if len(list(extent.iloc[0]["geometry"].exterior.coords)) != 5:
+            raise ValueError(
+                "Please only provide a polygon with four corner coordinates as extent"
+            )

-        extent_polygon = extent.iloc[0]['geometry']
+        extent_polygon = extent.iloc[0]["geometry"]

    # Checking that the interfaces are of type DataFrame or GeoDataFrame
    if not isinstance(interfaces, (pd.DataFrame, gpd.geodataframe.GeoDataFrame)):
-        raise TypeError('Interfaces must be provided as Pandas DataFrame or GeoPandas GeoDataFrame')
+        raise TypeError(
+            "Interfaces must be provided as Pandas DataFrame or GeoPandas GeoDataFrame"
+        )

    # Extracting X, Y, Z coordinates if interfaces are of type GeoDataFrame
-    if (isinstance(interfaces, gpd.geodataframe.GeoDataFrame)) and (not {'X', 'Y', 'Z'}.issubset(interfaces.columns)):
+    if (isinstance(interfaces, gpd.geodataframe.GeoDataFrame)) and (
+        not {"X", "Y", "Z"}.issubset(interfaces.columns)
+    ):
        interfaces = vector.extract_xy(interfaces)

    # Checking if X, Y, Z coordinates are in columns
-    if not {'X', 'Y', 'Z'}.issubset(interfaces.columns):
-        raise ValueError('Please provide all X, Y and Z coordinates in the Pandas DataFrame or GeoPandas GeoDataFrame')
+    if not {"X", "Y", "Z"}.issubset(interfaces.columns):
+        raise ValueError(
+            "Please provide all X, Y and Z coordinates in the Pandas DataFrame or GeoPandas GeoDataFrame"
+        )

    # Checking that the orientations are of type DataFrame or GeoDataFrame
    if not isinstance(orientations, (pd.DataFrame, gpd.geodataframe.GeoDataFrame)):
-        raise TypeError('Orientations must be provided as Pandas DataFrame or GeoPandas GeoDataFrame')
+        raise TypeError(
+            "Orientations must be provided as Pandas DataFrame or GeoPandas GeoDataFrame"
+        )

    # Extracting X, Y, Z coordinates if orientations are of type GeoDataFrame
    if (isinstance(orientations, gpd.geodataframe.GeoDataFrame)) and (
-            not {'X', 'Y', 'Z'}.issubset(orientations.columns)):
+        not {"X", "Y", "Z"}.issubset(orientations.columns)
+    ):
        orientations = vector.extract_xy(orientations)

    # Checking if X, Y, Z coordinates are in columns
-    if not {'X', 'Y', 'Z'}.issubset(orientations.columns):
-        raise ValueError('Please provide all X, Y and Z coordinates in the Pandas DataFrame or GeoPandas GeoDataFrame')
+    if not {"X", "Y", "Z"}.issubset(orientations.columns):
+        raise ValueError(
+            "Please provide all X, Y and Z coordinates in the Pandas DataFrame or GeoPandas GeoDataFrame"
+        )

    # Checking that zmin is of type float or int
    if not isinstance(zmin, (float, int)):
-        raise TypeError('zmin must be provided as float or int')
+        raise TypeError("zmin must be provided as float or int")

    # Checking that zmax is of type float or int
    if not isinstance(zmax, (float, int)):
-        raise TypeError('zmax must be provided as float or int')
+        raise TypeError("zmax must be provided as float or int")

    # Checking that rotate_reverse_direction is of type bool
    if not isinstance(rotate_reverse_direction, bool):
-        raise TypeError('rotate_reverse_direction must be of type bool')
+        raise TypeError("rotate_reverse_direction must be of type bool")

    # Checking that return_extent_gdf is of type bool
    if not isinstance(return_extent_gdf, bool):
-        raise 
TypeError("return_extent_gdf must be of type bool") # Calculating the smallest angle to perform the rotation - min_angle = min([vector.calculate_angle(LineString((list(extent_polygon.exterior.coords)[i], - list(extent_polygon.exterior.coords)[i + 1]))) for i in - range(len(list(extent_polygon.exterior.coords)) - 1)]) + min_angle = min( + [ + vector.calculate_angle( + LineString( + ( + list(extent_polygon.exterior.coords)[i], + list(extent_polygon.exterior.coords)[i + 1], + ) + ) + ) + for i in range(len(list(extent_polygon.exterior.coords)) - 1) + ] + ) # Using the manual rotation angle if provided if manual_rotation_angle: min_angle = manual_rotation_angle # Creating GeoDataFrames from DataFrames - interfaces = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x=interfaces['X'], - y=interfaces['Y']), - data=interfaces) + interfaces = gpd.GeoDataFrame( + geometry=gpd.points_from_xy(x=interfaces["X"], y=interfaces["Y"]), + data=interfaces, + ) - orientations = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x=orientations['X'], - y=orientations['Y']), - data=orientations) + orientations = gpd.GeoDataFrame( + geometry=gpd.points_from_xy(x=orientations["X"], y=orientations["Y"]), + data=orientations, + ) # Creating Polygons from Interfaces and Orientations - interfaces_polygon = shapely.geometry.Polygon(interfaces['geometry']) - orientations_polygon = shapely.geometry.Polygon(orientations['geometry']) + interfaces_polygon = shapely.geometry.Polygon(interfaces["geometry"]) + orientations_polygon = shapely.geometry.Polygon(orientations["geometry"]) # Rotating extent to vertical or horizontal if not rotate_reverse_direction: # Rotating extent - extent_rotated = shapely.affinity.rotate(extent_polygon, -min_angle, 'center') + extent_rotated = shapely.affinity.rotate(extent_polygon, -min_angle, "center") # Rotating interfaces and orientations - interfaces_polygon_rotated = shapely.affinity.rotate(interfaces_polygon, - -min_angle, - (list(extent_polygon.centroid.coords)[0][0], - list(extent_polygon.centroid.coords)[0][1])) + interfaces_polygon_rotated = shapely.affinity.rotate( + interfaces_polygon, + -min_angle, + ( + list(extent_polygon.centroid.coords)[0][0], + list(extent_polygon.centroid.coords)[0][1], + ), + ) - orientations_polygon_rotated = shapely.affinity.rotate(orientations_polygon, - -min_angle, - (list(extent_polygon.centroid.coords)[0][0], - list(extent_polygon.centroid.coords)[0][1])) + orientations_polygon_rotated = shapely.affinity.rotate( + orientations_polygon, + -min_angle, + ( + list(extent_polygon.centroid.coords)[0][0], + list(extent_polygon.centroid.coords)[0][1], + ), + ) else: # Rotating extent - extent_rotated = shapely.affinity.rotate(extent_polygon, min_angle, 'center') + extent_rotated = shapely.affinity.rotate(extent_polygon, min_angle, "center") # Rotating interfaces and orientations - interfaces_polygon_rotated = shapely.affinity.rotate(interfaces_polygon, - min_angle, - (list(extent_polygon.centroid.coords)[0][0], - list(extent_polygon.centroid.coords)[0][1])) + interfaces_polygon_rotated = shapely.affinity.rotate( + interfaces_polygon, + min_angle, + ( + list(extent_polygon.centroid.coords)[0][0], + list(extent_polygon.centroid.coords)[0][1], + ), + ) - orientations_polygon_rotated = shapely.affinity.rotate(orientations_polygon, - min_angle, - (list(extent_polygon.centroid.coords)[0][0], - list(extent_polygon.centroid.coords)[0][1])) + orientations_polygon_rotated = shapely.affinity.rotate( + orientations_polygon, + min_angle, + ( + 
 
     # Creating Bounding Box
-    extent = [extent_rotated.bounds[0],
-              extent_rotated.bounds[2],
-              extent_rotated.bounds[1],
-              extent_rotated.bounds[3],
-              zmin,
-              zmax]
+    extent = [
+        extent_rotated.bounds[0],
+        extent_rotated.bounds[2],
+        extent_rotated.bounds[1],
+        extent_rotated.bounds[3],
+        zmin,
+        zmax,
+    ]
 
     # Converting Polygons back to Points and extracting Points
     interfaces_rotated = gpd.GeoDataFrame(
-        geometry=gpd.points_from_xy(x=[coords[0] for coords in list(interfaces_polygon_rotated.exterior.coords)[:-1]],
-                                    y=[coords[1] for coords in list(interfaces_polygon_rotated.exterior.coords)[:-1]]),
-        data=interfaces)
+        geometry=gpd.points_from_xy(
+            x=[
+                coords[0]
+                for coords in list(interfaces_polygon_rotated.exterior.coords)[:-1]
+            ],
+            y=[
+                coords[1]
+                for coords in list(interfaces_polygon_rotated.exterior.coords)[:-1]
+            ],
+        ),
+        data=interfaces,
+    )
     interfaces_rotated = vector.extract_xy(interfaces_rotated)
 
     orientations_rotated = gpd.GeoDataFrame(
-        geometry=gpd.points_from_xy(x=[coords[0] for coords in list(orientations_polygon_rotated.exterior.coords)[:-1]],
-                                    y=[coords[1] for coords in
-                                       list(orientations_polygon_rotated.exterior.coords)[:-1]]),
-        data=orientations)
+        geometry=gpd.points_from_xy(
+            x=[
+                coords[0]
+                for coords in list(orientations_polygon_rotated.exterior.coords)[:-1]
+            ],
+            y=[
+                coords[1]
+                for coords in list(orientations_polygon_rotated.exterior.coords)[:-1]
+            ],
+        ),
+        data=orientations,
+    )
    orientations_rotated = vector.extract_xy(orientations_rotated)
 
     # Return extent gdf if needed
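Note the reordering when the bounding box is assembled: Shapely's bounds returns (minx, miny, maxx, maxy), while the extent list above follows GemPy's [xmin, xmax, ymin, ymax, zmin, zmax] convention. Slicing the exterior coordinates with [:-1] drops the repeated closing point before the ring is turned back into individual points. A small sketch of that round trip (the geometry is made up):

    import geopandas as gpd
    from shapely.geometry import Polygon

    ring = Polygon([(0, 0), (4, 0), (4, 3)])
    coords = list(ring.exterior.coords)[:-1]  # drop the duplicated closing point

    points = gpd.GeoDataFrame(
        geometry=gpd.points_from_xy(
            x=[c[0] for c in coords],
            y=[c[1] for c in coords],
        )
    )
    print(len(points))  # 3 points, one per original vertex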
@@ -2462,48 +2659,62 @@ def open_mpk(path_in: str):
         import py7zr
     except ModuleNotFoundError:
         raise ModuleNotFoundError(
-            'py7zr package is not installed. Use pip install py7zr to install the latest version')
+            "py7zr package is not installed. Use pip install py7zr to install the latest version"
+        )
 
     # Checking that the file path is of type string
     if not isinstance(path_in, str):
-        raise TypeError('The file path must be provided as string')
+        raise TypeError("The file path must be provided as string")
 
     # Renaming .mpk file to .zip file
-    path_out = path_in.split('.mpk')[0]
-    os.rename(path_in, path_out + '.zip')
+    path_out = path_in.split(".mpk")[0]
+    os.rename(path_in, path_out + ".zip")
 
     # Unzipping files
-    with py7zr.SevenZipFile(path_out + '.zip',
-                            'r') as archive:
+    with py7zr.SevenZipFile(path_out + ".zip", "r") as archive:
         archive.extractall(path=path_out)
 
     # Getting vector data files
-    files_vector_data = [os.path.join(path, name) for path, subdirs, files in os.walk(path_out)
-                         for name in files if name.endswith(".shp")]
+    files_vector_data = [
+        os.path.join(path, name)
+        for path, subdirs, files in os.walk(path_out)
+        for name in files
+        if name.endswith(".shp")
+    ]
 
     # Creating vector data dict
-    dict_vector_data = {file.rsplit('\\')[-1]: gpd.read_file(file) for file in files_vector_data}
+    dict_vector_data = {
+        file.rsplit("\\")[-1]: gpd.read_file(file) for file in files_vector_data
+    }
 
     # TODO: Add support for .tif files if case arises
     # Getting raster data files
-    files_raster_data_adf = [os.path.join(path, name) for path, subdirs, files in os.walk(path_out) for name in files if
-                             (name.endswith(".adf")) & (name.startswith("w001001."))]
+    files_raster_data_adf = [
+        os.path.join(path, name)
+        for path, subdirs, files in os.walk(path_out)
+        for name in files
+        if (name.endswith(".adf")) & (name.startswith("w001001."))
+    ]
 
     # Creating raster data dict
-    dict_raster_data = {file.rsplit('\\')[-1]: rasterio.open(file) for file in files_raster_data_adf}
+    dict_raster_data = {
+        file.rsplit("\\")[-1]: rasterio.open(file) for file in files_raster_data_adf
+    }
 
     return dict_vector_data, dict_raster_data
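For orientation, a hedged sketch of how open_mpk might be called, assuming the usual gemgis.utils namespace; the archive path is hypothetical, and note that the function renames the .mpk file on disk before extracting it:

    import gemgis as gg

    # Hypothetical ArcGIS map package; the .mpk is renamed to .zip in place
    vector_data, raster_data = gg.utils.open_mpk(path_in="data/example.mpk")

    # Both return values are dicts keyed by file name
    print(vector_data.keys(), raster_data.keys())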
 
 
-def convert_crs_seismic_data(path_in: str,
-                             path_out: str,
-                             crs_in: Union[str, pyproj.crs.crs.CRS],
-                             crs_out: Union[str, pyproj.crs.crs.CRS],
-                             cdpx: int = 181,
-                             cdpy: int = 185,
-                             vert_domain: str = 'TWT',
-                             coord_scalar: int = None):
+def convert_crs_seismic_data(
+    path_in: str,
+    path_out: str,
+    crs_in: Union[str, pyproj.crs.crs.CRS],
+    crs_out: Union[str, pyproj.crs.crs.CRS],
+    cdpx: int = 181,
+    cdpy: int = 185,
+    vert_domain: str = "TWT",
+    coord_scalar: int = None,
+):
     """Convert CDP coordinates of seismic data to a new CRS.
 
     Parameters
@@ -2533,45 +2744,44 @@ def convert_crs_seismic_data(path_in: str,
         from segysak.segy import segy_loader, segy_writer
     except ModuleNotFoundError:
         raise ModuleNotFoundError(
-            'segysak package is not installed. Use pip install segysak to install the latest version')
+            "segysak package is not installed. Use pip install segysak to install the latest version"
+        )
 
     # Checking that path_in is of type string
     if not isinstance(path_in, str):
-        raise TypeError('path_in must be provided as string')
+        raise TypeError("path_in must be provided as string")
 
     # Checking that path_out is of type string
     if not isinstance(path_out, str):
-        raise TypeError('path_out must be provided as string')
+        raise TypeError("path_out must be provided as string")
 
     # Checking that crs_in is of type string or pyproj CRS
     if not isinstance(crs_in, (str, pyproj.crs.crs.CRS)):
-        raise TypeError('crs_in must be provided as string or pyproj CRS')
+        raise TypeError("crs_in must be provided as string or pyproj CRS")
 
     # Checking that crs_out is of type string or pyproj CRS
     if not isinstance(crs_out, (str, pyproj.crs.crs.CRS)):
-        raise TypeError('crs_out must be provided as string or pyproj CRS')
+        raise TypeError("crs_out must be provided as string or pyproj CRS")
 
     # Checking that vert_domain is of type str
     if not isinstance(vert_domain, str):
-        raise TypeError('vert_domain must be provided as string')
+        raise TypeError("vert_domain must be provided as string")
 
     # Checking that the coord_scalar is of type int or None
     if not isinstance(coord_scalar, (int, type(None))):
-        raise TypeError('coord_scalar must be provided as int')
+        raise TypeError("coord_scalar must be provided as int")
 
     # Loading seismic data
-    seismic = segy_loader(path_in,
-                          vert_domain=vert_domain,
-                          cdpx=cdpx,
-                          cdpy=cdpy)
+    seismic = segy_loader(path_in, vert_domain=vert_domain, cdpx=cdpx, cdpy=cdpy)
 
     # Converting Seismic to DataFrame
     df_seismic = seismic.to_dataframe()
 
     # Checking that the CDP coordinates are in the DataFrame
-    if not {'cdp_x', 'cdp_y'}.issubset(df_seismic.columns):
+    if not {"cdp_x", "cdp_y"}.issubset(df_seismic.columns):
         raise ValueError(
-            'No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored')
+            "No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored"
+        )
 
     # Extracting the length of the samples to reduce computing time
     samples = len(df_seismic.index.get_level_values(1).unique())
 
@@ -2580,36 +2790,39 @@
     df_seismic_resampled = df_seismic[::samples]
 
     # Reprojecting Coordinates
-    df_seismic_reprojected = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x=df_seismic_resampled['cdp_x'].values,
-                                                                          y=df_seismic_resampled['cdp_y'].values),
-                                              crs=crs_in).to_crs(crs_out)
+    df_seismic_reprojected = gpd.GeoDataFrame(
+        geometry=gpd.points_from_xy(
+            x=df_seismic_resampled["cdp_x"].values,
+            y=df_seismic_resampled["cdp_y"].values,
+        ),
+        crs=crs_in,
+    ).to_crs(crs_out)
 
     # Extracting reprojected coordinates
     x = df_seismic_reprojected.geometry.x.values
     y = df_seismic_reprojected.geometry.y.values
 
     # Assigning new coordinates
-    seismic['cdp_x'][:] = x
-    seismic['cdp_y'][:] = y
+    seismic["cdp_x"][:] = x
+    seismic["cdp_y"][:] = y
 
     # Optionally setting a new coord_scalar
     if coord_scalar:
-        seismic.attrs['coord_scalar'] = coord_scalar
+        seismic.attrs["coord_scalar"] = coord_scalar
 
     # Saving reprojected seismic data to file
-    segy_writer(seismic,
-                path_out,
-                trace_header_map=dict(cdp_x=181,
-                                      cdp_y=185))
+    segy_writer(seismic, path_out, trace_header_map=dict(cdp_x=181, cdp_y=185))
 
-    print('Seismic data was successfully reprojected and saved to file')
+    print("Seismic data was successfully reprojected and saved to file")
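A hedged usage sketch for the reprojection helper; the file names and EPSG codes are placeholders that depend on the survey, and the CDP byte positions are left at the defaults of 181/185:

    import gemgis as gg

    # Hypothetical SEG-Y file; reprojecting from UTM zone 31N to UTM zone 32N
    gg.utils.convert_crs_seismic_data(
        path_in="data/seismic_line.sgy",
        path_out="data/seismic_line_epsg32632.sgy",
        crs_in="EPSG:32631",
        crs_out="EPSG:32632",
    )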
 
 
-def get_cdp_linestring_of_seismic_data(path_in: str,
-                                       crs_in: Union[str, pyproj.crs.crs.CRS],
-                                       cdpx: int = 181,
-                                       cdpy: int = 185,
-                                       vert_domain: str = 'TWT'):
+def get_cdp_linestring_of_seismic_data(
+    path_in: str,
+    crs_in: Union[str, pyproj.crs.crs.CRS],
+    cdpx: int = 181,
+    cdpy: int = 185,
+    vert_domain: str = "TWT",
+):
     """Extracting the path of the seismic data as LineString.
 
     Parameters
@@ -2638,33 +2851,32 @@
         from segysak.segy import segy_loader
     except ModuleNotFoundError:
         raise ModuleNotFoundError(
-            'segysak package is not installed. Use pip install segysak to install the latest version')
+            "segysak package is not installed. Use pip install segysak to install the latest version"
+        )
 
     # Checking that path_in is of type string
     if not isinstance(path_in, str):
-        raise TypeError('path_in must be provided as string')
+        raise TypeError("path_in must be provided as string")
 
     # Checking that crs_in is of type string or pyproj CRS
     if not isinstance(crs_in, (str, pyproj.crs.crs.CRS)):
-        raise TypeError('crs_in must be provided as string or pyproj CRS')
+        raise TypeError("crs_in must be provided as string or pyproj CRS")
 
     # Checking that vert_domain is of type str
     if not isinstance(vert_domain, str):
-        raise TypeError('vert_domain must be provided as string')
+        raise TypeError("vert_domain must be provided as string")
 
     # Loading seismic data
-    seismic = segy_loader(path_in,
-                          vert_domain=vert_domain,
-                          cdpx=cdpx,
-                          cdpy=cdpy)
+    seismic = segy_loader(path_in, vert_domain=vert_domain, cdpx=cdpx, cdpy=cdpy)
 
     # Converting Seismic to DataFrame
     df_seismic = seismic.to_dataframe()
 
     # Checking that the CDP coordinates are in the DataFrame
-    if not {'cdp_x', 'cdp_y'}.issubset(df_seismic.columns):
+    if not {"cdp_x", "cdp_y"}.issubset(df_seismic.columns):
         raise ValueError(
-            'No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored')
+            "No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored"
+        )
 
     # Extracting the length of the samples to reduce computing time
     samples = len(df_seismic.index.get_level_values(1).unique())
 
@@ -2673,23 +2885,27 @@
     df_seismic_resampled = df_seismic[::samples]
 
     # Creating LineString from coordinates
-    linestring = LineString(np.c_[(df_seismic_resampled['cdp_x'].values,
-                                   df_seismic_resampled['cdp_y'].values)])
+    linestring = LineString(
+        np.c_[
+            (df_seismic_resampled["cdp_x"].values, df_seismic_resampled["cdp_y"].values)
+        ]
+    )
 
     # Reprojecting Coordinates
-    gdf_seismic = gpd.GeoDataFrame(geometry=[linestring],
-                                   crs=crs_in)
+    gdf_seismic = gpd.GeoDataFrame(geometry=[linestring], crs=crs_in)
 
     return gdf_seismic
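A short sketch of what the LineString helper returns; the path and CRS are again placeholders:

    import gemgis as gg

    # Hypothetical SEG-Y file; returns a one-row GeoDataFrame holding the
    # survey path as a LineString in the given CRS
    gdf_line = gg.utils.get_cdp_linestring_of_seismic_data(
        path_in="data/seismic_line.sgy",
        crs_in="EPSG:32631",
    )
    print(gdf_line.geometry.iloc[0].length)  # length of the path in CRS units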
 
 
-def get_cdp_points_of_seismic_data(path_in: str,
-                                   crs_in: Union[str, pyproj.crs.crs.CRS],
-                                   cdpx: int = 181,
-                                   cdpy: int = 185,
-                                   vert_domain: str = 'TWT',
-                                   filter: int = None,
-                                   n_meter: Union[int, float] = None):
+def get_cdp_points_of_seismic_data(
+    path_in: str,
+    crs_in: Union[str, pyproj.crs.crs.CRS],
+    cdpx: int = 181,
+    cdpy: int = 185,
+    vert_domain: str = "TWT",
+    filter: int = None,
+    n_meter: Union[int, float] = None,
+):
     """Extracting the CDP coordinates of the seismic data as Points.
 
     Parameters
@@ -2722,33 +2938,32 @@
         from segysak.segy import segy_loader
     except ModuleNotFoundError:
         raise ModuleNotFoundError(
-            'segysak package is not installed. Use pip install segysak to install the latest version')
+            "segysak package is not installed. Use pip install segysak to install the latest version"
+        )
 
     # Checking that path_in is of type string
     if not isinstance(path_in, str):
-        raise TypeError('path_in must be provided as string')
+        raise TypeError("path_in must be provided as string")
 
     # Checking that crs_in is of type string or pyproj CRS
     if not isinstance(crs_in, (str, pyproj.crs.crs.CRS)):
-        raise TypeError('crs_in must be provided as string or pyproj CRS')
+        raise TypeError("crs_in must be provided as string or pyproj CRS")
 
     # Checking that vert_domain is of type str
     if not isinstance(vert_domain, str):
-        raise TypeError('vert_domain must be provided as string')
+        raise TypeError("vert_domain must be provided as string")
 
     # Loading seismic data
-    seismic = segy_loader(path_in,
-                          vert_domain=vert_domain,
-                          cdpx=cdpx,
-                          cdpy=cdpy)
+    seismic = segy_loader(path_in, vert_domain=vert_domain, cdpx=cdpx, cdpy=cdpy)
 
     # Converting Seismic to DataFrame
     df_seismic = seismic.to_dataframe()
 
     # Checking that the CDP coordinates are in the DataFrame
-    if not {'cdp_x', 'cdp_y'}.issubset(df_seismic.columns):
+    if not {"cdp_x", "cdp_y"}.issubset(df_seismic.columns):
         raise ValueError(
-            'No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored')
+            "No coordinates found, please provide the byte positions where the X and Y data of the CDPs is stored"
+        )
 
     # Extracting the length of the samples to reduce computing time
     samples = len(df_seismic.index.get_level_values(1).unique())
 
@@ -2759,28 +2974,44 @@
     df_seismic_resampled = df_seismic[::samples]
 
     if n_meter:
 
         # Creating LineString from coordinates
-        linestring = LineString(np.c_[(df_seismic_resampled['cdp_x'].values,
-                                       df_seismic_resampled['cdp_y'].values)])
+        linestring = LineString(
+            np.c_[
+                (
+                    df_seismic_resampled["cdp_x"].values,
+                    df_seismic_resampled["cdp_y"].values,
+                )
+            ]
+        )
 
         # Defining number of samples
         samples = np.arange(0, round(linestring.length / n_meter) + 1, 1)
 
         # Getting points every n_meter
-        points = [shapely.line_interpolate_point(linestring, n_meter * sample) for sample in samples]
+        points = [
+            shapely.line_interpolate_point(linestring, n_meter * sample)
+            for sample in samples
+        ]
 
         # Creating GeoDataFrame from points
-        gdf_seismic = gpd.GeoDataFrame(geometry=points,
-                                       crs=crs_in)
+        gdf_seismic = gpd.GeoDataFrame(geometry=points, crs=crs_in)
 
         # Appending distance
-        gdf_seismic['distance'] = samples * n_meter
+        gdf_seismic["distance"] = samples * n_meter
 
     else:
         # Creating Points from coordinates
-        gdf_seismic = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x=df_seismic_resampled['cdp_x'].values,
-                                                                   y=df_seismic_resampled['cdp_y'].values),
-                                       data=df_seismic_resampled,
-                                       crs=crs_in).reset_index().drop(['twt', 'data'], axis=1)
+        gdf_seismic = (
+            gpd.GeoDataFrame(
+                geometry=gpd.points_from_xy(
+                    x=df_seismic_resampled["cdp_x"].values,
+                    y=df_seismic_resampled["cdp_y"].values,
+                ),
+                data=df_seismic_resampled,
+                crs=crs_in,
+            )
+            .reset_index()
+            .drop(["twt", "data"], axis=1)
+        )
 
     # Returning only every nth point
     if filter:
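A usage sketch for the point extraction; the values are placeholders. With n_meter set, the points are interpolated along the CDP path at a fixed spacing rather than taken from the traces directly:

    import gemgis as gg

    # Hypothetical SEG-Y file; one Point every 100 m along the CDP path,
    # with the cumulative distance stored in a 'distance' column
    gdf_points = gg.utils.get_cdp_points_of_seismic_data(
        path_in="data/seismic_line.sgy",
        crs_in="EPSG:32631",
        n_meter=100,
    )
    print(gdf_points[["geometry", "distance"]].head())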