From 470c1b2fbde5fda87c59dd67d77626f5f3d2f2fb Mon Sep 17 00:00:00 2001 From: AlexanderJuestel Date: Sun, 26 Nov 2023 21:03:01 +0100 Subject: [PATCH] EditeFiles --- gemgis/misc.py | 33 +++++++---- tests/test_misc.py | 138 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 12 deletions(-) diff --git a/gemgis/misc.py b/gemgis/misc.py index fb853405..de63eb99 100644 --- a/gemgis/misc.py +++ b/gemgis/misc.py @@ -278,6 +278,9 @@ def get_meta_data(page: List[str]) -> list: .. versionadded:: 1.0.x + .. versionchanged:: 1.1.7 + Adapting positions of coordinate values. + Example _______ @@ -338,7 +341,7 @@ def get_meta_data(page: List[str]) -> list: well_number = well_number.split('Archiv-Nr.')[0] # Obtaining Depth of well - well_depth = page[page.index('Endteufe') + 2:page.index('Endteufe') + 3] + well_depth = page[page.index('Endteufe') + 3:page.index('Endteufe') + 4] well_depth = float(''.join(well_depth).replace(':', '')) # Obtaining Stratigraphie der Endteufe @@ -355,20 +358,20 @@ def get_meta_data(page: List[str]) -> list: well_gemarkung = ''.join(well_gemarkung).replace(':', '') # Obtaining GK Coordinates of wells - well_coord_x_gk = page[page.index('Rechtswert/Hochwert') + 2:page.index('Rechtswert/Hochwert') + 3] + well_coord_x_gk = page[page.index('Rechtswert/Hochwert') + 3:page.index('Rechtswert/Hochwert') + 4] well_coord_x_gk = ''.join(well_coord_x_gk).replace(':', '') - well_coord_y_gk = page[page.index('Rechtswert/Hochwert') + 4:page.index('Rechtswert/Hochwert') + 5] + well_coord_y_gk = page[page.index('Rechtswert/Hochwert') + 5:page.index('Rechtswert/Hochwert') + 6] well_coord_y_gk = ''.join(well_coord_y_gk).replace(':', '') # Obtaining UTM Coordinates of wells - well_coord_x = page[page.index('East/North') + 2:page.index('East/North') + 3] + well_coord_x = page[page.index('East/North') + 3:page.index('East/North') + 4] well_coord_x = ''.join(well_coord_x).replace(':', '') - well_coord_y = page[page.index('East/North') + 4:page.index('East/North') + 5] + well_coord_y = page[page.index('East/North') + 5:page.index('East/North') + 6] well_coord_y = ''.join(well_coord_y).replace(':', '') - well_coord_z = page[page.index('Ansatzpunktes') + 2:page.index('Ansatzpunktes') + 3] + well_coord_z = page[page.index('Ansatzpunktes') + 3:page.index('Ansatzpunktes') + 4] well_coord_z = ''.join(well_coord_z).replace(':', '') # Obtaining Coordinates Precision @@ -488,6 +491,9 @@ def get_meta_data_df(data: str, .. versionadded:: 1.0.x + .. versionchanged:: 1.1.7 + Fixed bug in parsing PDF. + Example _______ @@ -531,7 +537,7 @@ def get_meta_data_df(data: str, # Split Data data = data.split() data = '#'.join(data) - data = data.split('-Stammdaten') + data = data.split('-#Stammdaten') data = [item.split('|')[0] for item in data] data = [item.split('#') for item in data] @@ -689,17 +695,17 @@ def get_stratigraphic_data(text: list, well_name = ''.join(well_name).replace(':', '') # Obtaining Depth of well - well_depth = text[text.index('Endteufe') + 2:text.index('Endteufe') + 3] + well_depth = text[text.index('Endteufe') + 3:text.index('Endteufe') + 4] well_depth = float(''.join(well_depth).replace(':', '')) # Obtaining UTM Coordinates of wells - well_coord_x = text[text.index('East/North') + 2:text.index('East/North') + 3] + well_coord_x = text[text.index('East/North') + 3:text.index('East/North') + 4] well_coord_x = ''.join(well_coord_x).replace(':', '') - well_coord_y = text[text.index('East/North') + 4:text.index('East/North') + 5] + well_coord_y = text[text.index('East/North') + 5:text.index('East/North') + 6] well_coord_y = ''.join(well_coord_y).replace(':', '') - well_coord_z = text[text.index('Ansatzpunktes') + 2:text.index('Ansatzpunktes') + 3] + well_coord_z = text[text.index('Ansatzpunktes') + 3:text.index('Ansatzpunktes') + 4] well_coord_z = ''.join(well_coord_z).replace(':', '') # Defining Phrases @@ -866,6 +872,9 @@ def get_stratigraphic_data_df(data: str, .. versionadded:: 1.0.x + .. versionchanged:: 1.1.7 + Fixed bug in parsing PDF. + Example _______ @@ -926,7 +935,7 @@ def get_stratigraphic_data_df(data: str, data = '#'.join(data) # Split entire string at each new page into separate elements of a list - data = data.split('-Stammdaten') + data = data.split('-#Stammdaten') # Cut off the last part of each element, this is not done for each page # Segment to filter out stratigraphic tables that have multiple versions and are on multiple pages diff --git a/tests/test_misc.py b/tests/test_misc.py index ca486fae..766764b9 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -23,6 +23,8 @@ import geopandas as gpd import re import gemgis as gg +import pytest +import numpy as np gg.download_gemgis_data.download_tutorial_data(filename='test_misc.zip', dirpath='../docs/getting_started/tutorial/data/test_misc/') @@ -41,6 +43,21 @@ def test_load_pdf(): assert isinstance(pdf, str) + with pytest.raises(TypeError): + pdf = load_pdf(path=['../docs/getting_started/tutorial/data/test_misc/test_pdf.pdf'], + save_as_txt=True) + + with pytest.raises(TypeError): + pdf = load_pdf(path='../docs/getting_started/tutorial/data/test_misc/test_pdf.pdf', + save_as_txt='True') + + with pytest.raises(TypeError): + pdf = load_pdf(path='../docs/getting_started/tutorial/data/test_misc/test_pdf.doc', + save_as_txt=True) + + with pytest.raises(FileNotFoundError): + pdf = load_pdf(path='../docs/getting_starte/tutorial/data/test_misc/test_pdf.pdf', + save_as_txt=True) # Testing get_coordinate_data ########################################################### @@ -67,6 +84,14 @@ def test_get_meta_data(): assert isinstance(data, list) + with pytest.raises(TypeError): + data = [get_meta_data(page=np.array(item)) for item in data] + + with pytest.raises(TypeError): + data = [get_meta_data(page=[item]) for item in data] + + + # Testing coordinates_table_list_comprehension ########################################################### @@ -116,6 +141,20 @@ def test_get_meta_data_df(): assert df.loc[0]['Z'] == 60 assert df.loc[1]['Z'] == 60 + with pytest.raises(TypeError): + df = get_meta_data_df(data=[pdf], + name='Test', + return_gdf=True) + + with pytest.raises(TypeError): + df = get_meta_data_df(data=pdf, + name=['Test'], + return_gdf=True) + + with pytest.raises(TypeError): + df = get_meta_data_df(data=pdf, + name='Test', + return_gdf='True') # Testing get_stratigraphic_data ########################################################### @@ -188,6 +227,21 @@ def test_get_stratigraphic_data(): assert isinstance(stratigraphy, list) + with pytest.raises(TypeError): + stratigraphy = [get_stratigraphic_data(text=np.array(item), + symbols=symbols, + formations=formations) for item in data] + + with pytest.raises(TypeError): + stratigraphy = [get_stratigraphic_data(text=item, + symbols=np.array(symbols), + formations=formations) for item in data] + + with pytest.raises(TypeError): + stratigraphy = [get_stratigraphic_data(text=item, + symbols=symbols, + formations=np.array(formations)) for item in data] + # Testing get_stratigraphic_data_df ########################################################### @@ -244,6 +298,44 @@ def test_get_stratigraphic_data_df(): assert isinstance(df, gpd.geodataframe.GeoDataFrame) + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=[pdf], + name='GD', + symbols=symbols, + formations=formations, + return_gdf=True) + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=pdf, + name=['GD'], + symbols=symbols, + formations=formations, + return_gdf=True) + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=pdf, + name='GD', + symbols=np.array(symbols), + formations=formations, + return_gdf=True) + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=pdf, + name='GD', + symbols=symbols, + formations=np.array(formations), + return_gdf=True) + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=pdf, + name='GD', + symbols=symbols, + formations=formations, + return_gdf='True') + + with pytest.raises(TypeError): + df = get_stratigraphic_data_df(data=pdf, + name='GD', + symbols=symbols, + formations=formations, + remove_last='True', + return_gdf=True) # Testing stratigraphic_table_list_comprehension ########################################################### @@ -303,6 +395,35 @@ def test_stratigraphic_table_list_comprehension(): except UnicodeEncodeError: pass + try: + pdf = load_pdf('../docs/getting_started/tutorial/data/test_misc/test_pdf.pdf') + + assert type(pdf) == str + + df = get_stratigraphic_data_df(data=pdf, + name='Test', + symbols=symbols, + formations=formations, + remove_last=True, + return_gdf=False) + + assert type(df) == pd.DataFrame + assert len(df) == 5 + assert df.loc[0]['Depth'] == 1242 + assert df.loc[4]['Depth'] == 1135 + assert df.loc[0]['Name'] == 'ASCHEBERG12STK.' + assert df.loc[4]['Name'] == 'ASCHEBERG15STK.' + assert df.loc[0]['X'] == 32407673.17 + assert df.loc[4]['X'] == 32407713.16 + assert df.loc[0]['Y'] == 5742123.75 + assert df.loc[4]['Y'] == 5742143.75 + assert df.loc[0]['Z'] == -870 + assert df.loc[4]['Z'] == 59.5 + assert df.loc[0]['Altitude'] == 60 + assert df.loc[4]['Altitude'] == 60 + except UnicodeEncodeError: + pass + # Testing load_symbols ########################################################### @@ -316,6 +437,14 @@ def test_load_symbols(): assert isinstance(symbols, list) + with pytest.raises(TypeError): + symbols = load_symbols(path=['../docs/getting_started/tutorial/data/test_misc/symbols20201216.txt']) + + with pytest.raises(TypeError): + symbols = load_symbols(path='../docs/getting_started/tutorial/data/test_misc/symbols20201216.pdf') + + with pytest.raises(FileNotFoundError): + symbols = load_symbols(path='../docs/getting_starte/tutorial/data/test_misc/symbols20201216.txt') # Testing load_formations ########################################################### @@ -325,3 +454,12 @@ def test_load_formations(): formations = load_formations(path='../docs/getting_started/tutorial/data/test_misc/formations20210109.txt') assert isinstance(formations, list) + + with pytest.raises(TypeError): + formations = load_formations(path=['../docs/getting_started/tutorial/data/test_misc/formations20210109.txt']) + + with pytest.raises(TypeError): + formations = load_formations(path='../docs/getting_started/tutorial/data/test_misc/formations20210109.pdf') + + with pytest.raises(FileNotFoundError): + formations = load_formations(path='../docs/getting_starte/tutorial/data/test_misc/formations20210109.txt') \ No newline at end of file