Skip to content

Commit

Permalink
some upgrades
Browse files Browse the repository at this point in the history
- add CTX.save_as_tif
- implement LROC index
- improve scraping of CTX index
- fixes #54, #53
  • Loading branch information
michaelaye committed Dec 18, 2023
1 parent 9c90c32 commit 77f97f3
Show file tree
Hide file tree
Showing 8 changed files with 328 additions and 317 deletions.
375 changes: 139 additions & 236 deletions notebooks/api/02b_pds.ctx_index.ipynb

Large diffs are not rendered by default.

114 changes: 113 additions & 1 deletion notebooks/api/02e_pds.lroc_index.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
"outputs": [],
"source": [
"#| export\n",
"import pandas as pd\n",
"from yarl import URL\n",
"import warnings"
]
},
Expand All @@ -41,6 +43,15 @@
"from nbdev.showdoc import *"
]
},
{
"cell_type": "markdown",
"id": "7350aeb9-d9f2-48f2-bcbb-2c6dd7327b0c",
"metadata": {},
"source": [
"# EDR vs RDR etc.\n",
"I cannot distinguish between different data levels for dynamic index URLs yet, as is required for LROC, so for now I only implement the EDR index, pretending it's the only one!"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -50,10 +61,111 @@
"source": [
"#| export\n",
"class LROCIndex:\n",
" edr_url = 'https://pds.lroc.asu.edu/data/LRO-L-LROC-2-EDR-V1.0/'\n",
"\n",
" def __init__(self):\n",
" warnings.warn(\"LROCIndex not implemented yet.\")"
" self._volumes_table = None\n",
"\n",
" @property\n",
" def volumes_table(self):\n",
" if self._volumes_table is None:\n",
" self._volumes_table = pd.read_html(self.edr_url)[0].dropna(\n",
" how='all', axis=1).dropna(how='all', axis=0).iloc[1:-1, :-1]\n",
" return self._volumes_table\n",
"\n",
" @property\n",
" def latest_release_folder(self):\n",
" return self.volumes_table.iloc[-1, 0]\n",
"\n",
" @property\n",
" def latest_release_number(self):\n",
" return self.latest_release_folder.rstrip('/').split(\"_\")[1]\n",
"\n",
" @property\n",
" def latest_index_label_url(self):\n",
" return URL(self.edr_url) / f\"{self.latest_release_folder}/INDEX/CUMINDEX.LBL\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0609991f-15c4-4b54-9d7e-b9b4e1163324",
"metadata": {},
"outputs": [],
"source": [
"lroc = LROCIndex()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "378723f4-8dc8-49e7-8165-5be0374b6f59",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'LROLRC_0056A/'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lroc.latest_release_folder"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f1222bb-d3a4-4f78-ade7-ad705c145304",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'0056A'"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lroc.latest_release_number"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74d2908a-fd18-498a-8391-0b41026d5b9e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"URL('https://pds.lroc.asu.edu/data/LRO-L-LROC-2-EDR-V1.0/LROLRC_0056A/INDEX/CUMINDEX.LBL')"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lroc.latest_index_label_url"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2399c28d-2edd-4ff7-917c-0bfcc696e647",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion planetarypy/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.28.1"
__version__ = "0.30.0"
28 changes: 19 additions & 9 deletions planetarypy/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,10 @@
'planetarypy.ctx.CTX.preproc_cal_path': ('api/ctx.html#ctx.preproc_cal_path', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.preproc_folder': ('api/ctx.html#ctx.preproc_folder', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.proc_folder': ('api/ctx.html#ctx.proc_folder', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.save_as_tif': ('api/ctx.html#ctx.save_as_tif', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.spatial_summing': ('api/ctx.html#ctx.spatial_summing', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.spice_init': ('api/ctx.html#ctx.spice_init', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTX.tif_path': ('api/ctx.html#ctx.tif_path', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTXCollection': ('api/ctx.html#ctxcollection', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTXCollection.__init__': ('api/ctx.html#ctxcollection.__init__', 'planetarypy/ctx.py'),
'planetarypy.ctx.CTXCollection.__repr__': ('api/ctx.html#ctxcollection.__repr__', 'planetarypy/ctx.py'),
Expand Down Expand Up @@ -452,16 +454,16 @@
'planetarypy.pds.apps.get_index': ('api/pds.apps.html#get_index', 'planetarypy/pds/apps.py')},
'planetarypy.pds.ctx_index': { 'planetarypy.pds.ctx_index.CTXIndex': ( 'api/pds.ctx_index.html#ctxindex',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.__init__': ( 'api/pds.ctx_index.html#ctxindex.__init__',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.latest_index_label_url': ( 'api/pds.ctx_index.html#ctxindex.latest_index_label_url',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.latest_volume_url': ( 'api/pds.ctx_index.html#ctxindex.latest_volume_url',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.release_number': ( 'api/pds.ctx_index.html#ctxindex.release_number',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.release_url': ( 'api/pds.ctx_index.html#ctxindex.release_url',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.web_tables_list': ( 'api/pds.ctx_index.html#ctxindex.web_tables_list',
'planetarypy/pds/ctx_index.py')},
'planetarypy.pds.ctx_index.CTXIndex.latest_release_folder': ( 'api/pds.ctx_index.html#ctxindex.latest_release_folder',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.latest_release_number': ( 'api/pds.ctx_index.html#ctxindex.latest_release_number',
'planetarypy/pds/ctx_index.py'),
'planetarypy.pds.ctx_index.CTXIndex.volumes_table': ( 'api/pds.ctx_index.html#ctxindex.volumes_table',
'planetarypy/pds/ctx_index.py')},
'planetarypy.pds.indexes': { 'planetarypy.pds.indexes.Index': ('api/pds.indexes.html#index', 'planetarypy/pds/indexes.py'),
'planetarypy.pds.indexes.Index.__init__': ( 'api/pds.indexes.html#index.__init__',
'planetarypy/pds/indexes.py'),
Expand Down Expand Up @@ -524,7 +526,15 @@
'planetarypy.pds.lroc_index': { 'planetarypy.pds.lroc_index.LROCIndex': ( 'api/pds.lroc_index.html#lrocindex',
'planetarypy/pds/lroc_index.py'),
'planetarypy.pds.lroc_index.LROCIndex.__init__': ( 'api/pds.lroc_index.html#lrocindex.__init__',
'planetarypy/pds/lroc_index.py')},
'planetarypy/pds/lroc_index.py'),
'planetarypy.pds.lroc_index.LROCIndex.latest_index_label_url': ( 'api/pds.lroc_index.html#lrocindex.latest_index_label_url',
'planetarypy/pds/lroc_index.py'),
'planetarypy.pds.lroc_index.LROCIndex.latest_release_folder': ( 'api/pds.lroc_index.html#lrocindex.latest_release_folder',
'planetarypy/pds/lroc_index.py'),
'planetarypy.pds.lroc_index.LROCIndex.latest_release_number': ( 'api/pds.lroc_index.html#lrocindex.latest_release_number',
'planetarypy/pds/lroc_index.py'),
'planetarypy.pds.lroc_index.LROCIndex.volumes_table': ( 'api/pds.lroc_index.html#lrocindex.volumes_table',
'planetarypy/pds/lroc_index.py')},
'planetarypy.pds.opusapi': { 'planetarypy.pds.opusapi.OPUS': ('api/pds.opusapi.html#opus', 'planetarypy/pds/opusapi.py'),
'planetarypy.pds.opusapi.OPUS.__init__': ( 'api/pds.opusapi.html#opus.__init__',
'planetarypy/pds/opusapi.py'),
Expand Down
20 changes: 16 additions & 4 deletions planetarypy/ctx.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_edr_index(refresh=False):
cache['edrindex'] = edrindex
return edrindex

# %% ../notebooks/api/03_ctx.ipynb 8
# %% ../notebooks/api/03_ctx.ipynb 9
class CTXEDR:
"""Manage access to EDR data"""

Expand Down Expand Up @@ -146,7 +146,7 @@ def __str__(self):
def __repr__(self):
return self.__str__()

# %% ../notebooks/api/03_ctx.ipynb 30
# %% ../notebooks/api/03_ctx.ipynb 31
class CTX:
"""Class to manage dealing with CTX data.
Expand Down Expand Up @@ -342,6 +342,18 @@ def plot_calibrated(self):
"Plot the calibrated xarray using hvplot."
return self.plot_da(self.cal_da)

@property
def tif_path(self):
    "Path of the `.tif` counterpart of `map_name`, located inside `proc_folder`."
    folder = self.proc_folder
    # Swap whatever suffix the map file name carries for `.tif`.
    tif_name = self.map_name.with_suffix(".tif")
    return folder / tif_name

def save_as_tif(self, refresh=False):
    """Convert the dataset at `self.map_path` into a GeoTIFF at `self.tif_path`.

    An already existing output file is left untouched unless `refresh` is True.
    Returns None in all cases.

    Raises `RuntimeError` if GDAL cannot open the input or fails to write the
    output.
    """
    if self.tif_path.is_file() and not refresh:
        print("File exists. Use `refresh=True` to force recreation.")
        return
    src = gdal.Open(str(self.map_path))
    # Unless gdal.UseExceptions() is active, GDAL reports failure by returning
    # None instead of raising, so check explicitly.
    if src is None:
        raise RuntimeError(f"GDAL could not open {self.map_path}")
    print("Saving", self.tif_path)
    out = gdal.Translate(str(self.tif_path), src, format="GTiff")
    if out is None:
        raise RuntimeError(f"GDAL could not write {self.tif_path}")
    # Dropping the last references closes both datasets, which flushes the
    # output to disk.
    del out, src

def __str__(self):
"Print out some infos about yourself."
s = self.edr.__str__()
Expand All @@ -354,7 +366,7 @@ def __str__(self):
def __repr__(self):
return self.__str__()

# %% ../notebooks/api/03_ctx.ipynb 59
# %% ../notebooks/api/03_ctx.ipynb 66
class CTXCollection:
"""Class with several helpful methods to work with a set of CTX images.
Expand Down Expand Up @@ -521,7 +533,7 @@ def __str__(self):
def __repr__(self):
return self.__str__()

# %% ../notebooks/api/03_ctx.ipynb 102
# %% ../notebooks/api/03_ctx.ipynb 109
@call_parse
def ctx_calib(
pid: str, # CTX product_id
Expand Down
78 changes: 15 additions & 63 deletions planetarypy/pds/ctx_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,81 +8,33 @@
from ssl import SSLError
from string import Template

from yarl import URL

import pandas as pd

# %% ../../notebooks/api/02b_pds.ctx_index.ipynb 4
@dataclass
class CTXIndex:
"""Class to determine the URL for the latest cumulative index.
url = 'https://planetarydata.jpl.nasa.gov/img/data/mro/mars_reconnaissance_orbiter/ctx/'

This is a 2 step process, where first the MRO release page is scraped
for the latest CTX release, and then the latest release page is scraped
for the latest volume.
From that latest volume the latest index URL is constructed.
"""
volumes_url: str = "https://pds-imaging.jpl.nasa.gov/volumes/mro.html"
release_url_template: Template = Template(
"https://pds-imaging.jpl.nasa.gov/volumes/mro/release${release}.html")
volume_url_template: Template = Template(
"https://pds-imaging.jpl.nasa.gov/data/mro/mars_reconnaissance_orbiter/ctx/mrox_${volume}/"
)
scraped_tables: bool = False
release_scraped: bool = False
def __init__(self):
self._volumes_table = None

@property
def web_tables_list(self):
"""Use the pandas scraper to read in the MRO data release table.
The scraper returns several tables in a list and the last one
lists all the CTX volumes.
This could be replaced by cached properties.
"""
if not self.scraped_tables:
try:
self._list_of_scraped_tables = pd.read_html(self.volumes_url)
except SSLError:
print(f"pd.read_html({self.volumes_url}) failed.")
self.scraped_tables = True
return self._list_of_scraped_tables
def volumes_table(self):
if self._volumes_table is None:
self._volumes_table = pd.read_html(self.url)[0].dropna(
how='all', axis=1).dropna(how='all', axis=0).iloc[1:, :-1]
return self._volumes_table

@property
def release_number(self):
"""Fishes out the release number.
This is needed to construct the exact URL to the latest cumulative index file.
"""
alist = self.web_tables_list
return alist[-1].iloc[-1, 0].split()[-1]
def latest_release_folder(self):
return self.volumes_table.iloc[-2, 0]

@property
def release_url(self):
"Constructs the release URL from the release number."
return self.release_url_template.substitute(release=self.release_number)

@property
def latest_volume_url(self):
"""Scrape the Release URL for the latest volume URL in that.
This is necessary because a release usually has more that one volume.
"""
if not self.release_scraped:
alist = pd.read_html(self.release_url)
# get last row of 4th table
row = alist[3].iloc[-1]
number = None
# first number that is NAN breaks the loop over last row of table
for elem in row.values:
try:
number = int(elem.split()[-1])
except AttributeError:
break
self.number = number
self.release_scraped = True
return self.volume_url_template.substitute(volume=self.number)
def latest_release_number(self):
return self.latest_release_folder.rstrip('/').split("_")[1]

@property
def latest_index_label_url(self):
"Construct the URL for the latest cumulative index."
return URL(self.url) / f"{self.latest_release_folder}/index/cumindex.lbl"

return self.latest_volume_url + "index/cumindex.lbl"
26 changes: 24 additions & 2 deletions planetarypy/pds/lroc_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,32 @@
__all__ = ['LROCIndex']

# %% ../../notebooks/api/02e_pds.lroc_index.ipynb 2
import pandas as pd
from yarl import URL
import warnings

# %% ../../notebooks/api/02e_pds.lroc_index.ipynb 4
# %% ../../notebooks/api/02e_pds.lroc_index.ipynb 5
class LROCIndex:
    """Determine the URL of the latest cumulative LROC EDR index label.

    The listing at `edr_url` is scraped with pandas on first use and cached.
    The last row of the cleaned listing is taken as the latest release folder.
    Only the EDR data level is covered.
    """

    edr_url = 'https://pds.lroc.asu.edu/data/LRO-L-LROC-2-EDR-V1.0/'

    def __init__(self):
        # Filled lazily by `volumes_table`, so creating an instance does not
        # touch the network.
        self._volumes_table = None

    @property
    def volumes_table(self):
        "Cleaned table scraped from `edr_url`; fetched on first access, then cached."
        if self._volumes_table is None:
            listing = pd.read_html(self.edr_url)[0]
            # Remove columns and rows that are entirely NaN.
            listing = listing.dropna(how='all', axis=1).dropna(how='all', axis=0)
            # Trim the first and last remaining rows and the last column
            # (presumably non-volume entries of the listing -- TODO confirm).
            self._volumes_table = listing.iloc[1:-1, :-1]
        return self._volumes_table

    @property
    def latest_release_folder(self):
        "First-column entry of the last row of `volumes_table`, e.g. 'LROLRC_0056A/'."
        return self.volumes_table.iloc[-1, 0]

    @property
    def latest_release_number(self):
        "Part of the folder name after the first underscore, e.g. '0056A'."
        return self.latest_release_folder.rstrip('/').split("_")[1]

    @property
    def latest_index_label_url(self):
        "URL of `INDEX/CUMINDEX.LBL` inside the latest release folder."
        # The scraped folder name ends in '/', so strip it; otherwise the
        # joined path contains an empty '//' segment.
        folder = self.latest_release_folder.rstrip('/')
        return URL(self.edr_url) / f"{folder}/INDEX/CUMINDEX.LBL"
2 changes: 1 addition & 1 deletion settings.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ keywords = planetary science, data analysis
author = Michael Aye
author_email = [email protected]
copyright = K.-Michael Aye
version = 0.28.1
version = 0.30.0
min_python = 3.9
audience = Developers
language = English
Expand Down

0 comments on commit 77f97f3

Please sign in to comment.