Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create HIPS-style properties file #358

Merged
merged 8 commits into from
Sep 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@

import hats.pixel_math as hist
import hats.pixel_math.healpix_shim as hp
from hats.catalog import Catalog, PartitionInfo
from hats.catalog import Catalog, PartitionInfo, TableProperties
from hats.catalog.association_catalog.partition_join_info import PartitionJoinInfo
from hats.catalog.catalog_info import CatalogInfo
from hats.io.paths import pixel_catalog_files
from hats.pixel_math import HealpixPixel
from hats.pixel_tree import PixelAlignment, align_trees
Expand All @@ -29,7 +28,7 @@ def time_test_alignment_even_sky():

def time_test_cone_filter_multiple_order():
"""Create a catalog cone filter where we have multiple orders in the catalog"""
catalog_info = CatalogInfo(
catalog_info = TableProperties(
**{
"catalog_name": "test_name",
"catalog_type": "object",
Expand Down
2 changes: 1 addition & 1 deletion docs/guide/directory_scheme.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ structure:
__ /path/to/catalogs/<catalog_name>/
|__ _common_metadata
|__ _metadata
|__ catalog_info.json
|__ partition_info.csv
|__ properties
|__ Norder=1/
| |__ Dir=0/
| |__ Npix=0.parquet
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ dependencies = [
"astropy",
"fsspec>=2023.10.0", # Used for abstract filesystems
"healpy",
"jproperties",
"matplotlib>=3.3,<3.9",
"mocpy",
"numba>=0.58",
"numpy<3",
"pandas",
"pyarrow>=14.0.1",
"pydantic",
"typing-extensions>=4.3.0",
"universal-pathlib",
]
Expand Down
3 changes: 2 additions & 1 deletion src/hats/catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Catalog data wrappers"""

from .association_catalog.association_catalog import AssociationCatalog
from .association_catalog import AssociationCatalog
from .catalog import Catalog
from .catalog_type import CatalogType
from .dataset.dataset import Dataset
from .dataset.table_properties import TableProperties
from .margin_cache.margin_catalog import MarginCatalog
from .partition_info import PartitionInfo
1 change: 0 additions & 1 deletion src/hats/catalog/association_catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
from .association_catalog import AssociationCatalog
from .association_catalog_info import AssociationCatalogInfo
from .partition_join_info import PartitionJoinInfo
15 changes: 3 additions & 12 deletions src/hats/catalog/association_catalog/association_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import pandas as pd
import pyarrow as pa
from mocpy import MOC
from typing_extensions import TypeAlias
from upath import UPath

from hats.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
from hats.catalog.association_catalog.partition_join_info import PartitionJoinInfo
from hats.catalog.catalog_type import CatalogType
from hats.catalog.dataset.table_properties import TableProperties
from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset, PixelInputTypes
from hats.io import file_io, paths

Expand All @@ -24,24 +22,17 @@ class AssociationCatalog(HealpixDataset):
Catalog, corresponding to each pair of partitions in each catalog that contain rows to join.
"""

# Update CatalogInfoClass, used to check if the catalog_info is the correct type, and
# set the catalog info to the correct type
CatalogInfoClass: TypeAlias = AssociationCatalogInfo
catalog_info: CatalogInfoClass

JoinPixelInputTypes = Union[list, pd.DataFrame, PartitionJoinInfo]

def __init__(
self,
catalog_info: CatalogInfoClass,
catalog_info: TableProperties,
pixels: PixelInputTypes,
join_pixels: JoinPixelInputTypes,
catalog_path=None,
moc: MOC | None = None,
schema: pa.Schema | None = None,
) -> None:
if not catalog_info.catalog_type == CatalogType.ASSOCIATION:
raise ValueError("Catalog info `catalog_type` must be 'association'")
super().__init__(catalog_info, pixels, catalog_path, moc=moc, schema=schema)
self.join_info = self._get_partition_join_info_from_pixels(join_pixels)

Expand All @@ -67,7 +58,7 @@ def _get_partition_join_info_from_pixels(
@classmethod
def _read_args(
cls, catalog_base_dir: str | Path | UPath
) -> Tuple[CatalogInfoClass, PixelInputTypes, JoinPixelInputTypes]: # type: ignore[override]
) -> Tuple[TableProperties, PixelInputTypes, JoinPixelInputTypes]: # type: ignore[override]
args = super()._read_args(catalog_base_dir)
partition_join_info = PartitionJoinInfo.read_from_dir(catalog_base_dir)
return args + (partition_join_info,)
Expand Down
43 changes: 0 additions & 43 deletions src/hats/catalog/association_catalog/association_catalog_info.py

This file was deleted.

4 changes: 2 additions & 2 deletions src/hats/catalog/association_catalog/partition_join_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def primary_to_join_map(self) -> Dict[HealpixPixel, List[HealpixPixel]]:
return primary_to_join

def write_to_metadata_files(self, catalog_path: str | Path | UPath | None = None):
"""Generate parquet metadata, using the known partitions.
"""Generate parquet metadata, using the known join partitions.

Args:
catalog_path (UPath): base path for the catalog
Expand All @@ -86,7 +86,7 @@ def write_to_metadata_files(self, catalog_path: str | Path | UPath | None = None
"""
if catalog_path is None:
if self.catalog_base_dir is None:
raise ValueError("catalog_path is required if info was not loaded from a directory")
raise ValueError("catalog_path is required if join info was not loaded from a directory")
catalog_path = self.catalog_base_dir

batches = [
Expand Down
48 changes: 1 addition & 47 deletions src/hats/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,12 @@

from __future__ import annotations

from pathlib import Path
from typing import List, Tuple

import numpy as np
import pyarrow as pa
from mocpy import MOC
from typing_extensions import TypeAlias
from upath import UPath

import hats.pixel_math.healpix_shim as hp
from hats.catalog.catalog_info import CatalogInfo
from hats.catalog.catalog_type import CatalogType
from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset, PixelInputTypes
from hats.catalog.healpix_dataset.healpix_dataset import HealpixDataset
from hats.pixel_math import HealpixPixel
from hats.pixel_math.box_filter import generate_box_moc, wrap_ra_angles
from hats.pixel_math.cone_filter import generate_cone_moc
Expand All @@ -36,45 +29,6 @@ class Catalog(HealpixDataset):
`Norder=/Dir=/Npix=.parquet`
"""

HIPS_CATALOG_TYPES = [CatalogType.OBJECT, CatalogType.SOURCE]

# Update CatalogInfoClass, used to check if the catalog_info is the correct type, and
# set the catalog info to the correct type
CatalogInfoClass: TypeAlias = CatalogInfo
catalog_info: CatalogInfoClass

def __init__(
self,
catalog_info: CatalogInfoClass,
pixels: PixelInputTypes,
catalog_path: str | Path | UPath | None = None,
moc: MOC | None = None,
schema: pa.Schema | None = None,
) -> None:
"""Initializes a Catalog

Args:
catalog_info: CatalogInfo object with catalog metadata
pixels: Specifies the pixels contained in the catalog. Can be either a
list of HealpixPixel, `PartitionInfo object`, or a `PixelTree` object
catalog_path: If the catalog is stored on disk, specify the location of the catalog
Does not load the catalog from this path, only store as metadata
moc (mocpy.MOC): MOC object representing the coverage of the catalog
schema (pa.Schema): The pyarrow schema for the catalog
"""
if catalog_info.catalog_type not in self.HIPS_CATALOG_TYPES:
raise ValueError(
f"Catalog info `catalog_type` must be one of "
f"{', '.join([t.value for t in self.HIPS_CATALOG_TYPES])}"
)
super().__init__(
catalog_info,
pixels,
catalog_path=catalog_path,
moc=moc,
schema=schema,
)

def filter_by_cone(self, ra: float, dec: float, radius_arcsec: float) -> Catalog:
"""Filter the pixels in the catalog to only include the pixels that overlap with a cone

Expand Down
21 changes: 0 additions & 21 deletions src/hats/catalog/catalog_info.py

This file was deleted.

1 change: 0 additions & 1 deletion src/hats/catalog/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
from .base_catalog_info import BaseCatalogInfo
from .dataset import Dataset
69 changes: 0 additions & 69 deletions src/hats/catalog/dataset/base_catalog_info.py

This file was deleted.

Loading