Skip to content

Commit

Permalink
Merge pull request #113 from astronomy-commons/sean/cone-search
Browse files Browse the repository at this point in the history
Add Cone Search Catalog Filter
  • Loading branch information
smcguire-cmu authored Jul 17, 2023
2 parents 550021f + 05d1cd4 commit 6125aa8
Show file tree
Hide file tree
Showing 9 changed files with 230 additions and 98 deletions.
102 changes: 88 additions & 14 deletions docs/notebooks/cone_search.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/.pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ good-names=i,
k,
ex,
Run,
ra,
_

# Good variable names regexes, separated by a comma. If names match any regex,
Expand Down
10 changes: 3 additions & 7 deletions src/hipscat/catalog/association_catalog/association_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@

import pandas as pd

from hipscat.catalog.catalog_type import CatalogType
from hipscat.catalog.association_catalog.association_catalog_info import (
AssociationCatalogInfo,
)
from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo
from hipscat.catalog.catalog_type import CatalogType
from hipscat.catalog.dataset.dataset import Dataset
from hipscat.io import FilePointer, paths

Expand Down Expand Up @@ -59,8 +57,6 @@ def _read_args(
cls, catalog_base_dir: FilePointer
) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]:
args = super()._read_args(catalog_base_dir)
partition_join_info_file = paths.get_partition_join_info_pointer(
catalog_base_dir
)
partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir)
partition_join_info = PartitionJoinInfo.read_from_file(partition_join_info_file)
return args + (partition_join_info,)
24 changes: 21 additions & 3 deletions src/hipscat/catalog/catalog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Container class to hold catalog metadata and partition iteration"""
from __future__ import annotations

import dataclasses
from typing import Tuple, Union

import pandas as pd
Expand All @@ -10,6 +11,7 @@
from hipscat.catalog.dataset.dataset import Dataset
from hipscat.catalog.partition_info import PartitionInfo
from hipscat.io import FilePointer, file_io, paths
from hipscat.pixel_math.cone_filter import filter_pixels_by_cone
from hipscat.pixel_tree.pixel_tree import PixelTree
from hipscat.pixel_tree.pixel_tree_builder import PixelTreeBuilder

Expand Down Expand Up @@ -82,9 +84,7 @@ def get_pixels(self):
return self.partition_info.data_frame

@classmethod
def _read_args(
cls, catalog_base_dir: FilePointer
) -> Tuple[CatalogInfoClass, PartitionInfo]:
def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, PartitionInfo]:
args = super()._read_args(catalog_base_dir)
partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
partition_info = PartitionInfo.read_from_file(partition_info_file)
Expand All @@ -96,3 +96,21 @@ def _check_files_exist(cls, catalog_base_dir: FilePointer):
partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
if not file_io.does_file_or_directory_exist(partition_info_file):
raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}")

def filter_by_cone(self, ra: float, dec: float, radius: float) -> Catalog:
    """Build a catalog restricted to the pixels of this catalog that overlap with a cone

    Args:
        ra (float): Right Ascension of the center of the cone in degrees
        dec (float): Declination of the center of the cone in degrees
        radius (float): Radius of the cone in degrees

    Returns:
        A new catalog holding only the pixels selected by ``filter_pixels_by_cone``. Its
        catalog info is a copy of this catalog's info with ``total_rows`` set to None.
    """
    pixels_in_cone = filter_pixels_by_cone(self.pixel_tree, ra, dec, radius)
    # Copy the catalog info instead of mutating it; total_rows is cleared so the copy does
    # not carry over the row count of the unfiltered catalog.
    info_without_row_count = dataclasses.replace(self.catalog_info, total_rows=None)
    return Catalog(info_without_row_count, pixels_in_cone)
51 changes: 51 additions & 0 deletions src/hipscat/pixel_math/cone_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import healpy as hp
import numpy as np
import pandas as pd

from hipscat.catalog.partition_info import PartitionInfo
from hipscat.pixel_tree import PixelAlignment, PixelAlignmentType, align_trees
from hipscat.pixel_tree.pixel_tree import PixelTree
from hipscat.pixel_tree.pixel_tree_builder import PixelTreeBuilder


def filter_pixels_by_cone(pixel_tree: PixelTree, ra: float, dec: float, radius: float) -> pd.DataFrame:
    """Filter the leaf pixels in a pixel tree to return a partition_info dataframe with the pixels
    that overlap with a cone

    Args:
        pixel_tree (PixelTree): The pixel tree whose pixels are filtered
        ra (float): Right Ascension of the center of the cone in degrees
        dec (float): Declination of the center of the cone in degrees
        radius (float): Radius of the cone in degrees

    Returns:
        A partition_info dataframe (order and pixel columns, index reset to start at 0) with only
        the pixels of ``pixel_tree`` that overlap with the specified cone
    """
    # Cover the cone with pixels at the highest order stored in the tree.
    max_order = max(pixel_tree.pixels.keys())
    cone_tree = _generate_cone_pixel_tree(ra, dec, radius, max_order)
    # An inner alignment keeps only the regions that are present in both trees.
    cone_alignment = align_trees(pixel_tree, cone_tree, alignment_type=PixelAlignmentType.INNER)
    # Only the primary (left tree) columns are needed: they identify the pixels of ``pixel_tree``.
    pixels_df = cone_alignment.pixel_mapping[
        [PixelAlignment.PRIMARY_ORDER_COLUMN_NAME, PixelAlignment.PRIMARY_PIXEL_COLUMN_NAME]
    ]
    # The mapping can list the same primary pixel on several rows (one per overlapping cone
    # pixel), so collapse repeats before renaming to the partition info column names.
    filtered_pixels_df = pixels_df.drop_duplicates()
    partition_info_df = filtered_pixels_df.rename(
        columns={
            PixelAlignment.PRIMARY_ORDER_COLUMN_NAME: PartitionInfo.METADATA_ORDER_COLUMN_NAME,
            PixelAlignment.PRIMARY_PIXEL_COLUMN_NAME: PartitionInfo.METADATA_PIXEL_COLUMN_NAME,
        }
    )
    return partition_info_df.reset_index(drop=True)


def _generate_cone_pixel_tree(ra: float, dec: float, radius: float, order: int):
    """Build a pixel tree whose leaf nodes are the pixels at ``order`` that overlap with a cone

    Args:
        ra (float): Right Ascension of the center of the cone in degrees
        dec (float): Declination of the center of the cone in degrees
        radius (float): Radius of the cone in degrees
        order (int): HEALPix order at which the cone pixels are generated

    Returns:
        The pixel tree built by ``PixelTreeBuilder.from_partition_info_df`` from the cone pixels
    """
    n_side = hp.order2nside(order)
    # lonlat=True makes healpy read (ra, dec) as longitude/latitude in degrees.
    cone_center = hp.ang2vec(ra, dec, lonlat=True)
    # query_disc takes the radius in radians; nest=True returns pixel numbers in nested ordering.
    pixels_in_cone = hp.query_disc(n_side, cone_center, np.radians(radius), inclusive=True, nest=True)
    # Every cone pixel sits at the same order, so the order column is a constant array.
    orders = np.full(len(pixels_in_cone), order)
    partition_info_df = pd.DataFrame.from_dict(
        {
            PartitionInfo.METADATA_ORDER_COLUMN_NAME: orders,
            PartitionInfo.METADATA_PIXEL_COLUMN_NAME: pixels_in_cone,
        }
    )
    return PixelTreeBuilder.from_partition_info_df(partition_info_df)
74 changes: 20 additions & 54 deletions src/hipscat/pixel_tree/pixel_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from hipscat.pixel_tree.pixel_tree import PixelTree
from hipscat.pixel_tree.pixel_tree_builder import PixelTreeBuilder


LEFT_TREE_KEY = "left"
RIGHT_TREE_KEY = "right"

Expand Down Expand Up @@ -84,9 +83,7 @@ def align_trees(
"""

tree_builder = PixelTreeBuilder()
pixels_to_search = _get_children_pixels_from_trees(
[left, right], left.root_pixel.pixel
)
pixels_to_search = _get_children_pixels_from_trees([left, right], left.root_pixel.pixel)

while len(pixels_to_search) > 0:
search_pixel = pixels_to_search.pop(0)
Expand All @@ -96,14 +93,10 @@ def align_trees(
if left_node.node_type == right_node.node_type:
if left_node.node_type == PixelNodeType.LEAF:
# Matching leaf nodes get added to the aligned tree
tree_builder.create_node_and_parent_if_not_exist(
search_pixel, PixelNodeType.LEAF
)
tree_builder.create_node_and_parent_if_not_exist(search_pixel, PixelNodeType.LEAF)
else:
# For matching inner nodes search into their children to check for alignment
pixels_to_search += _get_children_pixels_from_trees(
[left, right], search_pixel
)
pixels_to_search += _get_children_pixels_from_trees([left, right], search_pixel)
else:
# Nodes with non-matching types: one must be a leaf and the other an inner node
if left_node.node_type == PixelNodeType.LEAF:
Expand All @@ -112,45 +105,33 @@ def align_trees(
else:
tree_with_leaf_node = RIGHT_TREE_KEY
inner_node = left_node
if _should_include_all_pixels_from_tree(
tree_with_leaf_node, alignment_type
):
if _should_include_all_pixels_from_tree(tree_with_leaf_node, alignment_type):
# If the alignment type means fully covering the tree with the leaf node, then
# create a leaf node in the aligned tree and split it to match the partitioning
# of the other tree to ensure the node is fully covered
tree_builder.create_node_and_parent_if_not_exist(
search_pixel, PixelNodeType.LEAF
)
tree_builder.create_node_and_parent_if_not_exist(search_pixel, PixelNodeType.LEAF)
tree_builder.split_leaf_to_match_partitioning(inner_node)
else:
# Otherwise just add the subtree from the inner node to include all the
# overlapping pixels
tree_builder.create_node_and_parent_if_not_exist(
search_pixel, PixelNodeType.INNER
)
tree_builder.create_node_and_parent_if_not_exist(search_pixel, PixelNodeType.INNER)
tree_builder.add_all_descendants_from_node(inner_node)
elif search_pixel in left and search_pixel not in right:
# For nodes that only exist in one tree, include them if the alignment type means that
# tree should have all its nodes included
if _should_include_all_pixels_from_tree(LEFT_TREE_KEY, alignment_type):
tree_builder.create_node_and_parent_if_not_exist(
search_pixel, left[search_pixel].node_type
)
tree_builder.create_node_and_parent_if_not_exist(search_pixel, left[search_pixel].node_type)
tree_builder.add_all_descendants_from_node(left[search_pixel])
elif search_pixel in right and search_pixel not in left:
if _should_include_all_pixels_from_tree(RIGHT_TREE_KEY, alignment_type):
tree_builder.create_node_and_parent_if_not_exist(
search_pixel, right[search_pixel].node_type
)
tree_builder.create_node_and_parent_if_not_exist(search_pixel, right[search_pixel].node_type)
tree_builder.add_all_descendants_from_node(right[search_pixel])
tree = tree_builder.build()
pixel_mapping = _generate_pixel_mapping_from_tree(left, right, tree)
return PixelAlignment(tree, pixel_mapping, alignment_type)


def _get_children_pixels_from_trees(
trees: List[PixelTree], pixel: HealpixInputTypes
) -> List[HealpixPixel]:
def _get_children_pixels_from_trees(trees: List[PixelTree], pixel: HealpixInputTypes) -> List[HealpixPixel]:
"""Returns the combined HEALPix pixels that have child nodes of the given pixel from trees
This returns a list of HEALPix pixels, not the actual child nodes, and does not contain
Expand All @@ -173,9 +154,7 @@ def _get_children_pixels_from_trees(
return list(pixels_to_add)


def _should_include_all_pixels_from_tree(
tree_type: str, alignment_type: PixelAlignmentType
) -> bool:
def _should_include_all_pixels_from_tree(tree_type: str, alignment_type: PixelAlignmentType) -> bool:
"""If for a given alignment type, the left or right tree should include all pixels or just the
ones that overlap with the other tree.
Expand All @@ -188,13 +167,12 @@ def _should_include_all_pixels_from_tree(
"""
left_add_types = [PixelAlignmentType.OUTER, PixelAlignmentType.LEFT]
right_add_types = [PixelAlignmentType.OUTER, PixelAlignmentType.RIGHT]
return (tree_type == LEFT_TREE_KEY and alignment_type in left_add_types) or \
(tree_type == RIGHT_TREE_KEY and alignment_type in right_add_types)
return (tree_type == LEFT_TREE_KEY and alignment_type in left_add_types) or (
tree_type == RIGHT_TREE_KEY and alignment_type in right_add_types
)


def _generate_pixel_mapping_from_tree(
left: PixelTree, right: PixelTree, aligned: PixelTree
) -> pd.DataFrame:
def _generate_pixel_mapping_from_tree(left: PixelTree, right: PixelTree, aligned: PixelTree) -> pd.DataFrame:
"""Generates a pixel mapping dataframe from two trees and their aligned tree
The pixel mapping dataframe contains columns for the order and pixel of overlapping pixels in
Expand Down Expand Up @@ -226,27 +204,15 @@ def _generate_pixel_mapping_from_tree(
right_leaf_nodes = [None]
for left_node in left_leaf_nodes:
for right_node in right_leaf_nodes:
pixel_mapping_dict[PixelAlignment.ALIGNED_ORDER_COLUMN_NAME].append(
leaf_node.hp_order
)
pixel_mapping_dict[PixelAlignment.ALIGNED_PIXEL_COLUMN_NAME].append(
leaf_node.hp_pixel
)
pixel_mapping_dict[PixelAlignment.ALIGNED_ORDER_COLUMN_NAME].append(leaf_node.hp_order)
pixel_mapping_dict[PixelAlignment.ALIGNED_PIXEL_COLUMN_NAME].append(leaf_node.hp_pixel)
left_order = left_node.hp_order if left_node is not None else None
left_pixel = left_node.hp_pixel if left_node is not None else None
pixel_mapping_dict[PixelAlignment.PRIMARY_ORDER_COLUMN_NAME].append(
left_order
)
pixel_mapping_dict[PixelAlignment.PRIMARY_PIXEL_COLUMN_NAME].append(
left_pixel
)
pixel_mapping_dict[PixelAlignment.PRIMARY_ORDER_COLUMN_NAME].append(left_order)
pixel_mapping_dict[PixelAlignment.PRIMARY_PIXEL_COLUMN_NAME].append(left_pixel)
right_order = right_node.hp_order if right_node is not None else None
right_pixel = right_node.hp_pixel if right_node is not None else None
pixel_mapping_dict[PixelAlignment.JOIN_ORDER_COLUMN_NAME].append(
right_order
)
pixel_mapping_dict[PixelAlignment.JOIN_PIXEL_COLUMN_NAME].append(
right_pixel
)
pixel_mapping_dict[PixelAlignment.JOIN_ORDER_COLUMN_NAME].append(right_order)
pixel_mapping_dict[PixelAlignment.JOIN_PIXEL_COLUMN_NAME].append(right_pixel)
pixel_mapping = pd.DataFrame.from_dict(pixel_mapping_dict)
return pixel_mapping
8 changes: 2 additions & 6 deletions src/hipscat/pixel_tree/pixel_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,7 @@ def get_node(self, pixel: HealpixInputTypes) -> PixelNode | None:
def __getitem__(self, item):
return self.get_node(item)

def get_leaf_nodes_at_healpix_pixel(
self, pixel: HealpixInputTypes
) -> List[PixelNode]:
def get_leaf_nodes_at_healpix_pixel(self, pixel: HealpixInputTypes) -> List[PixelNode]:
"""Lookup all leaf nodes that contain or are within a given HEALPix pixel
- Exact matches will return a list with only the matching pixel
Expand Down Expand Up @@ -111,9 +109,7 @@ def get_leaf_nodes_at_healpix_pixel(
return []
return [node_in_tree]

def _find_first_lower_order_leaf_node_in_tree(
self, pixel: HealpixInputTypes
) -> PixelNode | None:
def _find_first_lower_order_leaf_node_in_tree(self, pixel: HealpixInputTypes) -> PixelNode | None:
pixel = get_healpix_pixel(pixel)
for delta_order in range(1, pixel.order + 1):
lower_pixel = pixel.convert_to_lower_order(delta_order)
Expand Down
18 changes: 4 additions & 14 deletions src/hipscat/pixel_tree/pixel_tree_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@

from hipscat.catalog.partition_info import PartitionInfo
from hipscat.pixel_math import HealpixPixel
from hipscat.pixel_math.healpix_pixel_convertor import (
HealpixInputTypes,
get_healpix_pixel,
)
from hipscat.pixel_math.healpix_pixel_convertor import HealpixInputTypes, get_healpix_pixel
from hipscat.pixel_tree.pixel_node import PixelNode
from hipscat.pixel_tree.pixel_node_type import PixelNodeType
from hipscat.pixel_tree.pixel_tree import PixelTree
Expand Down Expand Up @@ -174,9 +171,7 @@ def create_node(
node_to_replace = None
if pixel in self:
if not replace_existing_node:
raise ValueError(
f"Cannot create node at {str(pixel)}, node already exists"
)
raise ValueError(f"Cannot create node at {str(pixel)}, node already exists")
node_to_replace = self[pixel]
parent.remove_child_link(node_to_replace)
node = PixelNode(pixel, node_type, parent)
Expand All @@ -192,7 +187,6 @@ def create_node(
for child in node_to_replace.children:
self._remove_node_and_children_from_tree(child.pixel)


def remove_node(self, pixel: HealpixInputTypes):
"""Remove node in tree
Expand Down Expand Up @@ -263,9 +257,5 @@ def split_leaf_to_match_partitioning(self, node_to_match: PixelNode):
parent_node = self[node.parent.pixel]
if parent_node.node_type == PixelNodeType.LEAF:
parent_node.node_type = PixelNodeType.INNER
for child_pixel in parent_node.pixel.convert_to_higher_order(
delta_order=1
):
self.create_node(
child_pixel, PixelNodeType.LEAF, parent=parent_node
)
for child_pixel in parent_node.pixel.convert_to_higher_order(delta_order=1):
self.create_node(child_pixel, PixelNodeType.LEAF, parent=parent_node)
Loading

0 comments on commit 6125aa8

Please sign in to comment.