Skip to content

Commit

Permalink
refactor: remove type_id from search_content API
Browse files Browse the repository at this point in the history
There's no need for type_id to be a separate argument to this API;
we've already documented content_type_id as a field on our Unit
classes, so let's make it possible to search for that field using
a Criteria, just like other documented fields.

Although this costs some extra code to deal with _content_type_id
internally, this is likely worthwhile due to:

- more consistent API; callers filter on content_type_id like any
  other field, rather than having to remember that type_id is
  passed as a regular argument while all other fields are passed
  via criteria.

- fits better with some upcoming proposed features;
  a Criteria.for_units(...) has been proposed as a method of
  searching for units matching given instances of Unit classes.
  It would not make sense to force a caller to pass a type_id in
  that case, when the given Units already know their own type_id.

Note also that validate_type_ids was removed, because it's OK to
omit type_id altogether (which means search for all types), and
it's OK to search for types not currently known by this library
(which will be returned as just plain Unit instances).
  • Loading branch information
rohanpm committed Jan 7, 2020
1 parent 618425f commit 5442586
Show file tree
Hide file tree
Showing 7 changed files with 229 additions and 87 deletions.
6 changes: 4 additions & 2 deletions examples/search-repo-content
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import os
import logging
from argparse import ArgumentParser

from pubtools.pulplib import Client
from pubtools.pulplib import Client, Criteria

log = logging.getLogger("search-repo-content")

Expand Down Expand Up @@ -47,7 +47,9 @@ def main():

client = make_client(p)
repository = client.get_repository(p.repo_id)
units = repository.search_content(type_id=p.content_type)
units = repository.search_content(
Criteria.with_field("content_type_id", p.content_type)
)

count = 0
for unit in units:
Expand Down
39 changes: 27 additions & 12 deletions pubtools/pulplib/_impl/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..page import Page
from ..criteria import Criteria
from ..model import Repository, MaintenanceReport, Distributor, Unit
from .search import filters_for_criteria, validate_type_ids
from .search import search_for_criteria
from .errors import PulpException
from .poller import TaskPoller
from . import retry
Expand Down Expand Up @@ -222,22 +222,37 @@ def search_distributor(self, criteria=None):
"""
return self._search(Distributor, "distributors", criteria=criteria)

def _search(self, return_type, resource_type, criteria=None, search_options=None):
url = os.path.join(self._url, "pulp/api/v2/%s/search/" % resource_type)
filtered_crit = filters_for_criteria(criteria, return_type)
type_ids = search_options.pop("type_ids", None) if search_options else None
type_ids = {"type_ids": validate_type_ids(type_ids)} if type_ids else {}

if return_type is Unit and type_ids:
url = os.path.join(url, "units/")
filtered_crit = {"unit": filtered_crit} if filtered_crit else {}
def _search(
self,
return_type,
resource_type,
search_type="search",
search_options=None,
criteria=None,
):
url = os.path.join(
self._url, "pulp/api/v2/%s/%s/" % (resource_type, search_type)
)
prepared_search = search_for_criteria(criteria, return_type)

search = {
"criteria": {"skip": 0, "limit": self._PAGE_SIZE, "filters": filtered_crit}
"criteria": {
"skip": 0,
"limit": self._PAGE_SIZE,
"filters": prepared_search.filters,
}
}
search["criteria"].update(type_ids)
search.update(search_options or {})

if search_type == "search/units":
# Unit searches need a little special handling:
# - serialization might have extracted some type_ids
# - filters should be wrapped under 'unit'
# (we do not support searching on associations right now)
if prepared_search.type_ids:
search["criteria"]["type_ids"] = prepared_search.type_ids
search["criteria"]["filters"] = {"unit": search["criteria"]["filters"]}

response_f = self._do_search(url, search)

# When this request is resolved, we'll have the first page of data.
Expand Down
139 changes: 104 additions & 35 deletions pubtools/pulplib/_impl/client/search.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import datetime
import contextlib
from pubtools.pulplib._impl.criteria import (
AndCriteria,
OrCriteria,
Expand All @@ -14,7 +15,7 @@

from pubtools.pulplib._impl import compat_attr as attr
from pubtools.pulplib._impl.model.attr import PULP2_FIELD, PY_PULP2_CONVERTER
from pubtools.pulplib._impl.model.unit import SUPPORTED_UNIT_TYPES
from pubtools.pulplib._impl.model.unit.base import Unit

LOG = logging.getLogger("pubtools.pulplib")

Expand Down Expand Up @@ -71,33 +72,126 @@ def map_field_for_type(field_name, matcher, type_hint):
return None


def filters_for_criteria(criteria, type_hint=None):
# convert a Criteria object to a filters dict as used in the Pulp 2.x API:
@attr.s
class PulpSearch(object):
# Helper class representing a prepared Pulp search.
# Usually just a filters dict, but may include type_ids
# for unit searches.
filters = attr.ib(type=dict)
type_ids = attr.ib(type=list, default=None)


class TypeIdAccumulator(object):
def __init__(self):
self.can_accumulate = True
self.values = []

def accumulate_from_match(self, match_expr):
# Are we still in a state where accumulation is possible?
if not self.can_accumulate:
raise ValueError(
(
"Can't serialize criteria for Pulp query; too complicated. "
"Try simplifying the query with respect to content_type_id."
)
)

# OK, we can accumulate if it's a supported expression type.
if isinstance(match_expr, dict) and list(match_expr.keys()) == ["$eq"]:
self.values = [match_expr["$eq"]]
elif isinstance(match_expr, dict) and list(match_expr.keys()) == ["$in"]:
self.values = match_expr["$in"]
else:
raise ValueError(
(
"Can't serialize criteria for Pulp query, "
"unsupported expression for content_type_id: %s\n"
)
% repr(match_expr)
)

# It is only possible to accumulate once.
self.can_accumulate = False

@property
@contextlib.contextmanager
def no_accumulate(self):
old_can_accumulate = self.can_accumulate
self.can_accumulate = False
try:
yield
finally:
self.can_accumulate = old_can_accumulate


def search_for_criteria(criteria, type_hint=None, type_ids_accum=None):
# convert a Criteria object to a PulpSearch with filters as used in the Pulp 2.x API:
# https://docs.pulpproject.org/dev-guide/conventions/criteria.html#search-criteria
#
# type_hint optionally provides the class expected to be found by this search.
# This can impact the application of certain criteria, e.g. it will affect
# field mappings looked up by FieldMatchCriteria.
if criteria is None or isinstance(criteria, TrueCriteria):
return {}
return PulpSearch(filters={})

type_ids_accum = type_ids_accum or TypeIdAccumulator()

if isinstance(criteria, AndCriteria):
return {"$and": [filters_for_criteria(c) for c in criteria._operands]}
clauses = [
search_for_criteria(c, type_hint, type_ids_accum).filters
for c in criteria._operands
]

if isinstance(criteria, OrCriteria):
return {"$or": [filters_for_criteria(c) for c in criteria._operands]}
# Empty filters do not affect the result and can be simplified.
clauses = [c for c in clauses if c != {}]

if isinstance(criteria, FieldMatchCriteria):
# A single clause is the same as no $and at all.
if len(clauses) == 1:
filters = clauses[0]
else:
filters = {"$and": clauses}

elif isinstance(criteria, OrCriteria):
with type_ids_accum.no_accumulate:
filters = {
"$or": [
search_for_criteria(c, type_hint, type_ids_accum).filters
for c in criteria._operands
]
}

elif isinstance(criteria, FieldMatchCriteria):
field = criteria._field
matcher = criteria._matcher

mapped = map_field_for_type(field, matcher, type_hint)
if mapped:
field, matcher = mapped

return {field: field_match(matcher)}
match_expr = field_match(matcher)

# If we are looking at the special _content_type_id field
# for the purpose of a unit search...
if field == "_content_type_id" and type_hint is Unit:
# We should not include this into filters, but instead
# attempt to accumulate it into type_ids_out.
# This is because type_ids needs to be serialized into the
# top-level 'criteria' and not 'filters', and there are
# additional restrictions on its usage.
type_ids_accum.accumulate_from_match(match_expr)

filters = {}
else:
filters = {field: match_expr}

else:
raise TypeError("Not a criteria: %s" % repr(criteria))

return PulpSearch(filters=filters, type_ids=type_ids_accum.values[:])


raise TypeError("Not a criteria: %s" % repr(criteria))
def filters_for_criteria(criteria, type_hint=None):
return search_for_criteria(criteria, type_hint).filters


def field_match(to_match):
Expand All @@ -117,28 +211,3 @@ def field_match(to_match):
return {"$lt": to_mongo_json(to_match._value)}

raise TypeError("Not a matcher: %s" % repr(to_match))


def validate_type_ids(type_ids):
valid_type_ids = []
invalid_type_ids = []

if isinstance(type_ids, str):
type_ids = [type_ids]

if not isinstance(type_ids, (list, tuple)):
raise TypeError("Expected str, list, or tuple, got %s" % type(type_ids))

for type_id in type_ids:
if type_id in SUPPORTED_UNIT_TYPES:
valid_type_ids.append(type_id)
else:
invalid_type_ids.append(type_id)

if invalid_type_ids:
LOG.error("Invalid content type ID(s): \n\t%s", ", ".join(invalid_type_ids))

if valid_type_ids:
return valid_type_ids

raise ValueError("Must provide valid content type ID(s)")
6 changes: 3 additions & 3 deletions pubtools/pulplib/_impl/fake/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
Distributor,
MaintenanceReport,
)
from pubtools.pulplib._impl.client.search import filters_for_criteria
from pubtools.pulplib._impl.client.search import search_for_criteria
from .. import compat_attr as attr

from .match import match_object
Expand Down Expand Up @@ -76,7 +76,7 @@ def search_repository(self, criteria=None):
# we're not accessing a real Pulp server. The point is to ensure the
# same validation and error behavior as used by the real client also
# applies to the fake.
filters_for_criteria(criteria, Repository)
search_for_criteria(criteria, Repository)

try:
for repo in self._repositories:
Expand All @@ -94,7 +94,7 @@ def search_distributor(self, criteria=None):
criteria = criteria or Criteria.true()
distributors = []

filters_for_criteria(criteria, Distributor)
search_for_criteria(criteria, Distributor)

try:
for repo in self._repositories:
Expand Down
26 changes: 15 additions & 11 deletions pubtools/pulplib/_impl/model/repository/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ..attr import pulp_attrib
from ..distributor import Distributor
from ..frozenlist import FrozenList
from ..unit import Unit
from ...criteria import Criteria
from ...schema import load_schema
from ... import compat_attr as attr

Expand Down Expand Up @@ -202,7 +202,7 @@ def file_content(self):
.. versionadded:: 2.4.0
"""
return list(self.search_content("iso"))
return list(self.search_content(Criteria.with_field("content_type_id", "iso")))

@property
def rpm_content(self):
Expand All @@ -213,7 +213,7 @@ def rpm_content(self):
.. versionadded:: 2.4.0
"""
return list(self.search_content("rpm"))
return list(self.search_content(Criteria.with_field("content_type_id", "rpm")))

@property
def srpm_content(self):
Expand All @@ -224,7 +224,7 @@ def srpm_content(self):
.. versionadded:: 2.4.0
"""
return list(self.search_content("srpm"))
return list(self.search_content(Criteria.with_field("content_type_id", "srpm")))

@property
def modulemd_content(self):
Expand All @@ -235,7 +235,9 @@ def modulemd_content(self):
.. versionadded:: 2.4.0
"""
return list(self.search_content("modulemd"))
return list(
self.search_content(Criteria.with_field("content_type_id", "modulemd"))
)

@property
def modulemd_defaults_content(self):
Expand All @@ -246,14 +248,16 @@ def modulemd_defaults_content(self):
.. versionadded:: 2.4.0
"""
return list(self.search_content("modulemd_defaults"))
return list(
self.search_content(
Criteria.with_field("content_type_id", "modulemd_defaults")
)
)

def search_content(self, type_id, criteria=None):
def search_content(self, criteria=None):
"""Search this repository for content matching the given criteria.
Args:
type_id (str)
The content type to search
criteria (:class:`~pubtools.pulplib.Criteria`)
A criteria object used for this search.
Expand All @@ -270,9 +274,9 @@ def search_content(self, type_id, criteria=None):
raise DetachedException()

resource_type = "repositories/%s" % self.id
search_options = {"type_ids": type_id}

return self._client._search(
Unit, resource_type, criteria=criteria, search_options=search_options
Unit, resource_type, search_type="search/units", criteria=criteria
)

def delete(self):
Expand Down
23 changes: 0 additions & 23 deletions tests/client/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pubtools.pulplib._impl.client.search import (
filters_for_criteria,
field_match,
validate_type_ids,
)


Expand Down Expand Up @@ -101,25 +100,3 @@ def test_dict_matcher_value():
assert filters_for_criteria(crit) == {
"created": {"$lt": {"created_date": {"$date": "2019-09-04T00:00:00Z"}}}
}


def test_valid_type_ids(caplog):
assert validate_type_ids(["srpm", "iso", "quark", "rpm"]) == ["srpm", "iso", "rpm"]
for m in ["Invalid content type ID(s):", "quark"]:
assert m in caplog.text


def test_invalid_type_ids():
"""validate_type_ids raises if called without valid criteria"""
with pytest.raises(ValueError) as e:
validate_type_ids("quark")

assert "Must provide valid content type ID(s)" in str(e)


def test_invalid_type_ids_type():
"""validate_type_ids raises if called without valid criteria"""
with pytest.raises(TypeError) as e:
validate_type_ids({"srpm": "some-srpm"})

assert "Expected str, list, or tuple, got %s" % type({}) in str(e)
Loading

0 comments on commit 5442586

Please sign in to comment.