Skip to content

Commit 91f6b46

Browse files
committed
Adding new filter and search functions to parser
1 parent 846a185 commit 91f6b46

File tree

1 file changed

+20
-23
lines changed

1 file changed

+20
-23
lines changed

scrapling/parser.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def __init__(
120120
def _is_text_node(element: Union[html.HtmlElement, etree._ElementUnicodeResult]) -> bool:
121121
"""Return True if given element is a result of a string expression
122122
Examples:
123-
Xpath -> '/text()', '/@attribute' etc...
123+
XPath -> '/text()', '/@attribute' etc...
124124
CSS3 -> '::text', '::attr(attrib)'...
125125
"""
126126
# Faster than checking `element.is_attribute or element.is_text or element.is_tail`
@@ -1007,28 +1007,25 @@ def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entiti
10071007
return result
10081008
return default
10091009

1010-
# def __getattr__(self, name):
1011-
# if name in dir(self.__class__):
1012-
# return super().__getattribute__(name)
1013-
#
1014-
# # Execute the method itself on each Adaptor
1015-
# results = []
1016-
# for item in self:
1017-
# results.append(getattr(item, name))
1018-
#
1019-
# if all(callable(r) for r in results):
1020-
# def call_all(*args, **kwargs):
1021-
# final_results = [r(*args, **kwargs) for r in results]
1022-
# if all([isinstance(r, (Adaptor, Adaptors,)) for r in results]):
1023-
# return self.__class__(final_results)
1024-
# return final_results
1025-
#
1026-
# return call_all
1027-
# else:
1028-
# # Flatten the result if it's a single-item list containing a list
1029-
# if len(self) == 1 and isinstance(results[0], list):
1030-
# return self.__class__(results[0])
1031-
# return self.__class__(results)
1010+
def search(self, func: Callable[['Adaptor'], bool]) -> Union['Adaptor', None]:
    """Return the first of the current elements that the passed function accepts

    Elements are visited in iteration order and the scan stops at the first match.

    :param func: A function that takes each element as an argument and returns True/False
    :return: The first element that matches the function, or ``None`` if no element does.
    """
    for candidate in self:
        if not func(candidate):
            # Not a match, keep scanning
            continue
        return candidate
    return None
1019+
1020+
def filter(self, func: Callable[['Adaptor'], bool]) -> Union['Adaptors', List]:
    """Filter current elements based on the passed function

    The result is always built with ``self.__class__``, even when nothing matches, so the returned
    object exposes the same methods as the one being filtered and calls can be chained safely
    (e.g. ``.filter(a).filter(b)`` or ``.filter(a).search(b)``).

    :param func: A function that takes each element as an argument and returns True/False
    :return: A new object of the same class as the current one holding the matching elements in
        their original order; it is empty when no element matches.
    """
    return self.__class__([
        element for element in self if func(element)
    ])
10321029

10331030
def get(self, default=None):
10341031
"""Returns the first item of the current list

0 commit comments

Comments
 (0)