Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: support run analyzer #2622

Merged
merged 1 commit into from
Feb 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions pymilvus/client/grpc_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .constants import ITERATOR_SESSION_TS_FIELD
from .prepare import Prepare
from .types import (
AnalyzeResult,
BulkInsertState,
CompactionPlans,
CompactionState,
Expand Down Expand Up @@ -2258,3 +2259,20 @@ def remove_privileges_from_group(
)
resp = self._stub.OperatePrivilegeGroup(req, wait_for_ready=True, timeout=timeout)
check_status(resp)

@retry_on_rpc_failure()
def run_analyzer(
self,
texts: Union[str, List[str]],
analyzer_params: Union[str, Dict],
timeout: Optional[float] = None,
**kwargs,
):
check_pass_param(timeout=timeout)
req = Prepare.run_analyzer(texts, analyzer_params)
resp = self._stub.RunAnalyzer(req, timeout=timeout)
check_status(resp.status)

if isinstance(texts, str):
return AnalyzeResult(resp.results[0])
return [AnalyzeResult(result) for result in resp.results]
15 changes: 15 additions & 0 deletions pymilvus/client/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -1728,3 +1728,18 @@ def operate_privilege_group_req(
privileges=[milvus_types.PrivilegeEntity(name=p) for p in privileges],
type=operate_privilege_group_type,
)

@classmethod
def run_analyzer(cls, texts: Union[str, List[str]], analyzer_params: Union[str, Dict]):
req = milvus_types.RunAnalyzerRequset()
if isinstance(texts, str):
req.placeholder.append(texts.encode("utf-8"))
else:
req.placeholder.extend([text.encode("utf-8") for text in texts])

if isinstance(analyzer_params, dict):
req.analyzer_params = ujson.dumps(analyzer_params)
else:
req.analyzer_params = analyzer_params

return req
10 changes: 10 additions & 0 deletions pymilvus/client/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -1100,3 +1100,13 @@ def to_dict(self) -> Dict[str, Any]:
result = {"name": self.name}
result.update(self.properties)
return result


class AnalyzeResult:
def __init__(self, info: milvus_types.AnalyzerResult) -> None:
self.tokens = info.tokens

def __str__(self) -> str:
return f"{{tokens: {self.tokens}}}"

__reper__ = __str__
2 changes: 1 addition & 1 deletion pymilvus/grpc_gen/milvus-proto
40 changes: 23 additions & 17 deletions pymilvus/grpc_gen/milvus_pb2.py

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions pymilvus/grpc_gen/milvus_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -2211,3 +2211,27 @@ class ListImportsAuthPlaceholder(_message.Message):
db_name: str
collection_name: str
def __init__(self, db_name: _Optional[str] = ..., collection_name: _Optional[str] = ...) -> None: ...

class RunAnalyzerRequset(_message.Message):
__slots__ = ("base", "analyzer_params", "placeholder")
BASE_FIELD_NUMBER: _ClassVar[int]
ANALYZER_PARAMS_FIELD_NUMBER: _ClassVar[int]
PLACEHOLDER_FIELD_NUMBER: _ClassVar[int]
base: _common_pb2.MsgBase
analyzer_params: str
placeholder: _containers.RepeatedScalarFieldContainer[bytes]
def __init__(self, base: _Optional[_Union[_common_pb2.MsgBase, _Mapping]] = ..., analyzer_params: _Optional[str] = ..., placeholder: _Optional[_Iterable[bytes]] = ...) -> None: ...

class AnalyzerResult(_message.Message):
__slots__ = ("tokens",)
TOKENS_FIELD_NUMBER: _ClassVar[int]
tokens: _containers.RepeatedScalarFieldContainer[str]
def __init__(self, tokens: _Optional[_Iterable[str]] = ...) -> None: ...

class RunAnalyzerResponse(_message.Message):
__slots__ = ("status", "results")
STATUS_FIELD_NUMBER: _ClassVar[int]
RESULTS_FIELD_NUMBER: _ClassVar[int]
status: _common_pb2.Status
results: _containers.RepeatedCompositeFieldContainer[AnalyzerResult]
def __init__(self, status: _Optional[_Union[_common_pb2.Status, _Mapping]] = ..., results: _Optional[_Iterable[_Union[AnalyzerResult, _Mapping]]] = ...) -> None: ...
33 changes: 33 additions & 0 deletions pymilvus/grpc_gen/milvus_pb2_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,11 @@ def __init__(self, channel):
request_serializer=milvus__pb2.OperatePrivilegeGroupRequest.SerializeToString,
response_deserializer=common__pb2.Status.FromString,
)
self.RunAnalyzer = channel.unary_unary(
'/milvus.proto.milvus.MilvusService/RunAnalyzer',
request_serializer=milvus__pb2.RunAnalyzerRequset.SerializeToString,
response_deserializer=milvus__pb2.RunAnalyzerResponse.FromString,
)


class MilvusServiceServicer(object):
Expand Down Expand Up @@ -1062,6 +1067,12 @@ def OperatePrivilegeGroup(self, request, context):
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')

def RunAnalyzer(self, request, context):
"""Missing associated documentation comment in .proto file."""
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
context.set_details('Method not implemented!')
raise NotImplementedError('Method not implemented!')


def add_MilvusServiceServicer_to_server(servicer, server):
rpc_method_handlers = {
Expand Down Expand Up @@ -1535,6 +1546,11 @@ def add_MilvusServiceServicer_to_server(servicer, server):
request_deserializer=milvus__pb2.OperatePrivilegeGroupRequest.FromString,
response_serializer=common__pb2.Status.SerializeToString,
),
'RunAnalyzer': grpc.unary_unary_rpc_method_handler(
servicer.RunAnalyzer,
request_deserializer=milvus__pb2.RunAnalyzerRequset.FromString,
response_serializer=milvus__pb2.RunAnalyzerResponse.SerializeToString,
),
}
generic_handler = grpc.method_handlers_generic_handler(
'milvus.proto.milvus.MilvusService', rpc_method_handlers)
Expand Down Expand Up @@ -3143,6 +3159,23 @@ def OperatePrivilegeGroup(request,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)

@staticmethod
def RunAnalyzer(request,
target,
options=(),
channel_credentials=None,
call_credentials=None,
insecure=False,
compression=None,
wait_for_ready=None,
timeout=None,
metadata=None):
return grpc.experimental.unary_unary(request, target, '/milvus.proto.milvus.MilvusService/RunAnalyzer',
milvus__pb2.RunAnalyzerRequset.SerializeToString,
milvus__pb2.RunAnalyzerResponse.FromString,
options, channel_credentials,
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)


class ProxyServiceStub(object):
"""Missing associated documentation comment in .proto file."""
Expand Down
19 changes: 19 additions & 0 deletions pymilvus/milvus_client/milvus_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1715,3 +1715,22 @@ def describe_replica(
"""
conn = self._get_connection()
return conn.describe_replica(collection_name, timeout=timeout, **kwargs)

def run_analyzer(
self,
texts: Union[str, List[str]],
analyzer_params: Union[str, Dict, None] = None,
timeout: Optional[float] = None,
):
"""Run analyzer. Return result tokens of analysis.
Args:
text(``str``,``List[str]``): The input text (string or string list).
analyzer_params(``str``,``Dict``,``None``): The parameters of analyzer.
timeout(``float``, optional): The timeout value in seconds. Defaults to None.
Returns:
(``List[str]``,``List[List[str]]``): The result tokens of analysis.
"""
if analyzer_params is None:
analyzer_params = {}

return self._get_connection().run_analyzer(texts, analyzer_params, timeout=timeout)