Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Proof of concept for MagicLookup #187

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 52 additions & 18 deletions src/mmda/types/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from abc import abstractmethod
from copy import deepcopy
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Union
import weakref

from mmda.types.box import Box
from mmda.types.metadata import Metadata
Expand All @@ -22,7 +23,6 @@
__all__ = ["Annotation", "BoxGroup", "SpanGroup", "Relation"]



def warn_deepcopy_of_annotation(obj: "Annotation") -> None:
"""Warns when a deepcopy is performed on an Annotation."""

Expand All @@ -34,10 +34,57 @@ def warn_deepcopy_of_annotation(obj: "Annotation") -> None:
warnings.warn(msg, UserWarning, stacklevel=2)


class MagicLookup:
    """Descriptor exposing document queries as attribute access (WIP).

    Declare it as a class attribute on an annotation class, e.g.
    ``intersects = MagicLookup('intersects')``. Accessing that attribute on
    an annotation returns a new lookup bound to the annotation; accessing
    ``<field>`` on the bound lookup runs the query against the annotation's
    document (``annotation.intersects.<field>``).

    Only the ``'intersects'`` query is implemented; any other query name
    raises ``NotImplementedError`` when a field is requested.

    The bound annotation is held through ``weakref.proxy``, so the lookup
    does not keep it alive: the caller must hold a reference to the
    annotation for as long as the lookup is used.

    Benchmark with
    https://gist.github.com/soldni/4b3e4b97f6f8e86df6b82cdfcd292bc6

    Results on @soldni's laptop:

        $ python magic_v2.py
        try_new: 7.94e-03 +/- 4.70e-04 s
        try_old: 7.36e-03 +/- 3.82e-04 s
    """

    __slots__ = ("_entity", "_query")

    _entity: Optional["Annotation"]
    _query: str

    def __init__(self, query: str, entity: Optional["Annotation"] = None):
        """Create a lookup for ``query``, optionally bound to ``entity``.

        Args:
            query: Name of the query this lookup performs.
            entity: Annotation to run the query for, or ``None`` for an
                unbound lookup (the state of the class-level attribute).
        """
        # Compare against None explicitly: an annotation that happens to be
        # falsy (e.g. it defines __len__ or __bool__) must still be bound.
        self._entity = weakref.proxy(entity) if entity is not None else None
        self._query = query

    def __get__(self, _obj, _obj_type=None):
        """Return a new lookup for the same query bound to ``_obj``.

        ``_obj`` is ``None`` when the attribute is read from the class
        itself, which yields an unbound lookup.
        """
        return type(self)(query=self._query, entity=_obj)

    def __getattr__(self, field: str):
        """Run the query for ``field`` on the bound annotation's document.

        Raises:
            AttributeError: ``field`` is a dunder name or one of this
                class's own slots (see comment below).
            RuntimeError: no annotation is bound, or the bound annotation
                has no document attached.
            ValueError: the query is ``'intersects'`` and ``field`` is not
                one of the document's fields.
            NotImplementedError: the query name is not supported.
        """
        # __getattr__ only runs after normal lookup has failed. Reaching it
        # with a slot name means the slot is unset (instance rebuilt without
        # __init__ by copy/pickle); reading self._entity below would then
        # re-enter this method forever. Dunder names are protocol probes
        # (e.g. hasattr(obj, '__setstate__')) and must see AttributeError,
        # otherwise hasattr()/copy/pickle break.
        if field in MagicLookup.__slots__ or (
            field.startswith("__") and field.endswith("__")
        ):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {field!r}"
            )

        if (ent := self._entity) is None:
            raise RuntimeError(
                "An entity is not attached to this Lookup; "
                "please report this error"
            )

        if (doc := ent.doc) is None:
            raise RuntimeError(
                "This entity is not attached to any document"
            )

        if self._query == 'intersects':
            if field not in doc.fields:
                raise ValueError(f'Field {field} does not exist')
            return doc.find_overlapping(ent, field)  # pyright: ignore

        raise NotImplementedError(f'Query {self._query} not recognized')


class Annotation:
"""Annotation is intended for storing model predictions for a document."""

intersects = MagicLookup('intersects')
refers_to = MagicLookup('refers_to')
parent_of = MagicLookup('parent_of')

def __init__(
self,
id: Optional[int] = None,
Expand All @@ -61,21 +108,9 @@ def attach_doc(self, doc: "Document") -> None:
if not self.doc:
self.doc = doc
else:
raise AttributeError("This annotation already has an attached document")

# TODO[kylel] - comment explaining
def __getattr__(self, field: str) -> List["Annotation"]:
if self.doc is None:
raise ValueError("This annotation is not attached to a document")

if field in self.doc.fields:
return self.doc.find_overlapping(self, field)

if field in self.doc.fields:
return self.doc.find_overlapping(self, field)

return self.__getattribute__(field)

raise AttributeError(
"This annotation already has an attached document"
)


class BoxGroup(Annotation):
Expand Down Expand Up @@ -284,6 +319,5 @@ def text(self, text: Union[str, None]) -> None:
self.metadata.text = text



class Relation(Annotation):
    """Annotation subclass that currently defines no behavior of its own."""

    pass