-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Kylel/2023/hardening codebase #259
Open
kyleclo
wants to merge
25
commits into
main
Choose a base branch
from
kylel/2023/spangroup-overlap-bug
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
25 commits
Select commit
Hold shift + click to select a range
f391c74
formatting
kyleclo bd0c3ab
add functions to Box; reformatting
kyleclo 326bf43
add tests for box
kyleclo a62faf4
tests for span
kyleclo c0f283e
fix tests for span; add flag for merging Box behavior
kyleclo 66ec632
fix tests for Box
kyleclo 128ed4f
bugfix annotation
kyleclo 966dc13
comment out unfinished test
kyleclo 78b7c3b
fix bug; span equality
kyleclo 013b512
add boxgroup indexer; add tests for indexers
kyleclo caf81c3
bugfix unit test
kyleclo d7afec3
bugfix; metrics
kyleclo 37b223e
bugfix test
kyleclo 439307b
CHANGE! word predictor doesnt return merged Box anymore as part of it…
kyleclo db07e68
reformat; remove unused imports from Doc
kyleclo 039f0dc
reformatting
kyleclo d163044
reformat
kyleclo 524e260
move MergeSpan into figure table heuristic predictor
kyleclo d26a379
revert commit
kyleclo bb4b740
bugfix; remove repeat method in Span
kyleclo 070ce04
add test for unsorted span merging
kyleclo e8953fc
update tools tests so Spans have Boxes
kyleclo dc70d9a
fix bug in tools; should just use the librarys span merging function
kyleclo 1069e52
reformat test_tools; bugfix need to check Box equality via .coordinates
kyleclo 1d546cf
minor fix in test; equivalence of Span and Box
kyleclo File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,6 @@ | |
__all__ = ["Annotation", "BoxGroup", "SpanGroup", "Relation"] | ||
|
||
|
||
|
||
def warn_deepcopy_of_annotation(obj: "Annotation") -> None: | ||
"""Warns when a deepcopy is performed on an Annotation.""" | ||
|
||
|
@@ -34,15 +33,14 @@ def warn_deepcopy_of_annotation(obj: "Annotation") -> None: | |
warnings.warn(msg, UserWarning, stacklevel=2) | ||
|
||
|
||
|
||
class Annotation: | ||
"""Annotation is intended for storing model predictions for a document.""" | ||
|
||
def __init__( | ||
self, | ||
id: Optional[int] = None, | ||
doc: Optional['Document'] = None, | ||
metadata: Optional[Metadata] = None | ||
self, | ||
id: Optional[int] = None, | ||
doc: Optional["Document"] = None, | ||
metadata: Optional[Metadata] = None, | ||
): | ||
self.id = id | ||
self.doc = doc | ||
|
@@ -77,40 +75,44 @@ def __getattr__(self, field: str) -> List["Annotation"]: | |
return self.__getattribute__(field) | ||
|
||
|
||
|
||
class BoxGroup(Annotation): | ||
def __init__( | ||
self, | ||
boxes: List[Box], | ||
id: Optional[int] = None, | ||
doc: Optional['Document'] = None, | ||
metadata: Optional[Metadata] = None, | ||
self, | ||
boxes: List[Box], | ||
id: Optional[int] = None, | ||
doc: Optional["Document"] = None, | ||
metadata: Optional[Metadata] = None, | ||
allow_overlap: Optional[bool] = False, | ||
): | ||
self.boxes = boxes | ||
if not allow_overlap: | ||
clusters = Box.cluster_boxes(boxes=boxes) | ||
if any([len(cluster) > 1 for cluster in clusters]): | ||
raise ValueError( | ||
"BoxGroup does not allow overlapping boxes. " | ||
"Consider setting allow_overlap=True." | ||
) | ||
super().__init__(id=id, doc=doc, metadata=metadata) | ||
|
||
def to_json(self) -> Dict: | ||
box_group_dict = dict( | ||
boxes=[box.to_json() for box in self.boxes], | ||
id=self.id, | ||
metadata=self.metadata.to_json() | ||
metadata=self.metadata.to_json(), | ||
) | ||
return { | ||
key: value for key, value in box_group_dict.items() if value | ||
} # only serialize non-null values | ||
|
||
@classmethod | ||
def from_json(cls, box_group_dict: Dict) -> "BoxGroup": | ||
|
||
if "metadata" in box_group_dict: | ||
metadata_dict = box_group_dict["metadata"] | ||
else: | ||
# this fallback is necessary to ensure compatibility with box | ||
# groups that were create before the metadata migration and | ||
# therefore have "type" in the root of the json dict instead. | ||
metadata_dict = { | ||
"type": box_group_dict.get("type", None) | ||
} | ||
metadata_dict = {"type": box_group_dict.get("type", None)} | ||
|
||
return cls( | ||
boxes=[ | ||
|
@@ -132,7 +134,7 @@ def __deepcopy__(self, memo): | |
box_group = BoxGroup( | ||
boxes=deepcopy(self.boxes, memo), | ||
id=self.id, | ||
metadata=deepcopy(self.metadata, memo) | ||
metadata=deepcopy(self.metadata, memo), | ||
) | ||
|
||
# Don't copy an attached document | ||
|
@@ -150,25 +152,30 @@ def type(self, type: Union[str, None]) -> None: | |
|
||
|
||
class SpanGroup(Annotation): | ||
|
||
def __init__( | ||
self, | ||
spans: List[Span], | ||
box_group: Optional[BoxGroup] = None, | ||
id: Optional[int] = None, | ||
doc: Optional['Document'] = None, | ||
metadata: Optional[Metadata] = None, | ||
self, | ||
spans: List[Span], | ||
box_group: Optional[BoxGroup] = None, | ||
id: Optional[int] = None, | ||
doc: Optional["Document"] = None, | ||
metadata: Optional[Metadata] = None, | ||
allow_overlap: Optional[bool] = False, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same story here as |
||
): | ||
self.spans = spans | ||
if not allow_overlap: | ||
clusters = Span.cluster_spans(spans=spans) | ||
if any([len(cluster) > 1 for cluster in clusters]): | ||
raise ValueError( | ||
"SpanGroup does not allow overlapping spans. " | ||
"Consider setting allow_overlap=True." | ||
) | ||
self.box_group = box_group | ||
super().__init__(id=id, doc=doc, metadata=metadata) | ||
|
||
@property | ||
def symbols(self) -> List[str]: | ||
if self.doc is not None: | ||
return [ | ||
self.doc.symbols[span.start: span.end] for span in self.spans | ||
] | ||
return [self.doc.symbols[span.start : span.end] for span in self.spans] | ||
else: | ||
return [] | ||
|
||
|
@@ -187,12 +194,10 @@ def to_json(self) -> Dict: | |
spans=[span.to_json() for span in self.spans], | ||
id=self.id, | ||
metadata=self.metadata.to_json(), | ||
box_group=self.box_group.to_json() if self.box_group else None | ||
box_group=self.box_group.to_json() if self.box_group else None, | ||
) | ||
return { | ||
key: value | ||
for key, value in span_group_dict.items() | ||
if value is not None | ||
key: value for key, value in span_group_dict.items() if value is not None | ||
} # only serialize non-null values | ||
|
||
@classmethod | ||
|
@@ -211,7 +216,7 @@ def from_json(cls, span_group_dict: Dict) -> "SpanGroup": | |
# therefore have "id", "type" in the root of the json dict instead. | ||
metadata_dict = { | ||
"type": span_group_dict.get("type", None), | ||
"text": span_group_dict.get("text", None) | ||
"text": span_group_dict.get("text", None), | ||
} | ||
|
||
return cls( | ||
|
@@ -256,7 +261,7 @@ def __deepcopy__(self, memo): | |
spans=deepcopy(self.spans, memo), | ||
id=self.id, | ||
metadata=deepcopy(self.metadata, memo), | ||
box_group=deepcopy(self.box_group, memo) | ||
box_group=deepcopy(self.box_group, memo), | ||
) | ||
|
||
# Don't copy an attached document | ||
|
@@ -284,6 +289,5 @@ def text(self, text: Union[str, None]) -> None: | |
self.metadata.text = text | ||
|
||
|
||
|
||
class Relation(Annotation): | ||
pass | ||
pass |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
allow_overlap=False
as the default is a behavioral change and will almost certainly break stuff in practice. Can we switch the default toTrue
?