diff --git a/.github/workflows/mmda-ci.yml b/.github/workflows/mmda-ci.yml
index 2af0ec87..dd5a2a37 100644
--- a/.github/workflows/mmda-ci.yml
+++ b/.github/workflows/mmda-ci.yml
@@ -24,6 +24,7 @@ jobs:
     - name: Test with Python ${{ matrix.python-version }}
       run: |
         pip install -e .[dev,pysbd_predictors,hf_predictors]
+        flake8
         pytest --cov-fail-under=42 --ignore=tests/test_predictors/test_vila_predictors.py --ignore=tests/test_predictors/test_figure_table_predictors.py

   test_vila_predictors:
diff --git a/README.md b/README.md
index bfcb7c4a..5ca518d2 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,15 @@
 conda create -n mmda python=3.8
 pip install -e '.[dev,]'
 ```

+## PEP 8 - Style guide for Python code
+https://peps.python.org/pep-0008/
+
+We propose to follow the PEP 8 style guide. To check the code, run
+
+```bash
+flake8
+```
+
 ## Unit testing
 Note that pytest is running coverage, which checks the unit test coverage of the code. The percent coverage can be found in setup.cfg file.
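(For reviewers trying the new lint step locally: a minimal session might look like the sketch below. The file, line, and column in the sample finding are hypothetical, but flake8 does report in this `path:line:col: code message` format, picking up the `[flake8]` settings this PR adds to setup.cfg.)

```bash
# install the dev extras (which now include flake8), then lint the repo
pip install -e '.[dev]'
flake8
# hypothetical finding:
# mmda/utils/tools.py:42:120: E501 line too long (127 > 119 characters)
```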
diff --git a/mmda/eval/vlue.py b/mmda/eval/vlue.py
index 267910c9..831bfbf9 100644
--- a/mmda/eval/vlue.py
+++ b/mmda/eval/vlue.py
@@ -78,7 +78,7 @@ def read_labels(labels_json_path: str) -> list[LabeledDoc]:
         list[LabeledDoc]: List of labeled documents
     """
     with open(labels_json_path, encoding="utf-8") as f:
-        labels = [LabeledDoc(**l) for l in json.loads(f.read())]
+        labels = [LabeledDoc(**line) for line in json.loads(f.read())]
     return labels
diff --git a/mmda/featurizers/citation_link_featurizers.py b/mmda/featurizers/citation_link_featurizers.py
index fe7c4919..09308d2a 100644
--- a/mmda/featurizers/citation_link_featurizers.py
+++ b/mmda/featurizers/citation_link_featurizers.py
@@ -1,12 +1,11 @@
-import pandas as pd
-from pydantic import BaseModel
 import re
+from typing import List, Dict
+
+import pandas as pd
 from thefuzz import fuzz
-from typing import List, Tuple, Dict

 from mmda.types.annotation import SpanGroup

-
 DIGITS = re.compile(r'[0-9]+')
 ALPHA = re.compile(r'[A-Za-z]+')
 RELEVANT_PUNCTUATION = re.compile(r"\(|\)|\[|,|\]|\.|&|\;")
@@ -23,6 +22,7 @@
 JACCARD_ALPHA = "jaccard_alpha"
 MATCH_FIRST_TOKEN = "match_first_token"

+
 class CitationLink:
     def __init__(self, mention: SpanGroup, bib: SpanGroup):
         self.mention = mention
@@ -31,6 +31,7 @@ def __init__(self, mention: SpanGroup, bib: SpanGroup):
     def to_text_dict(self) -> Dict[str, str]:
         return {"source_text": "".join(self.mention.symbols), "target_text": "".join(self.bib.symbols)}

+
 def featurize(possible_links: List[CitationLink]) -> pd.DataFrame:
     # create dataframe
     df = pd.DataFrame.from_records([link.to_text_dict() for link in possible_links])
@@ -46,7 +47,7 @@ def featurize(possible_links: List[CitationLink]) -> pd.DataFrame:
     df[MATCH_NUMERIC] = df.apply(lambda row: match_numeric(row['source_text'], row['target_text']), axis=1)
     df[JACCARD_ALPHA] = df.apply(lambda row: jaccard_alpha(row['source_text'], row['target_text']), axis=1)
     df[MATCH_FIRST_TOKEN] = df.apply(lambda row: match_first_token(row['source_text'], row['target_text']), axis=1)
-    
+
     # drop text columns
     X_features = df.drop(columns=['source_text', 'target_text'])
     return X_features
@@ -54,7 +55,8 @@ def featurize(possible_links: List[CitationLink]) -> pd.DataFrame:

 def ngramify(s: str, n: int) -> List[str]:
     s_len = len(s)
-    return [s[i:i+n] for i in range(s_len-n+1)]
+    return [s[i:i + n] for i in range(s_len - n + 1)]
+

 def jaccard_ngram(ngrams1: List[str], ngrams2: List[str]) -> float:
     if ngrams1 or ngrams2:
@@ -64,24 +66,28 @@ def jaccard_ngram(ngrams1: List[str], ngrams2: List[str]) -> float:
     else:
         return 0.0

+
 def jaccardify(source: str, target: str, n: int) -> float:
     truncated_target = target[:50]
     source_ngrams = ngramify(source, n)
     target_ngrams = ngramify(truncated_target, n)
     return jaccard_ngram(source_ngrams, target_ngrams)

+
 def has_source_text(source: str) -> int:
     if source.strip():
         return 1
     else:
         return 0

+
 def jaccard_numeric(source: str, target: str) -> float:
     source_numerics = re.findall(DIGITS, source)
     truncated_target = target[:100]
     target_numerics = re.findall(DIGITS, truncated_target)
     return jaccard_ngram(source_numerics, target_numerics)

+
 def match_numeric(source: str, target: str) -> float:
     source_numerics = re.findall(DIGITS, source)
     truncated_target = target[:100]
@@ -90,25 +96,28 @@ def match_numeric(source: str, target: str) -> float:
     for number in source_numerics:
         found = number in target_numerics
         token_found.append(found)
-    
+
     if False not in token_found:
         return 1
     else:
         return 0

+
 def jaccard_alpha(source: str, target: str) -> float:
     source_alpha = re.findall(ALPHA, source)
     truncated_target = target[:50]
     target_alpha = re.findall(ALPHA, truncated_target)
     return jaccard_ngram(source_alpha, target_alpha)

+
 # predicts mention/bib entry matches by matching normalized source tokens
 # examples returning True:
 # source_text = "[78]"
 # target_text = "[78]. C. L. Willis and S. L. Miertschin. Mind maps..."
 # source_text = "(Wilkinson et al., 2017)"
-# target_text = "Wilkinson, R., Quigley, Q., and Marimba, P. Time means nothing. Journal of Far-Fetched Hypotheses, 2017."
+# target_text = "Wilkinson, R., Quigley, Q., and Marimba, P. Time means nothing.
+# Journal of Far-Fetched Hypotheses, 2017."
 #
 # examples returning False:
 # source_text = "[3]"
@@ -117,11 +126,13 @@ def jaccard_alpha(source: str, target: str) -> float:
 # target_text = "Shi, X. Vanilla Ice Cream Is the Best. Epic Assertions, 2021"
 #
 # some failure modes: no source text; source text ranges such as "[13-15]";
-# incomplete source text such as ", 2019)"; bib entry text with both item and page numbers
+# incomplete source text such as ", 2019)"; bib entry text with both item
+# and page numbers
 def strip_and_tokenize(text: str) -> List[str]:
     stripped_text = RELEVANT_PUNCTUATION.sub("", text)
     return stripped_text.lower().strip().split()

+
 def match_source_tokens(source: str, target: str) -> float:
     if not source:
         return 0
@@ -133,7 +144,7 @@ def match_source_tokens(source: str, target: str) -> float:
             if token != 'et' and token != 'al' and token != 'and':
                 found = token in target_tokens
                 token_found.append(found)
-        
+
         if False not in token_found:
             return 1
         else:
@@ -152,4 +163,4 @@ def match_first_token(source: str, target: str) -> float:
     if first_source_token in target_tokens:
         return 1
     else:
-        return 0
\ No newline at end of file
+        return 0
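(A quick illustration of the featurizers reformatted above — a sketch that uses only functions visible in this diff; the strings are invented:)

```python
# Trigram Jaccard between a citation mention and a bib entry, mirroring
# jaccardify(): ngramify() slices each string into overlapping 3-grams,
# and jaccard_ngram() compares them as sets (intersection over union).
source = "(Wilkinson et al., 2017)"
target = "Wilkinson, R., Quigley, Q., and Marimba, P. Time means nothing."

source_ngrams = ngramify(source, 3)       # ['(Wi', 'Wil', 'ilk', ...]
target_ngrams = ngramify(target[:50], 3)  # jaccardify() truncates the target
score = jaccard_ngram(source_ngrams, target_ngrams)  # float in [0.0, 1.0]
```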
diff --git a/mmda/parsers/__init__.py b/mmda/parsers/__init__.py
index a880b9ce..7611fe6c 100644
--- a/mmda/parsers/__init__.py
+++ b/mmda/parsers/__init__.py
@@ -2,4 +2,4 @@

 __all__ = [
     'PDFPlumberParser'
-]
\ No newline at end of file
+]
diff --git a/mmda/parsers/grobid_parser.py b/mmda/parsers/grobid_parser.py
index 8b028c34..52e00749 100644
--- a/mmda/parsers/grobid_parser.py
+++ b/mmda/parsers/grobid_parser.py
@@ -7,10 +7,8 @@
 import os
 import io
 import xml.etree.ElementTree as et
-from typing import List, Optional, Text
+from typing import List, Optional

 import requests
-import tempfile
-import json

 from mmda.parsers.parser import Parser
 from mmda.types.annotation import SpanGroup
@@ -54,6 +52,7 @@ def _post_document(url: str, input_pdf_path: str) -> str:

     return req.text

+
 class GrobidHeaderParser(Parser):
     """Grobid parser that uses header API methods to get title and abstract only.

     The current purpose of this class is evaluation against other methods for title and
diff --git a/mmda/parsers/parser.py b/mmda/parsers/parser.py
index a41758ef..51db8df2 100644
--- a/mmda/parsers/parser.py
+++ b/mmda/parsers/parser.py
@@ -7,7 +7,7 @@
 """

 from abc import abstractmethod
-from typing import List, Optional, Protocol, Union
+from typing import Protocol

 from mmda.types.document import Document
diff --git a/mmda/parsers/pdfplumber_parser.py b/mmda/parsers/pdfplumber_parser.py
index 06d1f1ae..637c3037 100644
--- a/mmda/parsers/pdfplumber_parser.py
+++ b/mmda/parsers/pdfplumber_parser.py
@@ -9,7 +9,7 @@
 from mmda.types.annotation import SpanGroup
 from mmda.types.document import Document
 from mmda.parsers.parser import Parser
-from mmda.types.names import *
+from mmda.types.names import Symbols, Pages, Tokens, Rows


 class PDFPlumberParser(Parser):
@@ -288,7 +288,7 @@ def _simple_line_detection(
         Adapted from https://github.com/allenai/VILA/blob/e6d16afbd1832f44a430074855fbb4c3d3604f4a/src/vila/pdftools/pdfplumber_extractor.py#L24
         Modified Oct 2022 (kylel): Changed return value to be List[int]
-        """
+        """  # noqa

         prev_y = None
         prev_x = None
@@ -333,9 +333,9 @@ def _align_coarse_and_fine_tokens(
         """Returns a list of length len(fine_tokens) where elements of the list are
         integer indices into coarse_tokens elements."""
         assert len(coarse_tokens) <= len(fine_tokens), \
-            f"This method requires |coarse| <= |fine|"
+            "This method requires |coarse| <= |fine|"
         assert ''.join(coarse_tokens) == ''.join(fine_tokens), \
-            f"This method requires the chars(coarse) == chars(fine)"
+            "This method requires the chars(coarse) == chars(fine)"

         coarse_start_ends = []
         start = 0
@@ -366,19 +366,7 @@ def _align_coarse_and_fine_tokens(

         return out

-
-
-
-
-
-
-
 """
-
-
-
-
-
 row_annos.append(row)
 current_rows_tokens = []
@@ -400,4 +388,4 @@ def _align_coarse_and_fine_tokens(
 page_annos.append(page)
 current_pages_tokens = []

-"""
\ No newline at end of file
+"""  # noqa
diff --git a/mmda/parsers/symbol_scraper_parser.py b/mmda/parsers/symbol_scraper_parser.py
index 315b6c98..34d4d2a9 100644
--- a/mmda/parsers/symbol_scraper_parser.py
+++ b/mmda/parsers/symbol_scraper_parser.py
@@ -6,7 +6,6 @@
 """
 import os
-import json
 import logging
 import math
 import re
@@ -21,8 +20,7 @@
 from mmda.types.annotation import SpanGroup
 from mmda.types.document import Document
 from mmda.parsers.parser import Parser
-from mmda.types.names import *
-
+from mmda.types.names import Symbols, Pages, Tokens, Rows

 logger = logging.getLogger(__name__)

@@ -109,7 +107,8 @@ def _find_one_and_extract(self, my_list: List[str],
         return None

     def _parse_row_head_tag(self, row_tag: str) -> Dict:
-        # TODO - not sure why line bboxes are useful; skip for now. they don't quite make sense (e.g. bbox[1] == bbox[3])
+        # TODO - not sure why line bboxes are useful; skip for now. they don't quite make sense
+        # (e.g. bbox[1] == bbox[3])
         match = re.match(pattern=r'', string=row_tag)
         return {'id': int(match.group(1)), 'bbox': match.group(2)}

@@ -132,14 +131,19 @@ def _parse_page_to_metrics(self, xml_lines: List[str]) -> Dict:
         pagemetrics = xml_lines[start:end]

         page_to_metrics = {}
-        for start, end in self._split_list_by_start_end_tags(my_list=pagemetrics, start_tag='', end_tag=''):
+        for start, end in self._split_list_by_start_end_tags(
+                my_list=pagemetrics, start_tag='', end_tag=''):
             partition = pagemetrics[start:end]
             page_num = int(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
-            page_width = float(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
-            page_height = float(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
+            page_width = float(self._find_one_and_extract(
+                my_list=partition, start_tag='', end_tag=''))
+            page_height = float(self._find_one_and_extract(
+                my_list=partition, start_tag='', end_tag=''))
             page_num_rows = int(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
-            page_num_tokens = int(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
-            page_num_chars = int(self._find_one_and_extract(my_list=partition, start_tag='', end_tag=''))
+            page_num_tokens = int(self._find_one_and_extract(
+                my_list=partition, start_tag='', end_tag=''))
+            page_num_chars = int(self._find_one_and_extract(
+                my_list=partition, start_tag='', end_tag=''))
             page_to_metrics[page_num] = {
                 'height': page_height,
                 'width': page_width,
@@ -163,10 +167,9 @@ def _parse_page_to_row_to_tokens(self, xml_lines: List[str], page_to_metrics: Di
                 row_info = self._parse_row_head_tag(row_tag=row_lines[0])  # first line is the head tag
                 row_id = row_info['id']
                 for token_start, token_end in self._split_list_by_start_end_tags(my_list=row_lines,
-                                                                                 start_tag='')]:
@@ -216,7 +219,7 @@ def _convert_nested_text_to_doc_json(self, page_to_row_to_tokens: Dict) -> Dict:
                 if k < len(tokens) - 1:
                     text += ' '
                 else:
-                    text += '\n'    # start newline at end of row
+                    text += '\n'  # start newline at end of row
             start = end + 1
         # make row
         row = SpanGroup(spans=[
@@ -265,8 +268,10 @@ def _parse_xml_to_doc(self, xmlfile: str) -> Document:
         # get page metrics
         page_to_metrics = self._parse_page_to_metrics(xml_lines=xml_lines)
         logger.info(f'\tNum pages: {len(page_to_metrics)}')
-        logger.info(f"\tAvg tokens: {sum([metric['tokens'] for metric in page_to_metrics.values()]) / len(page_to_metrics)}")
-        logger.info(f"\tAvg rows: {sum([metric['rows'] for metric in page_to_metrics.values()]) / len(page_to_metrics)}")
+        logger.info(
+            f"\tAvg tokens: {sum([metric['tokens'] for metric in page_to_metrics.values()]) / len(page_to_metrics)}")
+        logger.info(
+            f"\tAvg rows: {sum([metric['rows'] for metric in page_to_metrics.values()]) / len(page_to_metrics)}")

         # get token stream (grouped by page & row)
         page_to_row_to_tokens = self._parse_page_to_row_to_tokens(xml_lines=xml_lines, page_to_metrics=page_to_metrics)
@@ -277,5 +282,3 @@ def _parse_xml_to_doc(self, xmlfile: str) -> Document:
         # build Document
         doc = Document.from_json(doc_dict=doc_dict)
         return doc
-
-
diff --git a/mmda/predictors/base_predictors/base_heuristic_predictor.py b/mmda/predictors/base_predictors/base_heuristic_predictor.py
index 3f1eb951..34f74d1b 100644
--- a/mmda/predictors/base_predictors/base_heuristic_predictor.py
+++ b/mmda/predictors/base_predictors/base_heuristic_predictor.py
@@ -2,4 +2,4 @@

 class BaseHeuristicPredictor(BasePredictor):
-    REQUIRED_BACKENDS = []
\ No newline at end of file
+    REQUIRED_BACKENDS = []
diff --git a/mmda/predictors/base_predictors/base_predictor.py b/mmda/predictors/base_predictors/base_predictor.py
index ce10d81f..faf725d0 100644
--- a/mmda/predictors/base_predictors/base_predictor.py
+++ b/mmda/predictors/base_predictors/base_predictor.py
@@ -1,6 +1,5 @@
-from dataclasses import dataclass
 from abc import abstractmethod
-from typing import Union, List, Dict, Any
+from typing import List

 from mmda.types.annotation import Annotation
 from mmda.types.document import Document
@@ -9,7 +8,7 @@
 class BasePredictor:

     ###################################################################
-    ##################### Necessary Model Variables ###################
+    # Necessary Model Variables #
     ###################################################################

     # TODO[Shannon] Add the check for required backends in the future.
@@ -34,14 +33,14 @@ def REQUIRED_DOCUMENT_FIELDS(self):
         return None

     ###################################################################
-    ######################### Core Methods ############################
+    # Core Methods #
     ###################################################################

     def _doc_field_checker(self, document: Document) -> None:
         if self.REQUIRED_DOCUMENT_FIELDS is not None:
             for field in self.REQUIRED_DOCUMENT_FIELDS:
                 assert (
-                    field in document.fields
+                        field in document.fields
                 ), f"The input Document object {document} doesn't contain the required field {field}"

     # TODO[Shannon] Allow for some preprocessed document intput
@@ -51,4 +50,4 @@ def predict(self, document: Document) -> List[Annotation]:
         """For all the mmda models, the input is a document object, and
         the output is a list of annotations.
         """
-        self._doc_field_checker(document)
\ No newline at end of file
+        self._doc_field_checker(document)
diff --git a/mmda/predictors/d2_predictors/bibentry_detection_predictor.py b/mmda/predictors/d2_predictors/bibentry_detection_predictor.py
index 32d696e8..47b4cab5 100644
--- a/mmda/predictors/d2_predictors/bibentry_detection_predictor.py
+++ b/mmda/predictors/d2_predictors/bibentry_detection_predictor.py
@@ -1,8 +1,9 @@
 from functools import reduce
 import itertools
-from typing import Any, Dict, Iterator, List, Optional, Union
+from typing import Iterator, List

 import layoutparser as lp
+from PIL.Image import Image

 from mmda.predictors.base_predictors.base_predictor import BasePredictor
 from mmda.types.annotation import BoxGroup
@@ -81,7 +82,7 @@ def postprocess(self,
                     model_outputs: lp.Layout,
                     page_tokens: List[Span],
                     page_index: int,
-                    image: "PIL.Image",
+                    image: Image,
                     id_counter: Iterator[int]) -> (List[BoxGroup], List[BoxGroup]):
         """Convert the model outputs for a single page image into the mmda format
diff --git a/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py b/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py
index f8d565ad..c437e25f 100644
--- a/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py
+++ b/mmda/predictors/heuristic_predictors/dictionary_word_predictor.py
@@ -8,14 +8,13 @@
 from typing import Optional, Set, List

 from mmda.predictors.base_predictors.base_predictor import BasePredictor
-from mmda.types.metadata import Metadata
-from mmda.types.annotation import Annotation, Span, SpanGroup
+from mmda.types.annotation import SpanGroup
 from mmda.types.document import Document
+from mmda.types.metadata import Metadata
 from mmda.types.names import Rows, Tokens


 class DictionaryWordPredictor(BasePredictor):
-
     REQUIRED_BACKENDS = None
     REQUIRED_DOCUMENT_FIELDS = [Rows, Tokens]
@@ -127,10 +126,10 @@ def predict(self, document: Document) -> List[SpanGroup]:

                 # Remove optional pluralization at end of token
                 for plural_suffix in ["(s)", "(s"]:
-                    if next_row_first_token_text[-len(plural_suffix) :] == plural_suffix:
+                    if next_row_first_token_text[-len(plural_suffix):] == plural_suffix:
                         next_row_first_token_text = next_row_first_token_text[
-                            : -len(plural_suffix)
-                        ]
+                            : -len(plural_suffix)
+                        ]

                 # Combined word is in dictionary without hyphen (JOIN)
                 combined_no_hyphen = "".join(
@@ -142,9 +141,9 @@ def predict(self, document: Document) -> List[SpanGroup]:

                 # Restore original text without any punctuation stripping
                 if (
-                    combined_no_hyphen in self.dictionary
-                    or combined_no_hyphen.lower() in self.dictionary
-                    or combined_no_hyphen.lower() in local_dictionary
+                        combined_no_hyphen in self.dictionary
+                        or combined_no_hyphen.lower() in self.dictionary
+                        or combined_no_hyphen.lower() in local_dictionary
                 ):
                     combined_text = curr_row_last_token_text[:-1] + \
                         self._token_text(next_row_first_token)
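(To make the slicing fixes above concrete: the predictor joins a word hyphen-broken across two rows when the dehyphenated form appears in the dictionary. A rough sketch of that JOIN logic with hypothetical tokens, not the real call path:)

```python
dictionary = {"compression"}
curr_row_last_token_text = "compres-"  # last token on one row
next_row_first_token_text = "sion(s)"  # first token on the next row

# strip optional pluralization, e.g. "sion(s)" -> "sion"
for plural_suffix in ["(s)", "(s"]:
    if next_row_first_token_text[-len(plural_suffix):] == plural_suffix:
        next_row_first_token_text = next_row_first_token_text[: -len(plural_suffix)]

# drop the trailing hyphen and check dictionary membership (JOIN)
combined_no_hyphen = curr_row_last_token_text[:-1] + next_row_first_token_text
assert combined_no_hyphen == "compression" and combined_no_hyphen in dictionary
```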
diff --git a/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py b/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py
index 0b542bf3..73f08d3f 100644
--- a/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py
+++ b/mmda/predictors/heuristic_predictors/grobid_citation_predictor.py
@@ -5,17 +5,10 @@
 """

 import io
-import os
 import xml.etree.ElementTree as et
-from typing import List, Optional, Text
+from typing import Optional

 import requests

-from mmda.parsers.parser import Parser
-from mmda.predictors.base_predictors.base_predictor import BasePredictor
-from mmda.types.annotation import SpanGroup
-from mmda.types.document import Document
-from mmda.types.names import Symbols
-from mmda.types.span import Span

 # processCitationList available in Grobid 0.7.1-SNAPSHOT and later
 DEFAULT_API = "http://localhost:8070/api/processCitation"
diff --git a/mmda/predictors/hf_predictors/base_hf_predictor.py b/mmda/predictors/hf_predictors/base_hf_predictor.py
index 3d271470..3ca74eb5 100644
--- a/mmda/predictors/hf_predictors/base_hf_predictor.py
+++ b/mmda/predictors/hf_predictors/base_hf_predictor.py
@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import Union, List, Dict, Any
+from typing import Dict, Any

 from transformers import AutoTokenizer, AutoConfig, AutoModel

@@ -33,4 +33,4 @@ def preprocess(self, document: Document) -> Dict:

     @abstractmethod
     def postprocess(self, model_outputs: Any) -> Dict:
-        """Convert the model outputs into the Annotation format"""
\ No newline at end of file
+        """Convert the model outputs into the Annotation format"""
diff --git a/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py b/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py
index 34e1dd7b..268a8198 100644
--- a/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py
+++ b/mmda/predictors/hf_predictors/bibentry_predictor/predictor.py
@@ -1,6 +1,6 @@
 import os
 import re
-from typing import Dict, List, Optional, Tuple
+from typing import Dict, List, Optional

 from optimum.onnxruntime import ORTModelForTokenClassification
 import torch
@@ -19,7 +19,6 @@

 class BibEntryPredictor(BasePredictor):
-
     REQUIRED_BACKENDS = ["transformers", "torch"]
     REQUIRED_DOCUMENT_FIELDS = ["tokens", "pages", "bib_entry_boxes"]
@@ -97,9 +96,13 @@ def _get_word_level_prediction(word_ids: List[Optional[int]], predictions: List[
             if word_id is not None and word_id != prev_word_id:
                 # Tokenization process removes empty string and skips word id, so we're adding it back here
                 # For example:
-                # input string list: [' Anon ', '1934', ' ', 'University and Educational Intelligence', ' ', 'Nature', ' ', '133', ' ', '805–805']
-                # tokenization removes empty string: ['[CLS]', 'an', '##on', '1934', 'university', 'and', 'educational', 'intelligence', 'nature', '133', '80', '##5', '–', '80', '##5', '[SEP]']
-                # skipping empty string results in skipping word id: [None, 0, 0, 1, 3, 3, 3, 3, 5, 7, 9, 9, 9, 9, 9, None]
+                # input string list: [' Anon ', '1934', ' ', 'University and Educational Intelligence', ' ', 'Nature',
+                # ' ', '133', ' ', '805–805']
+                # tokenization removes empty string: ['[CLS]', 'an', '##on', '1934', 'university', 'and',
+                # 'educational',
+                # 'intelligence', 'nature', '133', '80', '##5', '–', '80', '##5', '[SEP]']
+                # skipping empty string results in skipping word id: [None, 0, 0, 1, 3, 3, 3, 3, 5, 7, 9, 9, 9, 9,
+                # 9, None]
                 # predictions: [0, 9, 9, 0, 8, 9, 8, 8, 9, 0, 13, 13, 13, 13, 13, 4]
                 if prev_word_id is not None:
                     for i in range(word_id - (prev_word_id + 1)):
@@ -111,21 +114,25 @@ def _get_word_level_prediction(word_ids: List[Optional[int]], predictions: List[

     @staticmethod
     def _aggregate_token_level_prediction(input: str, spans, label_ids: List[int]) -> BibEntryPredictionWithSpan:
-        citation_number = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
-                                                                                              BibEntryLabel.CITATION_NUMBER)
+        citation_number = BibEntryPredictor._extract_first_contiguous_label_group_token_level(
+            input, spans, label_ids, BibEntryLabel.CITATION_NUMBER)

         authors = BibEntryPredictor._extract_author_token(input, spans, label_ids)

-        title = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids, BibEntryLabel.TITLE)
+        title = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
+                                                                                    BibEntryLabel.TITLE)

         journal = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
                                                                                       BibEntryLabel.JOURNAL)
-        event = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids, BibEntryLabel.EVENT)
+        event = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
+                                                                                    BibEntryLabel.EVENT)
         journal_venue_or_event = journal if journal else event

         year = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
                                                                                    BibEntryLabel.ISSUED_YEAR)
-        doi = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids, BibEntryLabel.DOI)
-        url = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids, BibEntryLabel.URL)
+        doi = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
+                                                                                  BibEntryLabel.DOI)
+        url = BibEntryPredictor._extract_first_contiguous_label_group_token_level(input, spans, label_ids,
+                                                                                  BibEntryLabel.URL)

         return BibEntryPredictionWithSpan(
             citation_number=citation_number,
@@ -148,12 +155,14 @@ def _extract_author_token(input: str, spans, label_ids: List[int]) -> Optional[L
                     author_span = spans[word_index]
             # Middle of current author
             elif (
-                    label_id == BibEntryLabel.AUTHOR_START.value or label_id == BibEntryLabel.AUTHOR_MIDDLE.value or label_id == BibEntryLabel.AUTHOR_END.value) and author_span:
+                    label_id == BibEntryLabel.AUTHOR_START.value or label_id == BibEntryLabel.AUTHOR_MIDDLE.value or
+                    label_id == BibEntryLabel.AUTHOR_END.value) and author_span:
                 current_span = spans[word_index]
                 author_span = author_span._replace(end=current_span.end)
             # End of current author. Close current author span and reset.
             elif (
-                    label_id != BibEntryLabel.AUTHOR_START.value and label_id != BibEntryLabel.AUTHOR_MIDDLE.value and label_id != BibEntryLabel.AUTHOR_END.value) and author_span:
+                    label_id != BibEntryLabel.AUTHOR_START.value and label_id != BibEntryLabel.AUTHOR_MIDDLE.value and
+                    label_id != BibEntryLabel.AUTHOR_END.value) and author_span:
                 res.append(StringWithSpan(
                     content=input[author_span.start:author_span.end],
                     start=author_span.start,
@@ -197,7 +206,7 @@ def _extract_first_contiguous_label_group_token_level(
     @staticmethod
     def _clean_str(s: str) -> Optional[str]:
         without_diacritics = unidecode(s.strip())
-        subbed = re.sub("-\s+", "", without_diacritics)
+        subbed = re.sub(r'-\s+', '', without_diacritics)
         if subbed:
             return subbed
         else:
@@ -207,6 +216,6 @@ def _clean_str(s: str) -> Optional[str]:
     def _clean_doi(doi: str) -> Optional[str]:
         lower_trimmed = doi.strip().lower()
         if lower_trimmed.startswith("10."):
-            return re.sub("\s", "", lower_trimmed)
+            return re.sub(r'\s', '', lower_trimmed)
         else:
             return None
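(The raw-string changes to `_clean_str` and `_clean_doi` above are behavior-preserving — `"-\s+"` and `r'-\s+'` compile to the same regex — and only silence flake8's invalid-escape-sequence warning, W605. For intuition, a standalone sketch of the `_clean_str` logic on an invented input:)

```python
import re
from unidecode import unidecode

# as in BibEntryPredictor._clean_str: strip diacritics, then remove
# hyphenation artifacts where a word was split across a line break
s = "Convolu- tional Méthodes"
without_diacritics = unidecode(s.strip())         # 'Convolu- tional Methodes'
subbed = re.sub(r'-\s+', '', without_diacritics)  # 'Convolutional Methodes'
```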
diff --git a/mmda/predictors/hf_predictors/bibentry_predictor/utils.py b/mmda/predictors/hf_predictors/bibentry_predictor/utils.py
index e847bfeb..317068c1 100644
--- a/mmda/predictors/hf_predictors/bibentry_predictor/utils.py
+++ b/mmda/predictors/hf_predictors/bibentry_predictor/utils.py
@@ -3,7 +3,8 @@
 from mmda.types.annotation import SpanGroup
 from mmda.types.document import Document
 from mmda.types.span import Span
-from mmda.predictors.hf_predictors.bibentry_predictor.types import BibEntryPredictionWithSpan, BibEntryStructureSpanGroups
+from mmda.predictors.hf_predictors.bibentry_predictor.types import (BibEntryPredictionWithSpan,
+                                                                    BibEntryStructureSpanGroups)

 _SPAN_JOINER = " "

@@ -83,4 +84,3 @@ def map_raw_span(target, raw_span):
     map_raw_span(prediction.bib_entry_url, raw_pred.url)

     return prediction
-
diff --git a/mmda/predictors/hf_predictors/mention_predictor.py b/mmda/predictors/hf_predictors/mention_predictor.py
index 169a2ec5..0bfd0e6d 100644
--- a/mmda/predictors/hf_predictors/mention_predictor.py
+++ b/mmda/predictors/hf_predictors/mention_predictor.py
@@ -1,6 +1,5 @@
 import itertools
 import os.path
-import string
 from typing import Dict, Iterator, List, Optional

 from optimum.onnxruntime import ORTModelForTokenClassification
@@ -81,7 +80,8 @@ def predict_page(self, page: Annotation, counter: Iterator[int], print_warnings:
         ret = []

         words: List[str] = ["".join(token.symbols) for token in page.tokens]
-        word_spans: List[List[Span]] = [[Span.from_json(span_dict=span.to_json()) for span in token.spans] for token in page.tokens]
+        word_spans: List[List[Span]] = [[Span.from_json(span_dict=span.to_json())
+                                         for span in token.spans] for token in page.tokens]

         inputs = self.tokenizer(
             [words],
diff --git a/mmda/predictors/hf_predictors/span_group_classification_predictor.py b/mmda/predictors/hf_predictors/span_group_classification_predictor.py
index dcc9957d..77d2a6b3 100644
--- a/mmda/predictors/hf_predictors/span_group_classification_predictor.py
+++ b/mmda/predictors/hf_predictors/span_group_classification_predictor.py
@@ -4,7 +4,7 @@

 """

-from typing import List, Any, Tuple, Optional, Sequence
+from typing import List, Any, Tuple, Optional
 from collections import defaultdict

 import numpy as np
@@ -20,7 +20,7 @@
 )

 from mmda.types.metadata import Metadata
-from mmda.types.annotation import Annotation, Span, SpanGroup
+from mmda.types.annotation import Annotation, SpanGroup
 from mmda.types.document import Document
 from mmda.predictors.hf_predictors.base_hf_predictor import BaseHFPredictor

@@ -34,11 +34,11 @@ def __init__(
         context_id: List[int]
     ):
         assert len(input_ids) == len(attention_mask) == len(span_group_ids) == len(context_id), \
-            f"Inputs to batch arent same length"
+            "Inputs to batch aren't the same length"
         self.batch_size = len(input_ids)
         assert [len(example) for example in input_ids] == \
               [len(example) for example in attention_mask] == \
-               [len(example) for example in span_group_ids], f"Examples in batch arent same length"
+               [len(example) for example in span_group_ids], "Examples in batch aren't the same length"
         self.input_ids = input_ids
         self.attention_mask = attention_mask
         self.span_group_ids = span_group_ids
@@ -147,9 +147,9 @@ def __init__(
             device=device
         )
         # combining everything
-        self.preprocess_mapper = self.tokenizer_mapper >> \
-                                 self.unpacking_mapper >> \
-                                 self.batch_size_mapper
+        self.preprocess_mapper = (self.tokenizer_mapper >>
+                                  self.unpacking_mapper >>
+                                  self.batch_size_mapper)

     @classmethod
     def from_pretrained(
diff --git a/mmda/predictors/hf_predictors/token_classification_predictor.py b/mmda/predictors/hf_predictors/token_classification_predictor.py
index 4e37ca91..31b27d97 100644
--- a/mmda/predictors/hf_predictors/token_classification_predictor.py
+++ b/mmda/predictors/hf_predictors/token_classification_predictor.py
@@ -1,4 +1,4 @@
-from typing import List, Union, Dict, Any, Tuple, Optional, Sequence
+from typing import List, Dict, Optional
 from abc import abstractmethod

 from tqdm import tqdm
@@ -9,7 +9,6 @@
 )


-from mmda.types.names import *
 from mmda.types.annotation import Annotation, Span, SpanGroup
 from mmda.types.document import Document
 from mmda.types.metadata import Metadata
@@ -18,6 +17,7 @@
     convert_sequence_tagging_to_spans,
 )
 from mmda.predictors.hf_predictors.base_hf_predictor import BaseHFPredictor
+from mmda.types.names import Blocks, Rows, Pages, Tokens


 class BaseSinglePageTokenClassificationPredictor(BaseHFPredictor):
@@ -80,7 +80,8 @@ def predict(
             )

             assert len(model_predictions) == len(
-                page.tokens), f"Model predictions and tokens are not the same length ({len(model_predictions)} != {len(page.tokens)}) for page {page_id}"
+                page.tokens), (f"Model predictions and tokens are not the same length "
+                               f"({len(model_predictions)} != {len(page.tokens)}) for page {page_id}")

             page_prediction_results.extend(
                 self.postprocess(page, model_predictions)
@@ -142,4 +143,3 @@ def REQUIRED_DOCUMENT_FIELDS(self) -> List:
         elif self.predictor.preprocessor.config.agg_level == "block":
             base_reqs.append(Blocks)
         return base_reqs
-
diff --git a/mmda/predictors/hf_predictors/utils.py b/mmda/predictors/hf_predictors/utils.py
index e0ecd0c0..7c80f339 100644
--- a/mmda/predictors/hf_predictors/utils.py
+++ b/mmda/predictors/hf_predictors/utils.py
@@ -3,7 +3,7 @@
 import itertools

 from mmda.types.document import Document
-from mmda.types.names import *
+from mmda.types.names import Rows, Blocks


 def normalize_bbox(
@@ -82,14 +82,14 @@ def convert_document_page_to_pdf_dict(
         (
             token.symbols[0],  # words
             token.spans[0]
-            .box.get_absolute(page_width=page_width, page_height=page_height)
-            .coordinates,  # bbox
+                .box.get_absolute(page_width=page_width, page_height=page_height)
+                .coordinates,  # bbox
             get_visual_group_id(token, Rows, -1),  # line_ids
             get_visual_group_id(token, Blocks, -1)  # block_ids
         )
         for token in document.tokens
     ]

-    words, bbox, line_ids, block_ids = (list(l) for l in zip(*token_data))
+    words, bbox, line_ids, block_ids = (list(line) for line in zip(*token_data))

     line_ids = shift_index_sequence_to_zero_start(line_ids)
     block_ids = shift_index_sequence_to_zero_start(block_ids)
diff --git a/mmda/predictors/hf_predictors/vila_predictor.py b/mmda/predictors/hf_predictors/vila_predictor.py
index a8b78fa1..4561154b 100644
--- a/mmda/predictors/hf_predictors/vila_predictor.py
+++ b/mmda/predictors/hf_predictors/vila_predictor.py
@@ -2,7 +2,7 @@
 # https://github.com/allenai/VILA/blob/dd242d2fcbc5fdcf05013174acadb2dc896a28c3/src/vila/predictors.py#L1
 # to reduce the dependency on the VILA package.

-from typing import List, Union, Dict, Any, Tuple
+from typing import List, Union, Dict, Any
 from abc import abstractmethod
 from dataclasses import dataclass
 import inspect
@@ -17,7 +17,6 @@
 )
 from vila.dataset.preprocessors import instantiate_dataset_preprocessor

-from mmda.types.names import *
 from mmda.types.annotation import Annotation, Span, SpanGroup
 from mmda.types.metadata import Metadata
 from mmda.types.document import Document
@@ -31,6 +30,8 @@
 # Two constants for the constraining the size of the page for
 # inputs to the model.
 # TODO: Move this to somewhere else.
+from mmda.types.names import Pages, Tokens, Rows
+
 MAX_PAGE_WIDTH = 1000
 MAX_PAGE_HEIGHT = 1000

@@ -48,8 +49,8 @@ class VILAPreprocessorConfig:
     group_bbox_agg: str = "first"
     added_special_sepration_token: str = "[SEP]"

-    # This is introduced to support the updates in the 
-    # vila 0.4.0 which fixes the typo. 
+    # This is introduced to support the updates in the
+    # vila 0.4.0 which fixes the typo.
     @property
     def added_special_separation_token(self):
         return self.added_special_sepration_token
@@ -197,7 +198,7 @@ def predict(self, document: Document) -> List[Annotation]:
         return page_prediction_results

     ############################################
-    ###### Some other auxiliary functions ######
+    # Some other auxiliary functions ###########
     ############################################

     def get_category_prediction(self, model_outputs):
@@ -226,7 +227,7 @@ def get_true_token_level_category_prediction(
         encoded_labels = model_inputs["labels"]

         true_predictions = [
-            [(p, l) for (p, l) in zip(prediction, label) if l != -100]
+            [(pred, label) for (pred, label) in zip(prediction, label) if label != -100]
             for prediction, label in zip(model_predictions, encoded_labels)
         ]

@@ -298,7 +299,7 @@ def get_true_token_level_category_prediction(
         encoded_labels = model_inputs["labels"]

         true_predictions = [
-            [(p, l) for (p, l) in zip(prediction, label) if l != -100]
+            [(pred, label) for (pred, label) in zip(prediction, label) if label != -100]
             for prediction, label in zip(model_predictions, encoded_labels)
         ]
"""Convert the model outputs into the mmda format Args: - model_outputs (lp.Layout): - The layout detection results from layoutparser for + model_outputs (lp.Layout): + The layout detection results from layoutparser for a page image - page_index (int): - The index of the current page, used for creating the + page_index (int): + The index of the current page, used for creating the `Box` object - image (PIL.Image): + image (PIL.Image): The image of the current page, used for converting to relative coordinates for the box objects Returns: - List[BoxGroup]: + List[BoxGroup]: The detected layout stored in the BoxGroup format. """ @@ -90,11 +91,11 @@ def predict(self, document: Document) -> List[BoxGroup]: """Returns a list of Boxgroups for the detected layouts for all pages Args: - document (Document): - The input document object + document (Document): + The input document object Returns: - List[BoxGroup]: + List[BoxGroup]: The returned Boxgroups for the detected layouts for all pages """ document_prediction = [] @@ -105,4 +106,4 @@ def predict(self, document: Document) -> List[BoxGroup]: self.postprocess(model_outputs, image_index, image) ) - return document_prediction \ No newline at end of file + return document_prediction diff --git a/mmda/predictors/xgb_predictors/citation_link_predictor.py b/mmda/predictors/xgb_predictors/citation_link_predictor.py index d3490079..5f36a0c0 100644 --- a/mmda/predictors/xgb_predictors/citation_link_predictor.py +++ b/mmda/predictors/xgb_predictors/citation_link_predictor.py @@ -1,39 +1,37 @@ import numpy as np import os -import pandas as pd -from typing import List, Dict, Tuple +from typing import List, Tuple import xgboost as xgb from mmda.types.document import Document from mmda.featurizers.citation_link_featurizers import CitationLink, featurize + class CitationLinkPredictor: def __init__(self, artifacts_dir: str): full_model_path = os.path.join(artifacts_dir, "links_v0.json") model = xgb.XGBClassifier() model.load_model(full_model_path) self.model = model - + # returns a paired mention id and bib id to represent a link def predict(self, doc: Document) -> List[Tuple[str, str]]: predicted_links = [] - + # iterate over mentions for mention in doc.mentions: - # create all possible links for this mention - possible_links = [] + # create all possible links for this mention + possible_links = [] for bib in doc.bibs: - link = CitationLink(mention = mention, bib = bib) + link = CitationLink(mention=mention, bib=bib) possible_links.append(link) - + # featurize and find link with highest score X_instances = featurize(possible_links) y_pred = self.model.predict_proba(X_instances) - match_scores = [pred[1] for pred in y_pred] # probability that label is 1 + match_scores = [pred[1] for pred in y_pred] # probability that label is 1 match_index = np.argmax(match_scores) selected_link = possible_links[match_index] predicted_links.append((selected_link.mention.id, selected_link.bib.id)) - - return predicted_links - + return predicted_links diff --git a/mmda/rasterizers/__init__.py b/mmda/rasterizers/__init__.py index 00a46b50..921e4887 100644 --- a/mmda/rasterizers/__init__.py +++ b/mmda/rasterizers/__init__.py @@ -2,4 +2,4 @@ __all__ = [ 'PDF2ImageRasterizer' -] \ No newline at end of file +] diff --git a/mmda/rasterizers/rasterizer.py b/mmda/rasterizers/rasterizer.py index f5c4f25c..9c273443 100644 --- a/mmda/rasterizers/rasterizer.py +++ b/mmda/rasterizers/rasterizer.py @@ -23,6 +23,7 @@ def rasterize(self, input_pdf_path: str, dpi: int, **kwargs) -> Iterable[PILImag 
""" raise NotImplementedError + class PDF2ImageRasterizer(Rasterizer): def rasterize(self, input_pdf_path: str, dpi: int, **kwargs) -> Iterable[PILImage]: images = pdf2image.convert_from_path(pdf_path=input_pdf_path, dpi=dpi) diff --git a/mmda/types/__init__.py b/mmda/types/__init__.py index d0f3929c..eafbeaa9 100644 --- a/mmda/types/__init__.py +++ b/mmda/types/__init__.py @@ -13,4 +13,4 @@ 'Box', 'PILImage', 'Metadata' -] \ No newline at end of file +] diff --git a/mmda/types/annotation.py b/mmda/types/annotation.py index 4857df5c..acae8d32 100644 --- a/mmda/types/annotation.py +++ b/mmda/types/annotation.py @@ -22,7 +22,6 @@ __all__ = ["Annotation", "BoxGroup", "SpanGroup", "Relation"] - def warn_deepcopy_of_annotation(obj: "Annotation") -> None: """Warns when a deepcopy is performed on an Annotation.""" @@ -34,7 +33,6 @@ def warn_deepcopy_of_annotation(obj: "Annotation") -> None: warnings.warn(msg, UserWarning, stacklevel=2) - class Annotation: """Annotation is intended for storing model predictions for a document.""" @@ -77,7 +75,6 @@ def __getattr__(self, field: str) -> List["Annotation"]: return self.__getattribute__(field) - class BoxGroup(Annotation): def __init__( self, @@ -284,6 +281,5 @@ def text(self, text: Union[str, None]) -> None: self.metadata.text = text - class Relation(Annotation): - pass \ No newline at end of file + pass diff --git a/mmda/types/box.py b/mmda/types/box.py index e00d89f1..31078040 100644 --- a/mmda/types/box.py +++ b/mmda/types/box.py @@ -66,7 +66,8 @@ def from_pdf_coordinates( _y2 = _y1 if (_x1, _y1, _x2, _y2) != (x1, y1, x2, y2): warnings.warn( - f"The coordinates ({x1}, {y1}, {x2}, {y2}) are not valid and converted to ({_x1}, {_y1}, {_x2}, {_y2})." + f"The coordinates ({x1}, {y1}, {x2}, {y2}) are not valid and converted to" + f"({_x1}, {_y1}, {_x2}, {_y2})." 
diff --git a/mmda/types/box.py b/mmda/types/box.py
index e00d89f1..31078040 100644
--- a/mmda/types/box.py
+++ b/mmda/types/box.py
@@ -66,7 +66,8 @@ def from_pdf_coordinates(
             _y2 = _y1
         if (_x1, _y1, _x2, _y2) != (x1, y1, x2, y2):
             warnings.warn(
-                f"The coordinates ({x1}, {y1}, {x2}, {y2}) are not valid and converted to ({_x1}, {_y1}, {_x2}, {_y2})."
+                f"The coordinates ({x1}, {y1}, {x2}, {y2}) are not valid and converted to "
+                f"({_x1}, {_y1}, {_x2}, {_y2})."
             )

         return cls(_x1, _y1, _x2 - _x1, _y2 - _y1, page)
diff --git a/mmda/types/document.py b/mmda/types/document.py
index cbd00655..4674a302 100644
--- a/mmda/types/document.py
+++ b/mmda/types/document.py
@@ -6,7 +6,6 @@

 import itertools
 import warnings
-from copy import deepcopy
 from typing import Dict, Iterable, List, Optional

 from mmda.types.annotation import Annotation, BoxGroup, SpanGroup
@@ -35,7 +34,7 @@ def fields(self) -> List[str]:
     def find_overlapping(self, query: Annotation, field_name: str) -> List[Annotation]:
         if not isinstance(query, SpanGroup):
             raise NotImplementedError(
-                f"Currently only supports query of type SpanGroup"
+                "Currently only supports query of type SpanGroup"
             )

         return self.__indexers[field_name].find(query=query)
diff --git a/mmda/types/image.py b/mmda/types/image.py
index 90bb9f57..97110c64 100644
--- a/mmda/types/image.py
+++ b/mmda/types/image.py
@@ -27,6 +27,6 @@ def frombase64(img_str):
     return img


-PILImage.tobase64 = tobase64 # This is the method applied to individual Image classes
-PILImage.to_json = tobase64 # Use the same API as the others
-PILImage.frombase64 = frombase64 # This is bind to the module, used for loading the images
+PILImage.tobase64 = tobase64  # This is the method applied to individual Image classes
+PILImage.to_json = tobase64  # Use the same API as the others
+PILImage.frombase64 = frombase64  # This is bound to the module, used for loading the images
diff --git a/mmda/types/indexers.py b/mmda/types/indexers.py
index beb12b1f..424f5e2b 100644
--- a/mmda/types/indexers.py
+++ b/mmda/types/indexers.py
@@ -7,7 +7,7 @@

 from typing import List
 from abc import abstractmethod
-from dataclasses import dataclass, field
+from dataclasses import dataclass

 from mmda.types.annotation import SpanGroup, Annotation
 from ncls import NCLS
@@ -76,7 +76,7 @@ def _ensure_disjoint(self) -> None:

     def find(self, query: SpanGroup) -> List[SpanGroup]:
         if not isinstance(query, SpanGroup):
-            raise ValueError(f'SpanGroupIndexer only works with `query` that is SpanGroup type')
+            raise ValueError('SpanGroupIndexer only works with `query` that is SpanGroup type')

         if not query.spans:
             return []
@@ -93,5 +93,3 @@ def find(self, query: SpanGroup) -> List[SpanGroup]:
         # TODO: provide option to return matched span groups in same order as self._sgs
         # (the span groups the index was built with originally)
         return sorted(list(matched_span_groups))
-
-
diff --git a/mmda/types/old/annotations.old.py b/mmda/types/old/annotations.old.py
index fbe069f7..e3ac1f56 100644
--- a/mmda/types/old/annotations.old.py
+++ b/mmda/types/old/annotations.old.py
@@ -10,7 +10,6 @@

 import json

-
 from mmda.types.span import Span
 from mmda.types.boundingbox import BoundingBox

@@ -23,7 +22,6 @@ def __repr__(self):
         return json.dumps(self.to_json())


-
 class SpanAnnotation(Annotation):
     def __init__(self, span: Span, label: str):
         self.span = span
@@ -45,7 +43,6 @@ def to_json(self):
         return {'bbox': self.bbox.to_json(), 'label': self.label}


-
 if __name__ == '__main__':

     # In this example, we construct a sequence tagger training dataset using these classes.
diff --git a/mmda/types/old/boundingbox.old.py b/mmda/types/old/boundingbox.old.py
index 102c28d1..2b0b928f 100644
--- a/mmda/types/old/boundingbox.old.py
+++ b/mmda/types/old/boundingbox.old.py
@@ -10,6 +10,7 @@

 import json

+
 class BoundingBox:
     def __init__(self, l: float, t: float, w: float, h: float, page: int):
         """Assumes x=0.0 and y=0.0 is the top-left of the page, and
@@ -19,9 +20,9 @@ def __init__(self, l: float, t: float, w: float, h: float, page: int):
         if t < 0.0 or t > 1.0:
             raise ValueError(f't={t} is not within 0.0~1.0')
         if l + w < 0.0 or l + w > 1.0:
-            raise ValueError(f'l+w={l+w} is not within 0.0~1.0')
+            raise ValueError(f'l+w={l + w} is not within 0.0~1.0')
         if t + h < 0.0 or t + h > 1.0:
-            raise ValueError(f't+h={t+h} is not within 0.0~1.0')
+            raise ValueError(f't+h={t + h} is not within 0.0~1.0')
         self.l = l
         self.t = t
         self.w = w
diff --git a/mmda/types/old/document_elements.py b/mmda/types/old/document_elements.py
index 80c50a58..49b207ef 100644
--- a/mmda/types/old/document_elements.py
+++ b/mmda/types/old/document_elements.py
@@ -6,11 +6,12 @@

 # TODO[kylel] not sure this class needs to exist; seems extra boilerplate for no benefit

-
-from typing import List, Optional, Dict, Tuple, Type
+from typing import List, Optional, Dict
 from abc import abstractmethod
 from dataclasses import dataclass, field

+from mmda.types import Document
+from mmda.types.annotation import Annotation


 @dataclass
@@ -27,7 +28,7 @@ def to_json(self) -> Dict:
     # TODO: unclear if should be `annotations` or `annotation`
     @abstractmethod
     @classmethod
-    def load(cls, field_name: str, annotations: List["Annotation"], document: Optional["Document"] = None):
+    def load(cls, field_name: str, annotations: List[Annotation], document: Optional[Document] = None):
         pass
diff --git a/mmda/utils/tools.py b/mmda/utils/tools.py
index f0b37107..12d7a0a6 100644
--- a/mmda/utils/tools.py
+++ b/mmda/utils/tools.py
@@ -1,8 +1,8 @@
-from typing import List, Union, Dict, Any, Tuple, Optional
 from collections import defaultdict
+from typing import List, Tuple

-from mmda.types.span import Span
 from mmda.types.box import Box
+from mmda.types.span import Span


 def allocate_overlapping_tokens_for_box(
@@ -134,4 +134,3 @@ def build_merged_spans_from_connected_components(self, index):
             merged_spans.append(Span(start=min([span.start for span in nodes_in_comp[comp]]),
                                      end=max([span.end for span in nodes_in_comp[comp]]), box=merged_box))
         return merged_spans
-
diff --git a/setup.cfg b/setup.cfg
index f805d280..1c9e7f22 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,3 +10,17 @@ omit = tests/fixtures/*

 [coverage:report]
 fail_under = 57
+
+[flake8]
+exclude =
+    tests
+    ai2_internal
+    build
+    examples
+    setup.py
+    mmda/types/old/document.old.py
+    mmda/types/old/image.old.py
+    mmda/types/old/boundingbox.old.py
+per-file-ignores =
+
+max-line-length = 119
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 7289cb3d..8b21d552 100644
--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@
     'pytest',
     'pytest-xdist',
     'pytest-cov',
+    'flake8',
 ]

 setup(
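(Taken together — the CI step, the `[flake8]` block in setup.cfg, and the new `flake8` dev dependency — the whole gate can be reproduced locally with something like the following; the pytest flags simply mirror the workflow change at the top of this diff:)

```bash
pip install -e '.[dev,pysbd_predictors,hf_predictors]'
flake8
pytest --cov-fail-under=42 \
    --ignore=tests/test_predictors/test_vila_predictors.py \
    --ignore=tests/test_predictors/test_figure_table_predictors.py
```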