tweag · Nour-Mws · May 21, 2024 · May 21, 2024 · May 21, 2024 · May 21, 2024
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,31 @@
+name: CI
+
+on: [push, pull_request]
+
+# Lint-only pipeline: import order, formatting, static checks, dependency audit.
+jobs:
+  ci:
+    strategy:
+      matrix:
+        os: [ubuntu-22.04]
+        python-version: ["3.11"]  # quoted: a bare 3.10 would load as float 3.1
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - uses: Gr1N/setup-poetry@v8
+        with:
+          poetry-version: "1.6.1"
+      - name: Install dependencies
+        run: poetry install
+      - name: Run isort
+        run: poetry run isort work_daigest/ --check --diff
+      - name: Run black
+        run: poetry run black . --check --diff
+      - name: Run ruff
+        run: poetry run ruff check .  # explicit subcommand; bare `ruff .` is deprecated
+      - name: Run fawltydeps
+        run: poetry run fawltydeps
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,11 +18,12 @@ packages = [
 [tool.poetry.dependencies]
 python = "^3.11"
 ics = "^0.7.2"
-setuptools = "^68.2.2"
 requests = "^2.31.0"
 boto3 = "^1.34.37"
+botocore = "^1.34.99"
 pytz = "^2024.1"
 streamlit = "^1.30.0"
+python-dateutil = "^2.9.0.post0"
 
 [tool.poetry.scripts]
 work-daigest = "work_daigest:main.main"

diff --git a/work_daigest/__main__.py b/work_daigest/__main__.py
diff --git a/work_daigest/bedrock.py b/work_daigest/bedrock.py
@@ -9,15 +9,20 @@
 
 def list_models(client, pattern: str):
     response = client.list_foundation_models()
-    return [model['modelId'] for model in response['modelSummaries']
-            if pattern in model['modelId'] and 'TEXT' in model['outputModalities']]
+    return [
+        model["modelId"]
+        for model in response["modelSummaries"]
+        if pattern in model["modelId"] and "TEXT" in model["outputModalities"]
+    ]
 
 
 def init_client(service_name: str, region_name: str):
     return boto3.client(service_name, region_name=region_name)
 
 
-def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruct") -> str:
+def invoke_jurassic2(
+    client, prompt: str, model_id: str = "ai21.j2-jumbo-instruct"
+) -> str:
     """
     Invokes the AI21 Labs Jurassic-2 large-language model to run an inference
     using the input provided in the request body.
@@ -41,9 +46,7 @@ def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruc
             "maxTokens": 200,
         }
 
-        response = client.invoke_model(
-            modelId=model_id, body=json.dumps(body)
-        )
+        response = client.invoke_model(modelId=model_id, body=json.dumps(body))
 
         response_body = json.loads(response["body"].read())
         completion = response_body["completions"][0]["data"]["text"]
@@ -55,7 +58,9 @@ def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruc
         raise
 
 
-def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1") -> str:
+def invoke_llama2(
+    client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1"
+) -> str:
     """
     Invokes the Meta Llama 2 large-language model to run an inference
     using the input provided in the request body.
@@ -72,9 +77,7 @@ def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1"
             "max_gen_len": 1000,
         }
 
-        response = client.invoke_model(
-            modelId=model_id, body=json.dumps(body)
-        )
+        response = client.invoke_model(modelId=model_id, body=json.dumps(body))
 
         response_body = json.loads(response["body"].read())
         completion = response_body["generation"]
@@ -85,7 +88,10 @@ def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1"
         logger.error("Couldn't invoke Llama 2")
         raise
 
-def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonnet-20240229-v1:0") -> str:
+
+def invoke_claude3(
+    client, prompt: str, model_id: str = "anthropic.claude-3-sonnet-20240229-v1:0"
+) -> str:
     """
     Invokes the Anthropics Claude-3 large-language model to run an inference
     using the input provided in the request body.
@@ -103,20 +109,10 @@ def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonn
             "temperature": 0.3,
             "top_p": 0.3,
             "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {
-                            "type": "text",
-                            "text": prompt
-                        }
-                    ]
-                }
-            ]
+                {"role": "user", "content": [{"type": "text", "text": prompt}]}
+            ],
         }
-        response = client.invoke_model(
-            modelId=model_id, body=json.dumps(body)
-        )
+        response = client.invoke_model(modelId=model_id, body=json.dumps(body))
         response_body = json.loads(response["body"].read())
         completion = response_body["content"][0]["text"]
 
@@ -126,7 +122,8 @@ def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonn
         logger.error("Couldn't invoke Claude-3")
         raise e
 
-if __name__ == '__main__':
-    client = init_client('bedrock', 'us-east-1')
-    for a in list_models(client, ''):
+
+if __name__ == "__main__":
+    client = init_client("bedrock", "us-east-1")
+    for a in list_models(client, ""):
         print(a)
diff --git a/work_daigest/fetchers/github.py b/work_daigest/fetchers/github.py
@@ -11,7 +11,10 @@
 CommentText = NewType("CommentText", str)
 RepositoryName = NewType("RepositoryName", str)
 CommentType = NewType("CommentType", str)
-Action = Literal["created", "updated", "closed", "reopened", "merged", "commented", "committed"]
+Action = Literal[
+    "created", "updated", "closed", "reopened", "merged", "commented", "committed"
+]
+
 
 @dataclass
 class GitHubComment:
@@ -27,6 +30,7 @@ def to_github_datetime_format(dt: datetime.datetime) -> str:
     """
     return dt.isoformat()[:19] + "Z"
 
+
 BASE_URL = "https://api.github.com/search"
 
 HEADERS = {
@@ -36,6 +40,7 @@ def to_github_datetime_format(dt: datetime.datetime) -> str:
     print("Github token found, using it to authenticate")
     HEADERS["Authorization"] = f"token {token}"
 
+
 def extract_next_page_link_from_header(link_header: str) -> str | None:
     """
     Extract the URL of the next page of results from the "Link" header
@@ -53,6 +58,7 @@ def extract_next_page_link_from_header(link_header: str) -> str | None:
             return url.lstrip("<").rstrip(">")
     return None
 
+
 def send_query(url: str, query: str) -> list[dict]:
     """
     Send a query to the GitHub API and return the `items` field of the response
@@ -78,6 +84,7 @@ def send_query(url: str, query: str) -> list[dict]:
 
     return items
 
+
 def get_latest_action(comment_json: dict) -> (str, str):
     min_date = "1970-01-01T00:00:00Z"
     created = ("created", comment_json.get("created_at") or min_date)
@@ -88,13 +95,17 @@ def get_latest_action(comment_json: dict) -> (str, str):
     return actions[-1]
 
 
-def fetch_issues(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]:
+def fetch_issues(
+    handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime
+) -> list[GitHubComment]:
     """
     Fetch all GitHub issues authored by user `handle`
     """
     # TODO: could also try to use "updated_at" or "closed_at" fields
     datetime_filter = f"created:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}"
-    response_items = send_query(f"{BASE_URL}/issues", f"is:issue+author:{handle}+{datetime_filter}")
+    response_items = send_query(
+        f"{BASE_URL}/issues", f"is:issue+author:{handle}+{datetime_filter}"
+    )
     all_comments = []
     for comment_json in response_items:
         latest_action, date = get_latest_action(comment_json)
@@ -104,19 +115,26 @@ def fetch_issues(handle: str, lower_date: datetime.datetime, upper_date: datetim
                 CommentText(comment_json["body"]),
                 # example repo URL: https://api.github.com/repos/tweag/chainsail
                 # so we use "tweag/chainsail" as human-readable repo identifier
-                RepositoryName("/".join(comment_json["repository_url"].split("/")[-2:])),
-                latest_action
+                RepositoryName(
+                    "/".join(comment_json["repository_url"].split("/")[-2:])
+                ),
+                latest_action,
             )
         )
     return all_comments
 
-def fetch_prs(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]:
+
+def fetch_prs(
+    handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime
+) -> list[GitHubComment]:
     """
     Fetch all GitHub pull requests authored by user `handle`
     """
     # TODO: could also try to use "updated_at" or "closed_at" fields
     datetime_filter = f"created:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}"
-    response_items = send_query(f"{BASE_URL}/issues", f"is:pull-request+author:{handle}+{datetime_filter}")
+    response_items = send_query(
+        f"{BASE_URL}/issues", f"is:pull-request+author:{handle}+{datetime_filter}"
+    )
     all_comments = []
     for comment_json in response_items:
         latest_action, date = get_latest_action(comment_json)
@@ -126,30 +144,39 @@ def fetch_prs(handle: str, lower_date: datetime.datetime, upper_date: datetime.d
                 CommentText(comment_json["body"]),
                 # example repo URL: https://api.github.com/repos/tweag/chainsail
                 # so we use "tweag/chainsail" as human-readable repo identifier
-                RepositoryName("/".join(comment_json["repository_url"].split("/")[-2:])),
-                latest_action
+                RepositoryName(
+                    "/".join(comment_json["repository_url"].split("/")[-2:])
+                ),
+                latest_action,
             )
         )
     return all_comments
 
-def fetch_commits(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]:
+
+def fetch_commits(
+    handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime
+) -> list[GitHubComment]:
     """
     Fetch all GitHub commits authored by user `handle`
     """
     datetime_filter = f"author-date:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}"
-    response_items = send_query(f"{BASE_URL}/commits", f"author:{handle}+committer:{handle}+{datetime_filter}")
+    response_items = send_query(
+        f"{BASE_URL}/commits", f"author:{handle}+committer:{handle}+{datetime_filter}"
+    )
     return [
         GitHubComment(
             dateutil.parser.parse(comment_json["commit"]["author"]["date"]),
             CommentText(comment_json["commit"]["message"]),
             RepositoryName(comment_json["repository"]["full_name"]),
-            "committed"
+            "committed",
         )
         for comment_json in response_items
     ]
 
 
-def fetch_comments(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]:
+def fetch_comments(
+    handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime
+) -> list[GitHubComment]:
     """
     Fetch all GitHub comments authored by user `handle`
     """
@@ -159,6 +186,7 @@ def fetch_comments(handle: str, lower_date: datetime.datetime, upper_date: datet
     all_comments.extend(fetch_commits(handle, lower_date, upper_date))
     return all_comments
 
+
 if __name__ == "__main__":
     lower_date = datetime.datetime.now() - datetime.timedelta(days=7)
     upper_date = datetime.datetime.now()

diff --git a/work_daigest/fetchers/google_calendar.py b/work_daigest/fetchers/google_calendar.py
@@ -7,14 +7,16 @@
 def remove_text_pattern(description):
     pattern = r"-::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-[\s\S]+-::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-"
     # remove the pattern from the description
-    return re.sub(pattern, '', description)
+    return re.sub(pattern, "", description)
 
 
 def filter_events(calendar: Calendar, start: datetime, end: datetime, email):
     events = calendar.events
     events = [e for e in events if e.begin >= start and e.end <= end]
     all_events = []
-    is_valid_attendee = lambda att: att.email == email and (att.partstat not in ("DECLINED", "NEEDS-ACTION"))
+    is_valid_attendee = lambda att: att.email == email and (
+        att.partstat not in ("DECLINED", "NEEDS-ACTION")
+    )
 
     for e in events:
         event_text = []