diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..2b9f0d7 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,31 @@ +name: CI + +on: [push, pull_request] + + +jobs: + ci: + strategy: + matrix: + os: [ubuntu-22.04] + python-version: [3.11] + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - uses: Gr1N/setup-poetry@v8 + with: + poetry-version: "1.6.1" + - name: Install dependencies + run: poetry install + - name: Run isort + run: poetry run isort work_daigest/ --check --diff + - name: Run black + run: poetry run black . --check --diff + - name: Run ruff + run: poetry run ruff . + - name: Run fawltydeps + run: poetry run fawltydeps diff --git a/poetry.lock b/poetry.lock index 2c5d8c0..5c21230 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1571,4 +1571,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "d29cd1343f312486894d20046d7c664ee1925d6a40689ec91bdd6018bf0adf6d" +content-hash = "da4560a975b0e887831d5b6ae573592f1ec755c2083eea4e28eb0455f274bf5e" diff --git a/pyproject.toml b/pyproject.toml index 930e593..7a563a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,11 +18,12 @@ packages = [ [tool.poetry.dependencies] python = "^3.11" ics = "^0.7.2" -setuptools = "^68.2.2" requests = "^2.31.0" boto3 = "^1.34.37" +botocore = "^1.34.99" pytz = "^2024.1" streamlit = "^1.30.0" +python-dateutil = "^2.9.0.post0" [tool.poetry.scripts] work-daigest = "work_daigest:main.main" diff --git a/work_daigest/__main__.py b/work_daigest/__main__.py deleted file mode 100644 index 15b6a64..0000000 --- a/work_daigest/__main__.py +++ /dev/null @@ -1 +0,0 @@ -from .main import * diff --git a/work_daigest/bedrock.py b/work_daigest/bedrock.py index 7307819..8fbc143 100644 --- a/work_daigest/bedrock.py +++ b/work_daigest/bedrock.py @@ -9,15 +9,20 @@ def list_models(client, pattern: str): response = client.list_foundation_models() - return [model['modelId'] for model in response['modelSummaries'] - if pattern in model['modelId'] and 'TEXT' in model['outputModalities']] + return [ + model["modelId"] + for model in response["modelSummaries"] + if pattern in model["modelId"] and "TEXT" in model["outputModalities"] + ] def init_client(service_name: str, region_name: str): return boto3.client(service_name, region_name=region_name) -def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruct") -> str: +def invoke_jurassic2( + client, prompt: str, model_id: str = "ai21.j2-jumbo-instruct" +) -> str: """ Invokes the AI21 Labs Jurassic-2 large-language model to run an inference using the input provided in the request body. @@ -41,9 +46,7 @@ def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruc "maxTokens": 200, } - response = client.invoke_model( - modelId=model_id, body=json.dumps(body) - ) + response = client.invoke_model(modelId=model_id, body=json.dumps(body)) response_body = json.loads(response["body"].read()) completion = response_body["completions"][0]["data"]["text"] @@ -55,7 +58,9 @@ def invoke_jurassic2(client, prompt: str, model_id: str = "ai21.j2-jumbo-instruc raise -def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1") -> str: +def invoke_llama2( + client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1" +) -> str: """ Invokes the Meta Llama 2 large-language model to run an inference using the input provided in the request body. @@ -72,9 +77,7 @@ def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1" "max_gen_len": 1000, } - response = client.invoke_model( - modelId=model_id, body=json.dumps(body) - ) + response = client.invoke_model(modelId=model_id, body=json.dumps(body)) response_body = json.loads(response["body"].read()) completion = response_body["generation"] @@ -85,7 +88,10 @@ def invoke_llama2(client, prompt: str, model_id: str = "meta.llama2-70b-chat-v1" logger.error("Couldn't invoke Llama 2") raise -def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonnet-20240229-v1:0") -> str: + +def invoke_claude3( + client, prompt: str, model_id: str = "anthropic.claude-3-sonnet-20240229-v1:0" +) -> str: """ Invokes the Anthropics Claude-3 large-language model to run an inference using the input provided in the request body. @@ -103,20 +109,10 @@ def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonn "temperature": 0.3, "top_p": 0.3, "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": prompt - } - ] - } - ] + {"role": "user", "content": [{"type": "text", "text": prompt}]} + ], } - response = client.invoke_model( - modelId=model_id, body=json.dumps(body) - ) + response = client.invoke_model(modelId=model_id, body=json.dumps(body)) response_body = json.loads(response["body"].read()) completion = response_body["content"][0]["text"] @@ -126,7 +122,8 @@ def invoke_claude3(client, prompt: str, model_id: str = "anthropic.claude-3-sonn logger.error("Couldn't invoke Claude-3") raise e -if __name__ == '__main__': - client = init_client('bedrock', 'us-east-1') - for a in list_models(client, ''): + +if __name__ == "__main__": + client = init_client("bedrock", "us-east-1") + for a in list_models(client, ""): print(a) diff --git a/work_daigest/fetchers/github.py b/work_daigest/fetchers/github.py index 09b90af..702799c 100644 --- a/work_daigest/fetchers/github.py +++ b/work_daigest/fetchers/github.py @@ -11,7 +11,10 @@ CommentText = NewType("CommentText", str) RepositoryName = NewType("RepositoryName", str) CommentType = NewType("CommentType", str) -Action = Literal["created", "updated", "closed", "reopened", "merged", "commented", "committed"] +Action = Literal[ + "created", "updated", "closed", "reopened", "merged", "commented", "committed" +] + @dataclass class GitHubComment: @@ -27,6 +30,7 @@ def to_github_datetime_format(dt: datetime.datetime) -> str: """ return dt.isoformat()[:19] + "Z" + BASE_URL = "https://api.github.com/search" HEADERS = { @@ -36,6 +40,7 @@ def to_github_datetime_format(dt: datetime.datetime) -> str: print("Github token found, using it to authenticate") HEADERS["Authorization"] = f"token {token}" + def extract_next_page_link_from_header(link_header: str) -> str | None: """ Extract the URL of the next page of results from the "Link" header @@ -53,6 +58,7 @@ def extract_next_page_link_from_header(link_header: str) -> str | None: return url.lstrip("<").rstrip(">") return None + def send_query(url: str, query: str) -> list[dict]: """ Send a query to the GitHub API and return the `items` field of the response @@ -78,6 +84,7 @@ def send_query(url: str, query: str) -> list[dict]: return items + def get_latest_action(comment_json: dict) -> (str, str): min_date = "1970-01-01T00:00:00Z" created = ("created", comment_json.get("created_at") or min_date) @@ -88,13 +95,17 @@ def get_latest_action(comment_json: dict) -> (str, str): return actions[-1] -def fetch_issues(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]: +def fetch_issues( + handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime +) -> list[GitHubComment]: """ Fetch all GitHub issues authored by user `handle` """ # TODO: could also try to use "updated_at" or "closed_at" fields datetime_filter = f"created:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}" - response_items = send_query(f"{BASE_URL}/issues", f"is:issue+author:{handle}+{datetime_filter}") + response_items = send_query( + f"{BASE_URL}/issues", f"is:issue+author:{handle}+{datetime_filter}" + ) all_comments = [] for comment_json in response_items: latest_action, date = get_latest_action(comment_json) @@ -104,19 +115,26 @@ def fetch_issues(handle: str, lower_date: datetime.datetime, upper_date: datetim CommentText(comment_json["body"]), # example repo URL: https://api.github.com/repos/tweag/chainsail # so we use "tweag/chainsail" as human-readable repo identifier - RepositoryName("/".join(comment_json["repository_url"].split("/")[-2:])), - latest_action + RepositoryName( + "/".join(comment_json["repository_url"].split("/")[-2:]) + ), + latest_action, ) ) return all_comments -def fetch_prs(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]: + +def fetch_prs( + handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime +) -> list[GitHubComment]: """ Fetch all GitHub pull requests authored by user `handle` """ # TODO: could also try to use "updated_at" or "closed_at" fields datetime_filter = f"created:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}" - response_items = send_query(f"{BASE_URL}/issues", f"is:pull-request+author:{handle}+{datetime_filter}") + response_items = send_query( + f"{BASE_URL}/issues", f"is:pull-request+author:{handle}+{datetime_filter}" + ) all_comments = [] for comment_json in response_items: latest_action, date = get_latest_action(comment_json) @@ -126,30 +144,39 @@ def fetch_prs(handle: str, lower_date: datetime.datetime, upper_date: datetime.d CommentText(comment_json["body"]), # example repo URL: https://api.github.com/repos/tweag/chainsail # so we use "tweag/chainsail" as human-readable repo identifier - RepositoryName("/".join(comment_json["repository_url"].split("/")[-2:])), - latest_action + RepositoryName( + "/".join(comment_json["repository_url"].split("/")[-2:]) + ), + latest_action, ) ) return all_comments -def fetch_commits(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]: + +def fetch_commits( + handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime +) -> list[GitHubComment]: """ Fetch all GitHub commits authored by user `handle` """ datetime_filter = f"author-date:{to_github_datetime_format(lower_date)}..{to_github_datetime_format(upper_date)}" - response_items = send_query(f"{BASE_URL}/commits", f"author:{handle}+committer:{handle}+{datetime_filter}") + response_items = send_query( + f"{BASE_URL}/commits", f"author:{handle}+committer:{handle}+{datetime_filter}" + ) return [ GitHubComment( dateutil.parser.parse(comment_json["commit"]["author"]["date"]), CommentText(comment_json["commit"]["message"]), RepositoryName(comment_json["repository"]["full_name"]), - "committed" + "committed", ) for comment_json in response_items ] -def fetch_comments(handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime) -> list[GitHubComment]: +def fetch_comments( + handle: str, lower_date: datetime.datetime, upper_date: datetime.datetime +) -> list[GitHubComment]: """ Fetch all GitHub comments authored by user `handle` """ @@ -159,6 +186,7 @@ def fetch_comments(handle: str, lower_date: datetime.datetime, upper_date: datet all_comments.extend(fetch_commits(handle, lower_date, upper_date)) return all_comments + if __name__ == "__main__": lower_date = datetime.datetime.now() - datetime.timedelta(days=7) upper_date = datetime.datetime.now() diff --git a/work_daigest/fetchers/google_calendar.py b/work_daigest/fetchers/google_calendar.py index 4762fe6..332a8a1 100644 --- a/work_daigest/fetchers/google_calendar.py +++ b/work_daigest/fetchers/google_calendar.py @@ -7,14 +7,16 @@ def remove_text_pattern(description): pattern = r"-::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-[\s\S]+-::~:~::~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~:~::~:~::-" # remove the pattern from the description - return re.sub(pattern, '', description) + return re.sub(pattern, "", description) def filter_events(calendar: Calendar, start: datetime, end: datetime, email): events = calendar.events events = [e for e in events if e.begin >= start and e.end <= end] all_events = [] - is_valid_attendee = lambda att: att.email == email and (att.partstat not in ("DECLINED", "NEEDS-ACTION")) + is_valid_attendee = lambda att: att.email == email and ( + att.partstat not in ("DECLINED", "NEEDS-ACTION") + ) for e in events: event_text = [] diff --git a/work_daigest/main.py b/work_daigest/main.py index 7900667..252d81c 100644 --- a/work_daigest/main.py +++ b/work_daigest/main.py @@ -36,10 +36,17 @@ AI: """ + def datetime_to_readable_date(dt: datetime.datetime) -> str: - return dt.strftime('%Y-%m-%d') + return dt.strftime("%Y-%m-%d") + -def munge_calendar_data(cal_file: pathlib.PosixPath | UploadedFile, min_date: datetime.datetime, max_date: datetime.datetime, email: str) -> List[str]: +def munge_calendar_data( + cal_file: pathlib.PosixPath | UploadedFile, + min_date: datetime.datetime, + max_date: datetime.datetime, + email: str, +) -> List[str]: """ Munge calendar data to be used in the prompt template. @@ -52,17 +59,20 @@ def munge_calendar_data(cal_file: pathlib.PosixPath | UploadedFile, min_date: da if isinstance(cal_file, UploadedFile): file_content = cal_file.getvalue().decode("utf-8") elif isinstance(cal_file, pathlib.PosixPath): - with open(cal_file, 'r') as f: + with open(cal_file, "r") as f: file_content = f.read() else: raise ValueError(f"Invalid file type: {type(cal_file)}") calendar = Calendar(file_content) utc = pytz.UTC - events = filter_events(calendar, utc.localize(min_date), utc.localize(max_date), email) + events = filter_events( + calendar, utc.localize(min_date), utc.localize(max_date), email + ) return events + def munge_github_data(file_path: str) -> str: """ Munge GitHub data to be used in the prompt template. @@ -71,11 +81,12 @@ def munge_github_data(file_path: str) -> str: as produced by the GitHub fetcher. :return: Munged GitHub data. """ - with open(file_path, 'r') as f: + with open(file_path, "r") as f: github_data = json.load(f) return json.dumps(github_data) + def convert_to_datetime(datestr: str) -> datetime.datetime: """ Convert a date string of the format YYYY-MM-DD to a datetime object @@ -85,18 +96,22 @@ def convert_to_datetime(datestr: str) -> datetime.datetime: return datetime.datetime.strptime(datestr, "%Y-%m-%d").replace(microsecond=1) -def process_data(calendar_file, github_handle, email, lower_date, upper_date, model_choice): - runtime_client = init_client('bedrock-runtime', 'us-east-1') +def process_data( + calendar_file, github_handle, email, lower_date, upper_date, model_choice +): + runtime_client = init_client("bedrock-runtime", "us-east-1") model_functions = { "jurassic2": functools.partial(invoke_jurassic2, client=runtime_client), "llama2": functools.partial(invoke_llama2, client=runtime_client), - "claude3": functools.partial(invoke_claude3, client=runtime_client) + "claude3": functools.partial(invoke_claude3, client=runtime_client), } model_fn = model_functions.get(model_choice) if model_fn is None: - raise ValueError(f"Invalid model choice: {model_choice}. Choose from {model_functions.keys()}.") + raise ValueError( + f"Invalid model choice: {model_choice}. Choose from {model_functions.keys()}." + ) calendar_data = munge_calendar_data(calendar_file, lower_date, upper_date, email) github_data = fetch_comments(github_handle, lower_date, upper_date) @@ -109,21 +124,61 @@ def main(): Main program flow. """ parser = argparse.ArgumentParser(description="Generate a summary of your work") - parser.add_argument("--calendar-data", type=pathlib.Path, help="Path to the calendar .ics file", required=True) - parser.add_argument("--github-handle", type=str, help="GitHub handle to use when fetching GitHub data", required=True) - parser.add_argument("--email", type=str, help="Email address to use when filtering calendar events", required=True) - parser.add_argument("--lower-date", type=convert_to_datetime, help="Lower date limit to consider data for, in the format YYYY-MM-DD. Defaults to today - 7 days.", default=(datetime.datetime.today() - datetime.timedelta(days=7)).strftime("%Y-%m-%d")) - parser.add_argument("--upper-date", type=convert_to_datetime, help="Upper date limit to consider data for, in the format YYYY-MM-DD. Defaults to today.", default=datetime.datetime.now().strftime("%Y-%m-%d")) - parser.add_argument("--model", type=str, choices=["jurassic2", "llama2", "claude3"], default="claude3", help="Model to use for summary generation") + parser.add_argument( + "--calendar-data", + type=pathlib.Path, + help="Path to the calendar .ics file", + required=True, + ) + parser.add_argument( + "--github-handle", + type=str, + help="GitHub handle to use when fetching GitHub data", + required=True, + ) + parser.add_argument( + "--email", + type=str, + help="Email address to use when filtering calendar events", + required=True, + ) + parser.add_argument( + "--lower-date", + type=convert_to_datetime, + help="Lower date limit to consider data for, in the format YYYY-MM-DD. Defaults to today - 7 days.", + default=(datetime.datetime.today() - datetime.timedelta(days=7)).strftime( + "%Y-%m-%d" + ), + ) + parser.add_argument( + "--upper-date", + type=convert_to_datetime, + help="Upper date limit to consider data for, in the format YYYY-MM-DD. Defaults to today.", + default=datetime.datetime.now().strftime("%Y-%m-%d"), + ) + parser.add_argument( + "--model", + type=str, + choices=["jurassic2", "llama2", "claude3"], + default="claude3", + help="Model to use for summary generation", + ) args = parser.parse_args() - model_fn, calendar_data, github_data = process_data(args.calendar_data, args.github_handle, args.email, args.lower_date, args.upper_date, args.model) + model_fn, calendar_data, github_data = process_data( + args.calendar_data, + args.github_handle, + args.email, + args.lower_date, + args.upper_date, + args.model, + ) summary = model_fn( prompt=PROMPT_TEMPLATE.format( - calendar_data='\n'.join(calendar_data), + calendar_data="\n".join(calendar_data), github_data=github_data, lower_date=datetime_to_readable_date(args.lower_date), - upper_date=datetime_to_readable_date(args.upper_date) + upper_date=datetime_to_readable_date(args.upper_date), ) ) diff --git a/work_daigest/ui.py b/work_daigest/ui.py index 3b3ba16..abb14fc 100644 --- a/work_daigest/ui.py +++ b/work_daigest/ui.py @@ -2,7 +2,7 @@ import streamlit as st -from work_daigest.main import datetime_to_readable_date, PROMPT_TEMPLATE, process_data +from work_daigest.main import PROMPT_TEMPLATE, datetime_to_readable_date, process_data # Title and description st.set_page_config(layout="wide") @@ -47,10 +47,10 @@ st.success(f"Generating summary for {email} using {model_choice}...") summary = model_fn( prompt=PROMPT_TEMPLATE.format( - calendar_data='\n'.join(calendar_data), + calendar_data="\n".join(calendar_data), github_data=github_data, lower_date=datetime_to_readable_date(lower_date), - upper_date=datetime_to_readable_date(upper_date) + upper_date=datetime_to_readable_date(upper_date), ) ) st.write(summary)