Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: typilus/typilus-action
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: mloncode/typilus-action
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: dirty-cli-dry-run
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 4 commits
  • 5 files changed
  • 1 contributor

Commits on Jun 10, 2020

  1. allow file renames without any changes

    Signed-off-by: Alexander Bezzubov <[email protected]>
    bzz committed Jun 10, 2020
    Copy the full SHA
    afb7e6b View commit details
  2. Env Vars for repo path, debug, dry-run and a personal access token

    Signed-off-by: Alexander Bezzubov <[email protected]>
    bzz committed Jun 10, 2020
    Copy the full SHA
    d666c93 View commit details

Commits on Jun 12, 2020

  1. fix graph extraction stats

    Signed-off-by: Alexander Bezzubov <[email protected]>
    bzz committed Jun 12, 2020
    Copy the full SHA
    d931636 View commit details
  2. cli: add simple tool for testing

    Signed-off-by: Alexander Bezzubov <[email protected]>
    bzz committed Jun 12, 2020
    Copy the full SHA
    0e8627c View commit details
Showing with 218 additions and 10 deletions.
  1. +38 −0 README.md
  2. +14 −7 entrypoint.py
  3. +2 −0 src/changeutils.py
  4. +3 −3 src/graph_generator/extract_graphs.py
  5. +161 −0 typilus.py
38 changes: 38 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -27,6 +27,44 @@ suggestions with only a partial context, at the cost of suggesting some false
positives.


### Run locally from CLI

```
git clone <your-python-repo> test-repo
cat <<EOF >pr.json
{
"action": "opened",
"pull_request": {
"url": "https://api.github.com/repos/<your-python-repo>/pulls/1",
"review_comments_url": "https://api.github.com/repos/<your-python-repo>/pulls/1/comments",
"head": {
"sha": "ec26c3e57ca3a959ca5aad62de7213c562f8c821"
}
}
}
EOF
docker build -t typilus:v0.9-cli-auth-rename .
docker run -it \
-v "$PWD":/data \
-e TY_DRY_RUN="1" \
-e TY_REPO_PATH=/data/test-repo \
-e GITHUB_USER="<your-github-username>" \
-e GITHUB_TOKEN="<your-personal-access-token>" \
-e GITHUB_EVENT_NAME=pull_request \
-e GITHUB_EVENT_PATH=/data/pr.json \
typilus:v0.9-cli-auth-rename .
```

### CLI reporting tool

```sh
pip3 install -r requirements.txt
wget https://github.com/typilus/typilus-action/releases/download/v0.1/typilus20200507.pkl.gz

./typilus.py --model typilus20200507.pkl.gz --repo . --file ./entrypoint.py
```

### Install Action in your Repository

To use the GitHub action, create a workflow file. For example,
21 changes: 14 additions & 7 deletions entrypoint.py
Original file line number Diff line number Diff line change
@@ -53,15 +53,15 @@ def __repr__(self) -> str:
os.environ["GITHUB_EVENT_NAME"] == "pull_request"
), "This action runs only on pull request events."
github_token = os.environ["GITHUB_TOKEN"]
debug = False
debug = os.getenv("TY_DEBUG", False)

with open(os.environ["GITHUB_EVENT_PATH"]) as f:
event_data = json.load(f)
if debug:
print("Event data:")
print(json.dumps(event_data, indent=4))

repo_path = "." # TODO: Is this always true?
repo_path = os.getenv("TY_REPO_PATH", "/usr/src") #"." # TODO: Is this always true?

if debug:
print("ENV Variables")
@@ -71,9 +71,10 @@ def __repr__(self) -> str:
diff_rq = requests.get(
event_data["pull_request"]["url"],
headers={
"authorization": f"Bearer {github_token}",
#"authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github.v3.diff",
},
auth=(os.environ["GITHUB_USER"], github_token),
)
print("Diff GET Status Code: ", diff_rq.status_code)

@@ -164,7 +165,7 @@ def data_iter():
for suggestion in type_suggestions:
if suggestion.symbol_kind == "class-or-function":
suggestion.annotation_lineno = find_annotation_line(
suggestion.filepath[1:], suggestion.file_location, suggestion.name
os.path.join(repo_path, suggestion.filepath[1:]), suggestion.file_location, suggestion.name
)
else: # when the underlying symbol is a parameter
suggestion.annotation_lineno = suggestion.file_location[0]
@@ -188,11 +189,12 @@ def report_confidence(suggestions):
for s in suggestions
)

dry_run = os.environ["TY_DRY_RUN"]
for same_line_suggestions in grouped_suggestions:
suggestion = same_line_suggestions[0]
path = suggestion.filepath[1:] # No slash in the beginning
annotation_lineno = suggestion.annotation_lineno
with open(path) as file:
with open(os.path.join(repo_path, path)) as file:
target_line = file.readlines()[annotation_lineno - 1]
data = {
"path": path,
@@ -210,7 +212,12 @@ def report_confidence(suggestions):
"authorization": f"Bearer {github_token}",
"Accept": "application/vnd.github.v3.raw+json",
}
r = requests.post(comment_url, data=json.dumps(data), headers=headers)

if dry_run:
print("Skip posting actual comment to Github")
else:
r = requests.post(comment_url, data=json.dumps(data), headers=headers)

if debug:
print("URL: ", comment_url)
print(f"Data: {data}. Status Code: {r.status_code}. Text: {r.text}")
print(f"Data: {data}" + (f" Status Code: {r.status_code}. Text: {r.text}" if not dry_run else ""))
2 changes: 2 additions & 0 deletions src/changeutils.py
Original file line number Diff line number Diff line change
@@ -47,6 +47,8 @@ def get_changed_files(diff: str, suffix=".py") -> Dict[str, Set[int]]:
elif file_diff_lines[1].startswith("similarity"):
assert file_diff_lines[2].startswith("rename")
assert file_diff_lines[3].startswith("rename")
if len(file_diff_lines) == 4:
continue # skip file renames \wo any changes
assert file_diff_lines[4].startswith("index")
assert file_diff_lines[5].startswith("--- a/")
assert file_diff_lines[6].startswith("+++ b/")
6 changes: 3 additions & 3 deletions src/graph_generator/extract_graphs.py
Original file line number Diff line number Diff line change
@@ -65,13 +65,13 @@ def explore_files(
if not os.path.isfile(file_path):
continue
with open(file_path, encoding="utf-8", errors="ignore") as f:
monitoring.increment_count()
monitoring.enter_file(file_path)

# import pdb; pdb.set_trace()
if file_path[len(root_dir) :] not in files_to_extract:
continue

monitoring.increment_count()
monitoring.enter_file(file_path)

graph = build_graph(f.read(), monitoring, type_lattice)
if graph is None or len(graph["supernodes"]) == 0:
continue
161 changes: 161 additions & 0 deletions typilus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python

import argparse
import os
import sys
import uuid

from glob import iglob
from os.path import dirname
from pathlib import Path
from typing import Tuple, List
sys.path.append(os.path.join(dirname(__file__), "src"))

from dpu_utils.utils import load_jsonl_gz
from ptgnn.implementations.typilus.graph2class import Graph2Class

from annotationutils import (
annotate_line,
find_annotation_line,
group_suggestions,
annotation_rewrite,
)
from changeutils import get_changed_files
from graph_generator.extract_graphs import extract_graphs

import warnings
warnings.filterwarnings("ignore")

# Copied from entrypoint.py: it has no main(), so importing it would trigger its execution.
# from entrypoint import TypeSuggestion
class TypeSuggestion:
    """A single predicted type annotation for one symbol in a source file."""

    def __init__(
        self,
        filepath: str,
        name: str,
        file_location: Tuple[int, int],
        suggestion: str,
        symbol_kind: str,
        confidence: float,
        annotation_lineno: int = 0,
        is_disagreement: bool = False,
    ):
        # Where the symbol lives.
        self.filepath = filepath
        self.file_location = file_location
        self.annotation_lineno = annotation_lineno

        # What the symbol is.
        self.name = name
        self.symbol_kind = symbol_kind

        # What is being suggested for it, and with which confidence.
        self.suggestion = suggestion
        self.confidence = confidence
        self.is_disagreement = is_disagreement

    def __repr__(self) -> str:
        """Return a one-line summary: location, symbol name, suggestion, confidence."""
        fragments = (
            f"Suggestion@{self.filepath}:{self.file_location} ",
            f"Symbol Name: `{self.name}` Suggestion `{self.suggestion}` ",
            f"Confidence: {self.confidence:.2%}",
        )
        return "".join(fragments)



def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser: three required paths plus a verbosity flag."""
    cli = argparse.ArgumentParser(
        description="Inference from the pretained model using https://github.com/typilus/typilus"
    )

    # (flag, destination attribute, help text) for every mandatory option.
    required_options = (
        ("--model", "model_path", "path to the pretrained model in .pkl.gz format"),
        ("--repo", "repo_path", "path to source code repository to analyzer"),
        ("--file", "file_path", "suggest type only for a given file (must be under --repo)"),
    )
    for flag, dest, help_text in required_options:
        cli.add_argument(flag, dest=dest, required=True, help=help_text)

    cli.add_argument(
        "-v",
        dest="debug",
        action="store_true",
        default=False,
        help="verbose debug output",
    )
    return cli


parser = _build_parser()
# parser.add_argument('-', dest="diff_stdin", action="store_true", default=False, help="suggest types only for the changed files (read diff from stdin)")

# Usage:
# wget https://github.com/typilus/typilus-action/releases/download/v0.1/typilus20200507.pkl.gz
# ./typilus.py --model typilus20200507.pkl.gz --repo . --file ./entrypoint.py

# TODO(bzz):
# ./typilus.py --model typilus20200507.pkl.gz --repo .
# ./typilus.py --model typilus20200507.pkl.gz --repo . - < git diff master^

def main():
    """Suggest type annotations for one file of a repository and print them.

    Steps:
      1. Parse the command line (``--model``, ``--repo``, ``--file``, ``-v``).
      2. Extract the program graph of the requested file into a fresh
         ``graph/<uuid>`` directory.
      3. Run the pretrained model on the extracted graphs (on CPU).
      4. Print every non-variable prediction, and count those that target a
         not-yet-annotated symbol with a confidence above the threshold.

    Exits via ``parser.error`` when ``--file`` is not located under ``--repo``.

    Raises:
        FileNotFoundError: if ``src/metadata/typingRules.json`` is missing
            next to this script.
        ValueError: if the confidence threshold read from the environment is
            not a valid float.
    """
    args = parser.parse_args()
    debug = args.debug
    model_path = args.model_path

    # Graph file names are the file path with the repository root sliced off
    # and therefore start with a path separator (entrypoint.py strips it with
    # `filepath[1:]`). Build the key from a real relative path rather than by
    # slicing `len(repo_path)` characters off `--file`, which is only correct
    # when `--file` literally starts with `--repo`.
    # NOTE(review): assumes extract_graphs() builds its paths by joining onto
    # the root it is given, so a root without a trailing separator yields
    # separator-prefixed names - confirm against explore_files().
    repo_path = os.path.normpath(args.repo_path)
    rel_path = os.path.relpath(args.file_path, repo_path)
    if rel_path == os.pardir or rel_path.startswith(os.pardir + os.sep):
        parser.error(
            f"--file '{args.file_path}' must be under --repo '{args.repo_path}'"
        )

    # The threshold was referenced below but never defined in this script.
    # NOTE(review): the variable name and the 0.5 default are presumed to
    # mirror the configuration entrypoint.py uses - confirm. Parsed before
    # any expensive work so that a malformed value fails fast.
    suggestion_confidence_threshold = float(
        os.getenv("SUGGESTION_CONFIDENCE_THRESHOLD", "0.5")
    )

    # Maps file name -> set of changed line numbers. An empty set means that
    # no line-level information is available (plain `--file` mode), so every
    # line of that file is eligible.
    changed_files = {os.sep + rel_path: set()}
    # TODO(bzz): list all files under --repo when --file is not given.
    # TODO(bzz): read a diff from stdin and use get_changed_files(diff).

    if not changed_files:
        print("No relevant changes found.")
        return

    # Validate the bundled resource before creating any output directory.
    typing_rules_path = os.path.join(
        dirname(__file__), "src", "metadata", "typingRules.json"
    )
    if not Path(typing_rules_path).exists():
        raise FileNotFoundError(f"Typing rules not found at '{typing_rules_path}'")

    out_dir = os.path.join("graph", str(uuid.uuid4()))
    print(f"Intermediate output is saved under '{out_dir}'")
    Path(out_dir).mkdir(parents=True)

    extract_graphs(
        repo_path,
        typing_rules_path,
        files_to_extract=set(changed_files),
        target_folder=out_dir,
    )

    # From here on the flow mirrors entrypoint.py.
    def data_iter():
        """Yield every graph stored in the extraction output directory."""
        for datafile_path in iglob(os.path.join(out_dir, "*.jsonl.gz")):
            print(f"\nLooking into {datafile_path}...")
            for graph in load_jsonl_gz(datafile_path):
                yield graph

    model, nn = Graph2Class.restore_model(model_path, "cpu")

    type_suggestions: List[TypeSuggestion] = []
    for graph, predictions in model.predict(data_iter(), nn, "cpu"):
        # predictions has the type: Dict[int, Tuple[str, float]]
        filepath = graph["filename"]
        # Extraction was already restricted to the requested files, so a
        # missing entry is treated like an empty one (no line filter)
        # instead of raising KeyError.
        changed_lines = changed_files.get(filepath)

        if debug:
            print("Predictions:", predictions)
            print("SuperNodes:", graph["supernodes"])

        for supernode_idx, (predicted_type, predicted_prob) in predictions.items():
            supernode_data = graph["supernodes"][str(supernode_idx)]
            if supernode_data["type"] == "variable":
                continue  # Do not suggest annotations on variables for now.
            lineno, colno = supernode_data["location"]
            suggestion = TypeSuggestion(
                filepath,
                supernode_data["name"],
                (lineno, colno),
                annotation_rewrite(predicted_type),
                supernode_data["type"],
                predicted_prob,
                is_disagreement=supernode_data["annotation"] != "??"
                and supernode_data["annotation"] != predicted_type,
            )

            print("\t", suggestion)

            if changed_lines and lineno not in changed_lines:
                continue
            if suggestion.name == "%UNK%":
                continue

            if (
                supernode_data["annotation"] == "??"
                and suggestion.confidence > suggestion_confidence_threshold
            ):
                type_suggestions.append(suggestion)
            # TODO: suggestions that disagree with an existing annotation
            # (suggestion.is_disagreement) are not reported for now.

    print(f"Done, {len(type_suggestions)} suggestions found.")


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()