Skip to content

Commit

Permalink
style: apply black and add some comments
Browse files Browse the repository at this point in the history
  • Loading branch information
k4black committed Nov 16, 2023
1 parent 1cfe4c3 commit 17411d4
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 28 deletions.
5 changes: 4 additions & 1 deletion codebleu/bleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
from .utils import ngrams


# _normalize=False was removed in 3.12, add custom class for back-compatibility
class Fraction(_Fraction):
"""Fraction class with _normalize=False support.
The _normalize=False option was removed in Python 3.12; this custom class is added for backward compatibility.
"""

# We're immutable, so use __new__ not __init__
def __new__(cls, numerator: Any = 0, denominator: Any = None, *, _normalize: bool = True) -> "Fraction":
if sys.version_info >= (3, 12):
Expand Down
4 changes: 2 additions & 2 deletions codebleu/dataflow_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,11 @@ def corpus_dataflow_match(references, candidates, lang, langso_so_file):
candidate = candidates[i]
for reference in references_sample:
try:
candidate = remove_comments_and_docstrings(candidate, "java")
candidate = remove_comments_and_docstrings(candidate, lang)
except Exception:
pass
try:
reference = remove_comments_and_docstrings(reference, "java")
reference = remove_comments_and_docstrings(reference, lang)
except Exception:
pass

Expand Down
30 changes: 15 additions & 15 deletions codebleu/parser/build.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# Copyright (c) Microsoft Corporation.
# Copyright (c) 2023 Konstantin Chernyshev.
# Licensed under the MIT license.

from tree_sitter import Language

Language.build_library(
"my-languages.so",
[
"tree-sitter/go",
"tree-sitter/javascript",
"tree-sitter/python",
"tree-sitter/php",
"tree-sitter/java",
"tree-sitter/ruby",
"tree-sitter/c-sharp",
"tree-sitter/c",
"tree-sitter/cpp",
],
)
if __name__ == "__main__":
Language.build_library(
"my-languages.so",
[
"tree-sitter/go",
"tree-sitter/javascript",
"tree-sitter/python",
"tree-sitter/php",
"tree-sitter/java",
"tree-sitter/ruby",
"tree-sitter/c-sharp",
"tree-sitter/c",
"tree-sitter/cpp",
],
)
25 changes: 15 additions & 10 deletions codebleu/syntax_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@ def calc_syntax_match(references, candidate, lang, lang_so_file):


def corpus_syntax_match(references, candidates, lang, lang_so_file):
# print(os.listdir())
JAVA_LANGUAGE = Language(lang_so_file, lang)
tree_sitter_language = Language(lang_so_file, lang)
parser = Parser()
parser.set_language(JAVA_LANGUAGE)
parser.set_language(tree_sitter_language)
match_count = 0
match_count_candidate_to_reference = 0
total_count = 0

for i in range(len(candidates)):
references_sample = references[i]
candidate = candidates[i]
for reference in references_sample:
try:
candidate = remove_comments_and_docstrings(candidate, "java")
candidate = remove_comments_and_docstrings(candidate, lang)
except Exception:
pass
try:
reference = remove_comments_and_docstrings(reference, "java")
reference = remove_comments_and_docstrings(reference, lang)
except Exception:
pass

Expand All @@ -69,14 +69,19 @@ def get_all_sub_trees(root_node):
return sub_tree_sexp_list

cand_sexps = [x[0] for x in get_all_sub_trees(candidate_tree)]
ref_sexps = get_all_sub_trees(reference_tree)
ref_sexps = [x[0] for x in get_all_sub_trees(reference_tree)]

# print(cand_sexps)
# print(ref_sexps)

for sub_tree, depth in ref_sexps:
# TODO: fix. Currently we count the number of reference subtrees that match the candidate,
# but we should count the number of candidate subtrees that match the reference.
# See (4) in "3.2 Syntactic AST Match" of https://arxiv.org/pdf/2009.10297.pdf
for sub_tree in ref_sexps:
if sub_tree in cand_sexps:
match_count += 1

for sub_tree in cand_sexps:
if sub_tree in ref_sexps:
match_count_candidate_to_reference += 1

total_count += len(ref_sexps)

score = match_count / total_count
Expand Down

0 comments on commit 17411d4

Please sign in to comment.