diff --git a/codebleu/syntax_match.py b/codebleu/syntax_match.py index 92dd6db..802e495 100644 --- a/codebleu/syntax_match.py +++ b/codebleu/syntax_match.py @@ -71,6 +71,13 @@ def get_all_sub_trees(root_node): cand_sexps = [x[0] for x in get_all_sub_trees(candidate_tree)] ref_sexps = [x[0] for x in get_all_sub_trees(reference_tree)] + print('cand_sexps') + for tree, depth in get_all_sub_trees(candidate_tree): + print(' ', depth, tree) + print('ref_sexps') + for tree, depth in get_all_sub_trees(reference_tree): + print(' ', depth, tree) + # TODO: fix, now we count number of reference subtrees matching candidate, # but we should count number of candidate subtrees matching reference # See (4) in "3.2 Syntactic AST Match" of https://arxiv.org/pdf/2009.10297.pdf @@ -83,6 +90,7 @@ def get_all_sub_trees(root_node): match_count_candidate_to_reference += 1 total_count += len(ref_sexps) - + print(f'match_count {match_count} / {total_count}') + print(f'match_count_fixed {match_count_candidate_to_reference} / {total_count}') score = match_count / total_count return score