Skip to content

Commit

Permalink
test: recalculate exapmes tests
Browse files Browse the repository at this point in the history
  • Loading branch information
k4black committed Nov 16, 2023
1 parent 12cbf9b commit 1cfe4c3
Showing 1 changed file with 32 additions and 18 deletions.
50 changes: 32 additions & 18 deletions tests/test_codebleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@
["def test ( ) :\n pass"],
0.25,
), # 'cause data_flow is 0 and considered as 1
(["def bar ( y , x ) :\n a = x * x\n return a"], ["def foo ( x ) :\n return x"], 0.4),
(["def foo ( x ) :\n return x * x"], ["def bar ( x ) :\n return x"], 0.6),
(["def bar ( x ) :\n return x"], ["def foo ( x ) :\n return x"], 0.8),
(["def bar ( y , x ) :\n a = x * x\n return a"], ["def foo ( x ) :\n return x"], 0.38),
(["def foo ( x ) :\n return x * x"], ["def bar ( x ) :\n return x"], 0.61),
(["def bar ( x ) :\n return x"], ["def foo ( x ) :\n return x"], 0.85),
(["def foo ( x ) :\n return x"], ["def foo ( x ) :\n return x"], 1.0),
],
)
def test_simple_cases(predictions: List[Any], references: List[Any], codebleu: float) -> None:
result = calc_codebleu(references, predictions, "python")
logging.debug(result)
assert result["codebleu"] == pytest.approx(codebleu, 0.1)
assert result["codebleu"] == pytest.approx(codebleu, 0.01)


@pytest.mark.parametrize(["lang"], [(lang,) for lang in AVAILABLE_LANGS])
Expand All @@ -48,7 +48,7 @@ def test_exact_match_works_for_all_langs(lang: str) -> None:
def test_simple_cases_work_for_all_langs(lang: str, predictions: List[Any], references: List[Any]) -> None:
result = calc_codebleu(references, predictions, lang)
logging.debug(result)
assert result["codebleu"] == pytest.approx(0.6, 0.1)
assert result["codebleu"] == pytest.approx(0.6, 0.05)


def test_error_when_lang_not_supported() -> None:
Expand All @@ -61,25 +61,39 @@ def test_error_when_input_length_mismatch() -> None:
calc_codebleu(["def foo : pass"], ["def bar : pass", "def buz : pass"], "python")


# https://github.com/microsoft/CodeXGLUE/blob/main/Code-Code/code-to-code-trans/example.png
@pytest.mark.parametrize(
["predictions", "references", "codebleu"],
["predictions", "references", "bleu", "codebleu"],
[
# (
# ['public static int Sign ( double d ) { return ( float ) ( ( d == 0 ) ? 0 : ( c < 0.0 ) ? - 1 : 1) ; }'],
# ['public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }'],
# 0.7238 # TODO: lol, not working at <3.12
# ),
# (
# ['public static int Sign ( double c ) { return ( int ) ( ( c == 0 ) ? 0 : ( c < 0 ) ? - 1 : 1) ; }'],
# ['public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }'],
# 0.8397 # TODO: check, lol, not working
# ),
# https://github.com/microsoft/CodeXGLUE/blob/main/Code-Code/code-to-code-trans/example.png
(
["public static int Sign ( double d ) { return ( float ) ( ( d == 0 ) ? 0 : ( c < 0.0 ) ? - 1 : 1) ; }"],
["public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }"],
0.7846,
0.7238, # TODO: lol, not working at <3.12
),
# https://arxiv.org/pdf/2009.10297.pdf "3.4 Two Examples" at the page 4
(
["public static int Sign ( double d ) { return ( float ) ( ( d == 0 ) ? 0 : ( c < 0.0 ) ? - 1 : 1) ;"],
["public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }"],
0.7543,
0.7091, # Should be 0.6973 if AST=13/21, however at the moment tee-sitter AST is 14/21
),
# https://arxiv.org/pdf/2009.10297.pdf "3.4 Two Examples" at the page 4
(
["public static int Sign ( double c ) { return ( int ) ( ( c == 0 ) ? 0 : ( c < 0 ) ? - 1 : 1) ; }"],
["public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }"],
0.7571, # Error in the Figure 4, text "Example 2" states 0.7571, not 0.6814,
0.8804, # Error in the Figure 4, text "Example 2" states 0.8804, not 0.8397,
),
],
)
def test_code_x_glue_readme_examples(predictions: List[Any], references: List[Any], codebleu: float) -> None:
def test_code_x_glue_readme_examples(
predictions: List[Any], references: List[Any], bleu: float, codebleu: float
) -> None:
result = calc_codebleu(references, predictions, "java")
logging.debug(result)

assert result["ngram_match_score"] == pytest.approx(bleu, 0.01)
assert result["codebleu"] == pytest.approx(codebleu, 0.01)


Expand Down

0 comments on commit 1cfe4c3

Please sign in to comment.