diff --git a/codebleu/codebleu.py b/codebleu/codebleu.py index b35e597..e4d928f 100644 --- a/codebleu/codebleu.py +++ b/codebleu/codebleu.py @@ -79,7 +79,7 @@ def make_weights(reference_tokens, key_word_list): alpha * ngram_match_score + beta * weighted_ngram_match_score + gamma * syntax_match_score - + theta * (dataflow_match_score or 0) + + theta * (dataflow_match_score or 1.0) ) return { diff --git a/codebleu/dataflow_match.py b/codebleu/dataflow_match.py index bcd89ac..2e4217b 100644 --- a/codebleu/dataflow_match.py +++ b/codebleu/dataflow_match.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. - +import logging from tree_sitter import Language, Parser from .parser import ( @@ -67,7 +67,7 @@ def corpus_dataflow_match(references, candidates, lang, langso_so_file): match_count += 1 normalized_cand_dfg.remove(dataflow) if total_count == 0: - print( + logging.warning( "WARNING: There is no reference data-flows extracted from the whole corpus, " "and the data-flow match score degenerates to 0. Please consider ignoring this score." ) diff --git a/codebleu/weighted_ngram_match.py b/codebleu/weighted_ngram_match.py index d03a04a..507cb76 100644 --- a/codebleu/weighted_ngram_match.py +++ b/codebleu/weighted_ngram_match.py @@ -192,7 +192,6 @@ def corpus_bleu( # it tries to retain the Fraction object as much as the # smoothing method allows. p_n = smoothing_function(p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths) - # pdb.set_trace() s = (w_i * math.log(p_i[0] / p_i[1]) for w_i, p_i in zip(weights, p_n)) s = bp * math.exp(math.fsum(s)) return s @@ -212,7 +211,6 @@ def modified_recall(references, hypothesis, n): """ # Extracts all ngrams in hypothesis # Set an empty Counter if hypothesis is empty. - # pdb.set_trace() numerator = 0 denominator = 0 diff --git a/tests/test_codebleu.py b/tests/test_codebleu.py index ec016cd..3875a94 100644 --- a/tests/test_codebleu.py +++ b/tests/test_codebleu.py @@ -2,6 +2,7 @@ from typing import Any, List import pytest +import logging from codebleu.codebleu import AVAILABLE_LANGS, calc_codebleu @@ -16,7 +17,7 @@ ]) def test_simple_cases(predictions: List[Any], references: List[Any], codebleu: float) -> None: result = calc_codebleu(references, predictions, 'python') - print(result) + logging.debug(result) assert result['codebleu'] == pytest.approx(codebleu, 0.1) @@ -37,7 +38,7 @@ def test_exact_match_works_for_all_langs(lang: str) -> None: ]) def test_simple_cases_work_for_all_langs(lang: str, predictions: List[Any], references: List[Any]) -> None: result = calc_codebleu(references, predictions, lang) - print(result) + logging.debug(result) assert result['codebleu'] == pytest.approx(0.6, 0.1) @@ -55,17 +56,17 @@ def test_error_when_input_length_mismatch() -> None: ( ['public static int Sign ( double d ) { return ( float ) ( ( d == 0 ) ? 0 : ( c < 0.0 ) ? - 1 : 1) ; }'], ['public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }'], - 0.7238 + 0.7019 + ), + ( + ['public static int Sign ( double c ) { return ( int ) ( ( c == 0 ) ? 0 : ( c < 0 ) ? - 1 : 1) ; }'], + ['public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }'], + 0.8804 ), - # ( - # ['public static int Sign ( double c ) { return ( int ) ( ( c == 0 ) ? 0 : ( c < 0 ) ? - 1 : 1) ; }'], - # ['public static int Sign ( double d ) { return ( int ) ( ( d == 0 ) ? 0 : ( d < 0 ) ? - 1 : 1) ; }'], - # 0.8397 - # ), ]) def test_code_x_glue_readme_examples(predictions: List[Any], references: List[Any], codebleu: float) -> None: result = calc_codebleu(references, predictions, 'java') - print(result) + logging.debug(result) assert result['codebleu'] == pytest.approx(codebleu, 0.01)