feat: add rust lang (#33)

* feat: add rust lang
* fix: delete use file
k4black · Mar 1, 2024 · a2e9524
1 parent a30cb9f
commit a2e9524
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@
 
 This repository contains an unofficial `CodeBLEU` implementation that supports `Linux`, `MacOS` (incl. M-series) and `Windows`. It is available through `PyPI` and the `evaluate` library.
 
-Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`.
+Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`, `Rust`.
 
 ---
 

diff --git a/evaluate_app/README.md b/evaluate_app/README.md
@@ -16,7 +16,7 @@ pinned: false
 
 This repository contains an unofficial `CodeBLEU` implementation that supports `Linux`, `MacOS` and `Windows`. It is available through `PyPI` and the `evaluate` library.
 
-Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`.
+Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`, `Rust`.
 
 ---
 

diff --git a/evaluate_app/codebleu.py b/evaluate_app/codebleu.py
@@ -41,7 +41,7 @@
         should be a string with tokens separated by spaces.
     references: list of reference for each prediction. Each
         reference should be a string with tokens separated by spaces.
-    language: programming language in ['java','js','c_sharp','php','c','python','cpp']. Please note that, due to the way Datasets works, the number of entities in the language array must match the number of entries in the predictions and references arrays, but only the first value from the languages array will be used. This means that you will not be able to compute a metric for different langauges at the same time, but mst do them as sequential calls to CodeBleu.
+    language: programming language in ['java','js','c_sharp','php','c','python','cpp','go','ruby','rust']. Please note that, due to the way Datasets works, the number of entities in the language array must match the number of entries in the predictions and references arrays, but only the first value from the languages array will be used. This means that you will not be able to compute a metric for different languages at the same time, but must do them as sequential calls to CodeBleu.
     weights: tuple of 4 floats to use as weights for scores. Defaults to (0.25, 0.25, 0.25, 0.25).
 Returns:
     codebleu: resulting `CodeBLEU` score,

diff --git a/tests/test_codebleu.py b/tests/test_codebleu.py
@@ -41,12 +41,13 @@ def test_exact_match_works_for_all_langs(lang: str) -> None:
         ("php", ["function foo ( x ) { return x }"], ["function bar ( y ) {\n   return y\n}"]),
         ("go", ["func foo ( x ) { return x }"], ["func bar ( y ) {\n   return y\n}"]),
         ("ruby", ["def foo ( x ) :\n    return x"], ["def bar ( y ) :\n    return y"]),
+        ("rust", ["fn foo ( x ) -> i32 { x }"], ["fn bar ( y ) -> i32 { y }"]),
     ],
 )
 def test_simple_cases_work_for_all_langs(lang: str, predictions: List[Any], references: List[Any]) -> None:
     result = calc_codebleu(references, predictions, lang)
     logging.debug(result)
-    assert result["codebleu"] == pytest.approx(0.6, 0.05)
+    assert result["codebleu"] == pytest.approx(0.6, 0.1)
 
 
 def test_error_when_lang_not_supported() -> None:

diff --git a/use.py b/use.py
@@ -7,7 +7,7 @@
     This repository contains an unofficial `CodeBLEU` implementation that supports `Linux`, `MacOS` (incl. M-series) and `Windows`. It is available through `PyPI` and the `evaluate` library.
-    Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`.
+    Available for: `Python`, `C`, `C#`, `C++`, `Java`, `JavaScript`, `PHP`, `Go`, `Ruby`, `Rust`.
     ---
@@ Expand Down @@