diff --git a/dialogy/plugins/text/qc_plugin/__init__.py b/dialogy/plugins/text/qc_plugin/__init__.py index e8017504..5371ca67 100644 --- a/dialogy/plugins/text/qc_plugin/__init__.py +++ b/dialogy/plugins/text/qc_plugin/__init__.py @@ -53,9 +53,6 @@ def identify_conflicting_labels(training_data: pd.DataFrame) -> pd.DataFrame: logger.debug(f"Finding data points with conflicting labels...") - training_data["alternatives"] = training_data["alternatives"].apply( - lambda x: x.replace("""\"\"""", """\"""") if isinstance(x, str) else x - ) training_data["frozen_set_hash"] = training_data["alternatives"].apply( lambda x: hashlib.md5( pickle.dumps( diff --git a/tests/plugin/text/test_qc_plugin/test_qc_plugin.py b/tests/plugin/text/test_qc_plugin/test_qc_plugin.py index 433b9cb6..5ef598d6 100644 --- a/tests/plugin/text/test_qc_plugin/test_qc_plugin.py +++ b/tests/plugin/text/test_qc_plugin/test_qc_plugin.py @@ -40,16 +40,6 @@ True, 2, ), - ( - [ - '[[{"transcript": "hello"}]]', - '[[{"transcript": "hello"}]]', - """[[{\""confidence\"": 0.801317, \""transcript"" :\""hello\""}]]""", - ], - ["x1", "x2", "x3"], - True, - 3, - ), ], ) async def test_drop_conflicting_labels(alternatives, tags, drop, discard_size, tmp_path) -> None: