From e707c91176b560a1d291e501d188dc325d9cba49 Mon Sep 17 00:00:00 2001 From: skanderhaddad Date: Wed, 13 Sep 2023 18:35:17 +0100 Subject: [PATCH 1/3] pandas change append -> concat (append in dfs removed in pandas>=2.) --- .../nlp/ipynb/masked_language_modeling.ipynb | 29 ++++++++----------- examples/nlp/masked_language_modeling.py | 3 +- examples/nlp/md/masked_language_modeling.md | 3 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/examples/nlp/ipynb/masked_language_modeling.ipynb b/examples/nlp/ipynb/masked_language_modeling.ipynb index d11b10a591..fa5e9477b4 100644 --- a/examples/nlp/ipynb/masked_language_modeling.ipynb +++ b/examples/nlp/ipynb/masked_language_modeling.ipynb @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, @@ -91,13 +91,12 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ - "\n", "@dataclass\n", "class Config:\n", " MAX_LEN = 256\n", @@ -126,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, @@ -138,13 +137,12 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ - "\n", "def get_text_list_from_files(files):\n", " text_list = []\n", " for name in files:\n", @@ -155,7 +153,6 @@ "\n", "\n", "def get_data_from_text_files(folder_name):\n", - "\n", " pos_files = glob.glob(\"aclImdb/\" + folder_name + \"/pos/*.txt\")\n", " pos_texts = get_text_list_from_files(pos_files)\n", " neg_files = glob.glob(\"aclImdb/\" + folder_name + \"/neg/*.txt\")\n", @@ -173,7 +170,8 @@ "train_df = get_data_from_text_files(\"train\")\n", "test_df = get_data_from_text_files(\"test\")\n", "\n", - "all_data = train_df.append(test_df)" + "# all_data = train_df.append(test_df)\n", + "all_data = pd.concat([train_df, test_df], ignore_index=True)" ] }, { @@ -199,13 +197,12 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ - "\n", "def custom_standardization(input_data):\n", " lowercase = tf.strings.lower(input_data)\n", " stripped_html = tf.strings.regex_replace(lowercase, \"
\", \" \")\n", @@ -341,13 +338,12 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ - "\n", "def bert_module(query, key, value, i):\n", " # Multi headed self-attention\n", " attention_output = layers.MultiHeadAttention(\n", @@ -520,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, @@ -545,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, @@ -617,13 +613,12 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "colab_type": "code" }, "outputs": [], "source": [ - "\n", "def get_end_to_end(model):\n", " inputs_string = keras.Input(shape=(1,), dtype=\"string\")\n", " indices = vectorize_layer(inputs_string)\n", @@ -670,4 +665,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/examples/nlp/masked_language_modeling.py b/examples/nlp/masked_language_modeling.py index 7c6ed30e82..fe3b016c64 100644 --- a/examples/nlp/masked_language_modeling.py +++ b/examples/nlp/masked_language_modeling.py @@ -111,7 +111,8 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -all_data = train_df.append(test_df) +# all_data = train_df.append(test_df) +all_data = pd.concat([train_df, test_df], ignore_index=True) """ ## Dataset preparation diff --git a/examples/nlp/md/masked_language_modeling.md b/examples/nlp/md/masked_language_modeling.md index 59aefd28b8..3cc382d347 100644 --- a/examples/nlp/md/masked_language_modeling.md +++ b/examples/nlp/md/masked_language_modeling.md @@ -120,7 +120,8 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -all_data = train_df.append(test_df) +#all_data = train_df.append(test_df) +all_data = pd.concat([train_df, test_df], ignore_index=True) ```
``` From 54a28e79d3c68f13e4cc9d67b0ee8cd79ccc612e Mon Sep 17 00:00:00 2001 From: skanderhaddad Date: Thu, 21 Sep 2023 22:35:14 +0100 Subject: [PATCH 2/3] fix unused(commented) code --- examples/nlp/ipynb/masked_language_modeling.ipynb | 1 - examples/nlp/masked_language_modeling.py | 1 - examples/nlp/md/masked_language_modeling.md | 1 - 3 files changed, 3 deletions(-) diff --git a/examples/nlp/ipynb/masked_language_modeling.ipynb b/examples/nlp/ipynb/masked_language_modeling.ipynb index fa5e9477b4..47d7cde0de 100644 --- a/examples/nlp/ipynb/masked_language_modeling.ipynb +++ b/examples/nlp/ipynb/masked_language_modeling.ipynb @@ -170,7 +170,6 @@ "train_df = get_data_from_text_files(\"train\")\n", "test_df = get_data_from_text_files(\"test\")\n", "\n", - "# all_data = train_df.append(test_df)\n", "all_data = pd.concat([train_df, test_df], ignore_index=True)" ] }, diff --git a/examples/nlp/masked_language_modeling.py b/examples/nlp/masked_language_modeling.py index fe3b016c64..de7011caa3 100644 --- a/examples/nlp/masked_language_modeling.py +++ b/examples/nlp/masked_language_modeling.py @@ -111,7 +111,6 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -# all_data = train_df.append(test_df) all_data = pd.concat([train_df, test_df], ignore_index=True) """ diff --git a/examples/nlp/md/masked_language_modeling.md b/examples/nlp/md/masked_language_modeling.md index 3cc382d347..15432b27af 100644 --- a/examples/nlp/md/masked_language_modeling.md +++ b/examples/nlp/md/masked_language_modeling.md @@ -120,7 +120,6 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -#all_data = train_df.append(test_df) all_data = pd.concat([train_df, test_df], ignore_index=True) ```
From d22481e76fd53b47221495c7fc08d63f75f51f10 Mon Sep 17 00:00:00 2001 From: skanderhaddad Date: Thu, 21 Sep 2023 22:35:14 +0100 Subject: [PATCH 3/3] remove commented code for cleaner codebase --- examples/nlp/ipynb/masked_language_modeling.ipynb | 1 - examples/nlp/masked_language_modeling.py | 1 - examples/nlp/md/masked_language_modeling.md | 1 - 3 files changed, 3 deletions(-) diff --git a/examples/nlp/ipynb/masked_language_modeling.ipynb b/examples/nlp/ipynb/masked_language_modeling.ipynb index fa5e9477b4..47d7cde0de 100644 --- a/examples/nlp/ipynb/masked_language_modeling.ipynb +++ b/examples/nlp/ipynb/masked_language_modeling.ipynb @@ -170,7 +170,6 @@ "train_df = get_data_from_text_files(\"train\")\n", "test_df = get_data_from_text_files(\"test\")\n", "\n", - "# all_data = train_df.append(test_df)\n", "all_data = pd.concat([train_df, test_df], ignore_index=True)" ] }, diff --git a/examples/nlp/masked_language_modeling.py b/examples/nlp/masked_language_modeling.py index fe3b016c64..de7011caa3 100644 --- a/examples/nlp/masked_language_modeling.py +++ b/examples/nlp/masked_language_modeling.py @@ -111,7 +111,6 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -# all_data = train_df.append(test_df) all_data = pd.concat([train_df, test_df], ignore_index=True) """ diff --git a/examples/nlp/md/masked_language_modeling.md b/examples/nlp/md/masked_language_modeling.md index 3cc382d347..15432b27af 100644 --- a/examples/nlp/md/masked_language_modeling.md +++ b/examples/nlp/md/masked_language_modeling.md @@ -120,7 +120,6 @@ def get_data_from_text_files(folder_name): train_df = get_data_from_text_files("train") test_df = get_data_from_text_files("test") -#all_data = train_df.append(test_df) all_data = pd.concat([train_df, test_df], ignore_index=True) ```