Update text-gen

JHyuk2 · JHyuk2 · commit 830b44d4c371 · 2025-01-31T23:12:49.000+09:00
diff --git a/Projects/AI 영어대화/ASR_re.ipynb b/Projects/AI 영어대화/ASR_re.ipynb
@@ -117,7 +117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -132,7 +132,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -211,13 +211,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "09b03a8d3eda4528886536386846475b",
+       "model_id": "f8c10f91754041029d307cb9745e761c",
        "version_major": 2,
        "version_minor": 0
       },
@@ -231,7 +231,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "61a4f068b0114f9cafa0ee880c90209e",
+       "model_id": "5f3e10341d67458f89579f9b027b6d92",
        "version_major": 2,
        "version_minor": 0
       },
@@ -245,7 +245,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cb4bab95308c4e3a93acf76e614e2b0d",
+       "model_id": "c10c6305100a465b89a810e20ef0edd7",
        "version_major": 2,
        "version_minor": 0
       },
@@ -259,7 +259,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e253fb7d2ee94f4fa6c08eaea3c1a60c",
+       "model_id": "9431585b60704006912dfe735be57542",
        "version_major": 2,
        "version_minor": 0
       },
@@ -273,7 +273,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d42c0f31a5a446419de494afaa837470",
+       "model_id": "970714d48715463cae581faaad89fb26",
        "version_major": 2,
        "version_minor": 0
       },
@@ -311,7 +311,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -342,7 +342,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -392,26 +392,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.\n",
-      "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n",
-      "c:\\Users\\win\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\ai_영어대화-HAZ4-uhy-py3.12\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:695: UserWarning: `num_beams` is set to 1. However, `length_penalty` is set to `1.3` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `length_penalty`.\n",
-      "  warnings.warn(\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline\n",
     "from datasets import load_dataset\n",
-    "\n",
-    "\n",
     "import torch\n",
     "# from transformers.models.whisper import EncoderDecoderCache\n",
     "\n",
@@ -436,7 +423,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -455,7 +442,7 @@
     "    generates_ids = model.generate(\n",
     "        input_features,\n",
     "        # max_length=300,\n",
-    "        temperature=0.1, # 다양성\n",
+    "        temperature=0.3, # 다양성\n",
     "        num_beams=2, # beam search를 사용한 텍스트 생성\n",
     "        length_penalty=1.3, # 기본값 1, 긴 텍스트에 조금 더 유리하도록 설정\n",
     "        attention_mask=attention_mask, # 입력 데이터에 대해 명시적으로 attention_mask 생성 후 전달\n",
@@ -467,13 +454,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 64,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
+      "Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.\n",
+      "Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.\n",
       "c:\\Users\\win\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\ai_영어대화-HAZ4-uhy-py3.12\\Lib\\site-packages\\transformers\\generation\\configuration_utils.py:695: UserWarning: `num_beams` is set to 1. However, `length_penalty` is set to `1.3` -- this flag is only used in beam-based generation modes. You should set `num_beams>1` or unset `length_penalty`.\n",
       "  warnings.warn(\n"
      ]
@@ -492,19 +481,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[' 이익을 얼마나 냈는지, 구독자가 얼마나 늘었는지, 내출이 어떻게 됐는지, 팔로워가 늘었는지, 아니면 내가 뭘 배웠는지까지 다 리조트에 들어가요. 그래서 이게 SDAI 스타일맨소드이고, 여기서 제가 아까 말씀드렸던, 취업을 잘하는 학생들의 학교 차이는, 그래서 이 경험으로 내가 이 회사의 이 비정화와 어떻게 매치시킬 건지까지 얘기합니다.',\n",
-       " ' 아, 면접을 보다 보면 이 정도로 얘기를 하면 사실은 아까 STAR을 얘기하는 동안 쟤는 어떤 애구나 알게 되는 것인데 마지막에 나에게 다 했으니까 당신에서 내가 알고 있어요. 맞아 떨어지는 거지 아는 두 가지가 맞아 떨어지면 진짜 아는 거지 여기서 또 중요한 게 하나 더 있는데 마인드즈시거든요. 마인드즈시 뭐냐면 여기 아니어도 돼.',\n",
-       " ' 아, 저의 기관은?',\n",
+       "[' 이익을 얼마나 낸는지, 구독자가 얼마나 늘었는지, 매출이 어떻게 됐는지, 팔로워가 늘었는지, 아니면 내가 뭘 배웠는지까지 다 리조트에 들어가요. 그래서 이게 SDAI 스타일메소드이고, 여기서 제가 아까 말씀드렸던, 취업을 잘하는 학생들의 학교 차이는 그래서 이 경험으로 내가 이 회사의 이 비정화와 어떻게 매치시킬 건지까지',\n",
+       " ' 아, 성경을 잘해야 돼',\n",
+       " ' 이 시원이랑 나랑 맞춰서 반드시 여기 가야 돼요 그럼 안돼 절박하면 안 돼 절박하면 티나고 절박한 사람은 사실 안 보고도',\n",
        " ' you']"
       ]
      },
-     "execution_count": 65,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }