Skip to content

Commit

Permalink
更新教材 2024.11.28
Browse files Browse the repository at this point in the history
  • Loading branch information
telunyang committed Nov 28, 2024
1 parent 6eedac5 commit 202dda7
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 58 deletions.
20 changes: 14 additions & 6 deletions 11 selenium 瀏覽器自動化測試.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
"outputs": [],
"source": [
"# 開啟網頁\n",
"driver.get(\"http://crptransfer.moe.gov.tw/\")\n",
"driver.get(\"https://crptransfer.moe.gov.tw/\")\n",
"\n",
"# 跳出 alert 視窗 (在 chrome 裡面執行 javascript 語法)\n",
"driver.execute_script(\"window.alert('這是我們自訂的彈跳視窗');\")\n",
Expand All @@ -97,10 +97,12 @@
"outputs": [],
"source": [
"# 開啟網頁\n",
"driver.get(\"http://crptransfer.moe.gov.tw/\")\n",
"driver.get(\"https://crptransfer.moe.gov.tw/\")\n",
"\n",
"# 尋找網頁中的搜尋框\n",
"inputElement = driver.find_element(By.CSS_SELECTOR, 'input#SN')\n",
"inputElement = driver.find_element(\n",
" By.CSS_SELECTOR, 'input#SN'\n",
")\n",
"\n",
"# 在搜尋框中輸入文字\n",
"inputElement.send_keys(\"人帥真好\")\n",
Expand All @@ -123,7 +125,9 @@
" )\n",
" \n",
" # 取得第一頁搜尋結果\n",
" element = driver.find_element(By.CSS_SELECTOR, cssSelector)\n",
" element = driver.find_element(\n",
" By.CSS_SELECTOR, cssSelector\n",
" )\n",
" \n",
" # 輸出想要爬取的文字\n",
" print(element.text) \n",
Expand All @@ -149,7 +153,9 @@
"driver.get(\"https://www.104.com.tw/jobs/main/\")\n",
"\n",
"# 尋找網頁中的搜尋框\n",
"inputElement = driver.find_element(By.CSS_SELECTOR, 'input[data-gtm-index^=\"搜尋欄位\"]')\n",
"inputElement = driver.find_element(\n",
" By.CSS_SELECTOR, 'input[data-gtm-index^=\"搜尋欄位\"]'\n",
")\n",
"\n",
"# 在搜尋框中輸入文字\n",
"inputElement.send_keys(\"python\")\n",
Expand All @@ -169,7 +175,9 @@
" )\n",
" \n",
" # 取得按鈕元素\n",
" btn = driver.find_element(By.CSS_SELECTOR, cssSelectorBtn)\n",
" btn = driver.find_element(\n",
" By.CSS_SELECTOR, cssSelectorBtn\n",
" )\n",
" \n",
" # 按下按鈕\n",
" btn.click()\n",
Expand Down
88 changes: 36 additions & 52 deletions cases/twse.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,9 @@
"# 操作 browser 的 驅動程式\n",
"from selenium import webdriver\n",
"\n",
"# 自動下載 Chrome Driver 的套件\n",
"from webdriver_manager.chrome import ChromeDriverManager\n",
"\n",
"# 例外處理的工具\n",
"from selenium.common.exceptions import TimeoutException\n",
"\n",
"# 面對動態網頁,等待、了解某個元素的狀態,通常與 expected_conditions 和 By 搭配\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"\n",
"# 搭配 WebDriverWait 使用,對元素狀態的一種期待條件,若條件發生,則等待結束,往下一行執行\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"\n",
"# 期待元素出現要透過什麼方式指定,經常與 EC、WebDriverWait 一起使用\n",
"from selenium.webdriver.common.by import By\n",
"\n",
"# 取得系統時間的工具\n",
"from datetime import datetime\n",
"\n",
"# 強制停止/強制等待 (程式執行期間休息一下)\n",
"from time import sleep\n",
"\n",
Expand Down Expand Up @@ -95,7 +80,12 @@
"# 自訂下載路徑 (不會詢問下載位置)\n",
"my_options.add_experimental_option(\"prefs\", {\n",
" \"download.default_directory\": folderPath,\n",
" \"profile.default_content_settings.popups\": 0\n",
" \"profile.default_content_settings.popups\": 0,\n",
" \"download.prompt_for_download\": False,\n",
" # \"download.directory_upgrade\": True,\n",
" # \"safebrowsing_for_trusted_sources_enabled\": False,\n",
" # \"safebrowsing.enabled\": False,\n",
" # \"plugins.always_open_pdf_externally\": True\n",
"})\n",
"\n",
"# 使用 Chrome 的 WebDriver\n",
Expand All @@ -109,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "4c81f7bd",
"metadata": {},
"outputs": [],
Expand All @@ -120,44 +110,38 @@
"\n",
"# 選取下拉式選單的項目\n",
"def setDropDownMenu(year, value, index):\n",
" try:\n",
" # 強制等待\n",
" sleep(1)\n",
"\n",
" # 選擇 select[name=\"yy\"] 元素,並依 option 的 innerText 來進行選取\n",
" yy = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=yy]'))\n",
" yy.select_by_visible_text(f'民國 {year} 年')\n",
"\n",
" # 選擇 select[name=\"mm\"] 元素,並依 option 的 value 來進行選取\n",
" mm = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=mm]'))\n",
" mm.select_by_value(str(value))\n",
"\n",
" # 選擇 select[name=\"dd\"] 元素,並依 option 的 index 來進行選取\n",
" dd = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=dd]'))\n",
" dd.select_by_index(index)\n",
"\n",
" # 按下查詢\n",
" driver.find_element(\n",
" By.CSS_SELECTOR, \n",
" 'div.submit'\n",
" ).click()\n",
" except Exception as err:\n",
" print(err)\n",
" # 強制等待\n",
" sleep(1)\n",
"\n",
" # 選擇 select[name=\"yy\"] 元素,並依 option 的 innerText 來進行選取\n",
" yy = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=yy]'))\n",
" yy.select_by_visible_text(f'民國 {year} 年')\n",
"\n",
" # 選擇 select[name=\"mm\"] 元素,並依 option 的 value 來進行選取\n",
" mm = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=mm]'))\n",
" mm.select_by_value(str(value))\n",
"\n",
" # 選擇 select[name=\"dd\"] 元素,並依 option 的 index 來進行選取\n",
" dd = Select(driver.find_element(By.CSS_SELECTOR, 'select[name=dd]'))\n",
" dd.select_by_index(index)\n",
"\n",
" # 按下查詢\n",
" driver.find_element(\n",
" By.CSS_SELECTOR, \n",
" 'div.submit'\n",
" ).click()\n",
" \n",
"# 下載檔案\n",
"def download(year, value, index):\n",
" try:\n",
" # 下載 csv\n",
" year = 1911 + year\n",
" value = '0' + str(value) if value < 10 else str(value)\n",
" index = '0' + str(index + 1) if (index + 1) < 10 else str(index + 1)\n",
" date = f'{year}{value}{index}'\n",
" os.system(f'curl \"https://www.twse.com.tw/rwd/zh/fund/TWT38U?date={date}&response=csv\" -o {folderPath}/{date}.csv')\n",
" \n",
" # 擷圖\n",
" driver.save_screenshot(f\"{folderPath}/{date}.png\")\n",
" except TimeoutException:\n",
" print(\"等待逾時,即將關閉瀏覽器…\")\n",
" # 下載 csv\n",
" year = 1911 + year\n",
" value = '0' + str(value) if value < 10 else str(value)\n",
" index = '0' + str(index + 1) if (index + 1) < 10 else str(index + 1)\n",
" date = f'{year}{value}{index}'\n",
" os.system(f'curl \"https://www.twse.com.tw/rwd/zh/fund/TWT38U?date={date}&response=csv\" -o {folderPath}/{date}.csv')\n",
" \n",
" # 擷圖\n",
" driver.save_screenshot(f\"{folderPath}/{date}.png\")\n",
"\n",
"# 關閉瀏覽器\n",
"def close():\n",
Expand Down
Binary file modified python_web_scraping.docx
Binary file not shown.
Binary file modified python_web_scraping.pdf
Binary file not shown.

0 comments on commit 202dda7

Please sign in to comment.