Skip to content

Commit 2abe51b

Browse files
committed
Merge remote-tracking branch 'origin/dev'
2 parents a8bdd93 + 70f5987 commit 2abe51b

20 files changed

Lines changed: 976 additions & 20 deletions

File tree

.github/workflows/deploy.yml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Deploys python-talk to the remote host over SSH: fetches the repository,
# builds the frontend, starts the shared services and the backend container.
name: deploy

on:
  workflow_dispatch:

  push:
    branches: [ dev ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Deploy
        uses: appleboy/ssh-action@v1.1.0
        with:
          host: ${{ secrets.REMOTE_HOST }}
          port: ${{ secrets.REMOTE_PORT }}
          username: ${{ secrets.REMOTE_USER }}
          password: ${{ secrets.REMOTE_PASSWD }}
          # `set -e` aborts on the first failing command so a broken fetch or
          # build is never followed by a deployment of stale artifacts.
          # The clone is guarded because a plain `git clone` fails as soon as
          # the checkout already exists, i.e. on every run after the first.
          # NOTE(review): the workflow triggers on pushes to `dev`, but the
          # clone/pull uses the repository's default branch - confirm this is
          # the branch that should be deployed.
          script: |
            set -e

            mkdir -p /www/code
            cd /www/code
            if [ -d python-talk/.git ]; then
              git -C python-talk pull --ff-only
            else
              git clone https://github.com/codists/python-talk.git
            fi

            mkdir -p /www/frontend/python_talk
            cd /www/code/python-talk/frontend
            npm install -y
            npm run build
            cp -r /www/code/python-talk/frontend/dist /www/frontend/python_talk/

            cp -r /www/code/python-talk/deployment/common /www
            cd /www/common/
            docker compose -f docker-compose-common.yml up -d

            mkdir -p /www/backend/python_talk
            cp -r /www/code/python-talk/backend/ /www/backend/python_talk/
            cd /www/backend/python_talk/backend
            docker build -t python_talk:0.0.1 /www/backend/python_talk/backend
            docker compose -f /www/backend/python_talk/backend/docker-compose.yml up -d

.github/workflows/workflow.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Placeholder workflow.
# The original file left both `on:` and `jobs:` empty, which is not a valid
# workflow definition. Until the real triggers and jobs are written, it is
# restricted to manual runs and contains a single no-op job.
# NOTE(review): replace the trigger and job below with the intended ones, or
# delete this file if it was committed by accident.
name: workflow.yml
on:
  workflow_dispatch:

jobs:
  placeholder:
    runs-on: ubuntu-latest
    steps:
      - name: Placeholder
        run: echo "TODO - define the jobs for this workflow"

backend/booksbot/booksbot/__init__.py

Whitespace-only changes.

backend/booksbot/booksbot/items.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Define here the models for your scraped items
2+
#
3+
# See documentation in:
4+
# https://docs.scrapy.org/en/latest/topics/items.html
5+
6+
import scrapy
7+
8+
9+
# class BooksbotItem(scrapy.Item):
10+
# # define the fields for your item here like:
11+
# # name = scrapy.Field()
12+
# pass
13+
14+
class BookItem(scrapy.Item):
    """Fields collected for one scraped book."""

    # Book title
    title = scrapy.Field()
    # Author
    author = scrapy.Field()
    # Price.
    # NOTE(review): the original comment on this field read "publication
    # date", which contradicts the field name; confirm whether a separate
    # publication-date field is also required.
    price = scrapy.Field()
    # Description
    description = scrapy.Field()
    # URL of the book's detail page
    url = scrapy.Field()
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Define here the models for your spider middleware
2+
#
3+
# See documentation in:
4+
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
5+
6+
from scrapy import signals
7+
8+
# useful for handling different item types with a single interface
9+
from itemadapter import ItemAdapter
10+
11+
12+
class BooksbotSpiderMiddleware:
    """Pass-through spider middleware from the Scrapy project template.

    Not all hooks need to be defined: if one is missing, Scrapy acts as if
    the spider middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Handle a response on its way through the middleware into the spider.

        Should return None or raise an exception; this implementation always
        returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield what the spider returned after it processed ``response``.

        Must produce an iterable of Request or item objects; every element of
        ``result`` is forwarded unchanged.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Handle an exception raised by a spider or by ``process_spider_input()``.

        Should return either None or an iterable of Request or item objects;
        this implementation returns None.
        """
        return None

    async def process_start(self, start):
        """Forward every item or request of the ``start`` async iterator.

        ``start`` iterates over the spider's ``start()`` method or over the
        matching method of an earlier spider middleware.
        """
        async for item_or_request in start:
            yield item_or_request

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        # Lazy %-style arguments: the message is only formatted if emitted.
        spider.logger.info("Spider opened: %s", spider.name)
54+
55+
56+
class BooksbotDownloaderMiddleware:
    """Pass-through downloader middleware from the Scrapy project template.

    Not all hooks need to be defined: if one is missing, Scrapy acts as if
    the downloader middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_request(self, request, spider):
        """Handle a request that goes through the downloader middleware.

        Must either:
        - return None: continue processing this request
        - return a Response object
        - return a Request object
        - raise IgnoreRequest: process_exception() methods of installed
          downloader middleware will be called

        This implementation always returns None.
        """
        return None

    def process_response(self, request, response, spider):
        """Handle the response returned from the downloader.

        Must either:
        - return a Response object
        - return a Request object
        - raise IgnoreRequest

        This implementation returns ``response`` unchanged.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Handle an exception from a download handler or a ``process_request()``.

        Must either:
        - return None: continue processing this exception
        - return a Response object: stops the process_exception() chain
        - return a Request object: stops the process_exception() chain

        This implementation returns None.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was opened."""
        # Lazy %-style arguments: the message is only formatted if emitted.
        spider.logger.info("Spider opened: %s", spider.name)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Define your item pipelines here
2+
#
3+
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
4+
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
5+
6+
7+
# useful for handling different item types with a single interface
8+
from itemadapter import ItemAdapter
9+
10+
11+
# class BooksbotPipeline:
12+
# def process_item(self, item, spider):
13+
# return item
14+
from python_talk.models.book import Book
15+
from python_talk.extensions import db
16+
17+
class BookPipeline:
    """Item pipeline that stores every scraped book through ``db.session``."""

    def process_item(self, item, spider):
        """Save one scraped item to the database and pass it on.

        Args:
            item: The scraped item; its fields are passed to ``Book`` as
                keyword arguments.
            spider: The spider that produced the item.

        Returns:
            The same ``item``, so that later pipeline stages and feed exports
            still receive it (the original implicitly returned None).

        Raises:
            Exception: Whatever ``db.session.commit()`` raised, re-raised
                after the session has been rolled back.
        """
        spider.logger.debug("Storing scraped item: %r", item)
        # NOTE(review): `db` comes from python_talk.extensions; if it needs an
        # application context, that must be set up outside this method -
        # nothing here does so. Confirm against the spider entry point.
        book = Book(**item)
        db.session.add(book)
        try:
            db.session.commit()
        except Exception:
            # Roll back so one failed insert does not leave the shared
            # session unusable for the items that follow.
            db.session.rollback()
            raise
        return item

    def open_spider(self, spider):
        """Log that the spider has been opened."""
        spider.logger.info('spider 打开')

    def close_spider(self, spider):
        """Log that the spider has been closed."""
        spider.logger.info('spider 关闭')
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Scrapy settings for booksbot project
2+
#
3+
# For simplicity, this file contains only settings considered important or
4+
# commonly used. You can find more settings consulting the documentation:
5+
#
6+
# https://docs.scrapy.org/en/latest/topics/settings.html
7+
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
8+
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
9+
10+
BOT_NAME = "booksbot"
11+
12+
SPIDER_MODULES = ["booksbot.spiders"]
13+
NEWSPIDER_MODULE = "booksbot.spiders"
14+
15+
ADDONS = {}
16+
17+
18+
# Crawl responsibly by identifying yourself (and your website) on the user-agent
19+
#USER_AGENT = "booksbot (+http://www.yourdomain.com)"
20+
21+
# Obey robots.txt rules
22+
ROBOTSTXT_OBEY = True
23+
24+
# Concurrency and throttling settings
25+
#CONCURRENT_REQUESTS = 16
26+
CONCURRENT_REQUESTS_PER_DOMAIN = 1
27+
DOWNLOAD_DELAY = 1
28+
29+
# Disable cookies (enabled by default)
30+
#COOKIES_ENABLED = False
31+
32+
# Disable Telnet Console (enabled by default)
33+
#TELNETCONSOLE_ENABLED = False
34+
35+
# Override the default request headers:
36+
#DEFAULT_REQUEST_HEADERS = {
37+
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
38+
# "Accept-Language": "en",
39+
#}
40+
41+
# Enable or disable spider middlewares
42+
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
43+
#SPIDER_MIDDLEWARES = {
44+
# "booksbot.middlewares.BooksbotSpiderMiddleware": 543,
45+
#}
46+
47+
# Enable or disable downloader middlewares
48+
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
49+
#DOWNLOADER_MIDDLEWARES = {
50+
# "booksbot.middlewares.BooksbotDownloaderMiddleware": 543,
51+
#}
52+
53+
# Enable or disable extensions
54+
# See https://docs.scrapy.org/en/latest/topics/extensions.html
55+
#EXTENSIONS = {
56+
# "scrapy.extensions.telnet.TelnetConsole": None,
57+
#}
58+
59+
# Configure item pipelines
60+
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
61+
# ITEM_PIPELINES = {
62+
# "booksbot.pipelines.BooksbotPipeline": 300,
63+
# }
64+
ITEM_PIPELINES = {
65+
"booksbot.pipelines.BookPipeline": 300,
66+
}
67+
68+
# Enable and configure the AutoThrottle extension (disabled by default)
69+
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
70+
#AUTOTHROTTLE_ENABLED = True
71+
# The initial download delay
72+
#AUTOTHROTTLE_START_DELAY = 5
73+
# The maximum download delay to be set in case of high latencies
74+
#AUTOTHROTTLE_MAX_DELAY = 60
75+
# The average number of requests Scrapy should be sending in parallel to
76+
# each remote server
77+
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
78+
# Enable showing throttling stats for every response received:
79+
#AUTOTHROTTLE_DEBUG = False
80+
81+
# Enable and configure HTTP caching (disabled by default)
82+
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
83+
#HTTPCACHE_ENABLED = True
84+
#HTTPCACHE_EXPIRATION_SECS = 0
85+
#HTTPCACHE_DIR = "httpcache"
86+
#HTTPCACHE_IGNORE_HTTP_CODES = []
87+
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
88+
89+
# Set settings whose default value is deprecated to a future-proof value
90+
# TWISTED_REACTOR = 'twisted.internet.selectreactor.SelectReactor'
91+
FEED_EXPORT_ENCODING = "utf-8"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# This package will contain the spiders of your Scrapy project
2+
#
3+
# Please refer to the documentation for information on how to create and manage
4+
# your spiders.

0 commit comments

Comments
 (0)