5
5
from os .path import exists
6
6
7
7
# --- Configuration -------------------------------------------------------

# Stock to download, identified by its Chinese short name on cninfo.
stock = '博思软件'

# Known announcement categories (regex alternatives allowed, e.g. '半年度|季度').
announcement_list = ['分红派息实施公告', '利润分配预案', '年度报告', '半年度|季度', '招股说明书']

# Category actually fetched this run; matched as a regex against titles.
announcement = '季度报告'

# Titles matching this pattern are skipped (abstracts, cancelled, notice-only).
ban = '摘要|已取消|提示性公告'

# Local folder where the downloaded PDFs are written.
RESULTS_DIR = f'D:\\报告\\{stock}\\{announcement}'

# Make sure the output folder exists before any download starts.
if not exists(RESULTS_DIR):
    makedirs(RESULTS_DIR)

# Browser-like headers sent with every request against cninfo.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0'}

# Endpoint serving the SZSE stock list (maps short name -> code/orgId).
orgid_url = 'http://www.cninfo.com.cn/new/data/szse_stock.json'

# Announcement query endpoint (paged POST requests).
url = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'

# Attachment URLs in query results are relative to this base.
DETAIL_URL = 'http://static.cninfo.com.cn/'

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')
17
19
18
20
@@ -24,25 +26,23 @@ def get_orgid():
24
26
stock_lists = orgids ['stockList' ]
25
27
for stock_list in stock_lists :
26
28
if stock_list ['zwjc' ] == stock :
27
- logging .info (f'获得股票信息: { stock } ' )
28
29
return {
29
30
'code' : stock_list ['code' ],
30
31
'orgid' : stock_list ['orgId' ]
31
32
}
32
33
33
34
34
35
def get_pdf_url (page , data ):
35
- """获得年报及招股说明书pdf下载信息 """
36
+ """获得公告的pdf下载信息 """
36
37
code = data .get ('code' )
37
38
orgid = data .get ('orgid' )
38
-
39
39
post_data = {
40
40
'stock' : f'{ code } ,{ orgid } ' ,
41
41
'tabName' : 'fulltext' ,
42
42
'pageSize' : 30 ,
43
43
'pageNum' : page ,
44
44
'column' : 'szse' ,
45
- 'category' : 'category_ndbg_szsh;category_sf_szsh; ' ,
45
+ 'category' : '' ,
46
46
'plate' : 'sz' ,
47
47
'seDate' : '' ,
48
48
'searchkey' : '' ,
@@ -58,18 +58,44 @@ def get_pdf_url(page, data):
58
58
dats = an .get ('announcements' )
59
59
stock_list = []
60
60
for dat in dats :
61
- if re .search ('摘要|已取消' , dat ['announcementTitle' ]):
61
+ if re .search (ban , dat ['announcementTitle' ]):
62
62
continue
63
- elif re .search ('招股说明书|年度报告' , dat ['announcementTitle' ]):
63
+ elif re .search (announcement , dat ['announcementTitle' ]):
64
64
stock_list .append ({
65
65
'announcementTitle' : dat ['announcementTitle' ],
66
66
'adjunctUrl' : dat ['adjunctUrl' ]
67
67
})
68
68
return stock_list
69
69
70
70
71
def get_totalpages(data):
    """Return the total number of result pages for the announcement query.

    Args:
        data: dict with 'code' (stock code) and 'orgid' (cninfo orgId),
              as produced by get_orgid().

    Returns:
        int: total page count reported by the endpoint; 0 when the response
        carries no 'totalpages' field (so callers doing `pages + 1` never
        receive None).

    Raises:
        httpx.HTTPStatusError: when the endpoint answers with an error status.
    """
    code = data.get('code')
    orgid = data.get('orgid')
    # Same query body the page fetcher uses; only page 1 is requested because
    # every page response already includes the total page count.
    post_data = {
        'stock': f'{code},{orgid}',
        'tabName': 'fulltext',
        'pageSize': 30,
        'pageNum': 1,
        'column': 'szse',
        'category': '',
        'plate': 'sz',
        'seDate': '',
        'searchkey': '',
        'secid': '',
        'sortName': '',
        'sortType': '',
        'isHLtitle': 'true'
    }
    with httpx.Client(headers=headers) as client:
        res = client.post(url, data=post_data)
        # Fail loudly on HTTP errors instead of trying to JSON-parse an error page.
        res.raise_for_status()
        an = res.json()
    # `or 0` guards against a missing/None 'totalpages' field.
    return an.get('totalpages') or 0
95
+
96
+
71
97
def save_pdf (datas ):
72
- """保存年报pdf """
98
+ """保存公告pdf """
73
99
for data in datas :
74
100
part_url = data .get ('adjunctUrl' )
75
101
name = data .get ('announcementTitle' )
@@ -84,9 +110,13 @@ def save_pdf(datas):
84
110
85
111
86
112
def main():
    """Download every announcement PDF matching the configured filters."""
    # Resolve the stock's code/orgId ONCE. The original called get_orgid()
    # again inside the loop for every page — one extra network round trip
    # per page for data that never changes during a run.
    stock_info = get_orgid()
    pages = get_totalpages(stock_info)
    logging.info(f'一共{pages}页公告信息...')
    for page in range(1, pages + 1):
        pdfdata = get_pdf_url(page, stock_info)
        logging.info(f'获得第{page}页股票信息...')
        save_pdf(pdfdata)
    logging.info('下载完成')
90
120
91
121
92
122
if __name__ == '__main__' :
0 commit comments