-
Notifications
You must be signed in to change notification settings - Fork 6
/
check_parsing_speed.py
80 lines (53 loc) · 1.75 KB
/
check_parsing_speed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import requests
import time
from joblib import Parallel, delayed
from multiprocessing import Queue
# Queue that check() puts every extracted id onto.
results_queue = Queue()

# Download one search-results page to use as the parsing sample. The explicit
# timeout keeps the script from blocking forever if the server never answers
# (requests waits indefinitely when no timeout is given).
data = requests.get('http://tmall.aliexpress.com/wholesale?q=tv', timeout=30)
text = data.text
print(text)

# Workload: 100000 references to the same page text.
all_data = [text] * 100000

# List that check2() (and the disabled baseline below) append extracted ids to.
results = []

# Disabled single-threaded baseline. The bare string literal below is never
# executed; it only preserves the sequential version of the same parsing loop.
"""
start_time = int(round(time.time()))
for t in all_data:
    items = t.split('/item')
    for item in items:
        if item.startswith('/'):
            p_id = item.split('.html')[0].split('/')[-1]
            results.append(p_id)
end_time = int(round(time.time()))
print('1 thread TASK WAS DONE IN {0} SECONDS'.format(end_time - start_time))
print('Results: ', len(results))
print('\n\n\n')
"""
def check(t):
    """Extract ids from page text ``t`` and put each one on ``results_queue``.

    The text is cut at every ``/item`` marker and only the pieces that begin
    with ``/`` are used. For such a piece, the id is the part of the text
    preceding the first ``.html`` that follows its last ``/``.

    Returns None; the ids are delivered through the module-level queue.
    """
    for fragment in t.split('/item'):
        if not fragment.startswith('/'):
            continue
        before_ext = fragment.partition('.html')[0]
        results_queue.put(before_ext.rpartition('/')[2])
def check2(t):
    """Extract ids from page text ``t`` and append them to ``results``.

    Every piece of ``t.split('/item')`` that starts with ``/`` contributes one
    id: the text before the piece's first ``.html``, reduced to what follows
    its last ``/``.

    Returns None; the ids are added to the module-level ``results`` list.
    """
    results.extend(
        piece.split('.html', 1)[0].rsplit('/', 1)[-1]
        for piece in t.split('/item')
        if piece.startswith('/')
    )
def _timed_parallel(worker, n_jobs):
    """Run ``worker`` once per entry of ``all_data`` via joblib and time it.

    Args:
        worker: callable taking one page text (``check`` or ``check2``).
        n_jobs: number of joblib workers to use.

    Returns:
        Elapsed time in whole seconds, measured with the monotonic
        ``time.perf_counter`` clock so system clock adjustments cannot skew it.
    """
    started = time.perf_counter()
    # No backend is given, so joblib uses its default one; pass
    # backend='threading' to Parallel to force thread workers.
    Parallel(n_jobs=n_jobs, timeout=300)(
        delayed(worker)(task) for task in all_data
    )
    return int(round(time.perf_counter() - started))


# Start from an empty list so the check2 count below only reflects this run.
results = []

# Run "check" with 10 workers; ids are counted from results_queue.
elapsed = _timed_parallel(check, 10)
print('10 threads TASK WAS DONE IN {0} SECONDS'.format(elapsed))
print('Results: ', results_queue.qsize())
print('\n\n\n')

# Run "check2" with 100 workers; ids are counted from the results list.
# NOTE(review): check2 appends to the module-level list; if joblib runs the
# workers in separate processes this list may not see those appends - confirm
# the intended backend.
elapsed = _timed_parallel(check2, 100)
print('100 threads TASK WAS DONE IN {0} SECONDS'.format(elapsed))
print('Results: ', len(results))