-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathstream_post.py
94 lines (86 loc) · 2.74 KB
/
stream_post.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import mimetools
import re
import email.parser
class LineReader(object):
    """Buffered line reader over a file-like object that only needs ``read``.

    The wrapped object is consumed in fixed-size chunks; whatever follows the
    line that was just returned stays in ``self.data`` for the next call.

    Attributes:
        fp: the wrapped file-like object.
        data: bytes read from ``fp`` but not yet handed out; ``None`` once
            end of input has been reached.
    """

    # Number of bytes requested from the underlying stream per read.
    CHUNK_SIZE = 4096

    def __init__(self, fp):
        self.fp = fp
        self.data = ""

    def readline(self, size=None):
        """Return the next line, including its trailing newline.

        Args:
            size: optional positive upper bound on the length of the returned
                string.  A line longer than ``size`` is handed out in pieces
                of at most ``size`` bytes; nothing is discarded.  ``None``
                or ``0`` means unlimited.

        Returns:
            The next line or partial line, or ``""`` at end of input.
        """
        if self.data is None:
            return ""
        limit = size if size and size > 0 else None

        pos = self.data.find("\n")
        while pos == -1 and (limit is None or len(self.data) < limit):
            chunk = self.fp.read(self.CHUNK_SIZE)
            if not chunk:
                # End of input: hand out whatever is left, then stay at EOF.
                line = self.data
                self.data = None
                return line
            searched = len(self.data)
            self.data += chunk
            # Only the freshly appended bytes can contain a new newline.
            pos = self.data.find("\n", searched)

        # pos == -1 here means the size limit was hit before any newline.
        end = len(self.data) if pos == -1 else pos + 1
        if limit is not None and end > limit:
            end = limit
        line = self.data[:end]
        self.data = self.data[end:]
        return line

    def read(self, size=-1):
        """Return up to ``size`` bytes, serving buffered data first.

        Args:
            size: maximum number of bytes to return; ``None`` or a negative
                value reads everything that is left.

        Returns:
            The bytes read, or ``""`` at end of input.
        """
        if self.data is None:
            return ""
        if size is None or size < 0:
            rest = self.data + self.fp.read()
            self.data = None
            return rest
        missing = size - len(self.data)
        if missing > 0:
            self.data += self.fp.read(missing)
        chunk = self.data[:size]
        self.data = self.data[size:]
        return chunk
def _read_buffered(reader, count):
    """Return up to ``count`` bytes from a LineReader, buffered data first.

    The reader pulls whole chunks from its underlying stream, so after the
    part headers have been parsed the start of the body is usually already
    sitting in ``reader.data``.  Reading ``reader.fp`` directly would skip it.
    """
    buffered = reader.data
    if buffered:
        reader.data = buffered[count:]
        return buffered[:count]
    return reader.fp.read(count)


def multipart_iter_content(fp, boundary, maxlen=0):
    """Stream the uploaded WARC file(s) out of a multipart/form-data body.

    Contains a few lines from Python's cgi.py.

    Only parts whose Content-Disposition header matches
    ``filename="...warc.gz"`` are yielded; every other part is read and
    dropped.

    Args:
        fp: file-like object positioned at the start of the multipart body.
        boundary: the multipart boundary string, without the leading "--".
        maxlen: largest per-part Content-Length to accept; ``0`` disables
            the check.

    Yields:
        Consecutive string chunks of the matching parts' content, followed by
        one final ``""`` when the body has been consumed.

    Raises:
        ValueError: a part declares a Content-Length greater than ``maxlen``.
    """
    nextpart = "--" + boundary
    lastpart = "--" + boundary + "--"
    terminator = ""
    fp = LineReader(fp)

    while terminator != lastpart:
        is_file = False
        if terminator:
            # At start of next part.  Read headers first.
            headers = mimetools.Message(fp)
            disposition = headers.getheader('content-disposition')
            if disposition and re.search(r'filename=".+warc\.gz"', disposition):
                is_file = True

            length = 0
            clength = headers.getheader('content-length')
            if clength:
                try:
                    length = int(clength)
                except ValueError:
                    # Malformed header: fall back to scanning for the boundary.
                    pass
            if length > 0:
                if maxlen and length > maxlen:
                    raise ValueError('Maximum content length exceeded')
                remaining = length
                while remaining > 0:
                    chunk = _read_buffered(fp, min(remaining, 4096))
                    if not chunk:
                        # Input ended before the declared length was reached.
                        break
                    if is_file:
                        yield chunk
                    # Count what was actually received; reads may be short.
                    remaining -= len(chunk)

        # Read lines until end of part.
        # NOTE(review): the line terminator that precedes a boundary line is
        # yielded as part of the file data (cgi.py strips it) -- confirm that
        # the consumer expects this.
        pending = []
        pending_len = 0
        while True:
            line = fp.readline(4096)
            if not line:
                terminator = lastpart  # End outer loop
                break
            if line[:2] == "--":
                candidate = line.strip()
                if candidate in (nextpart, lastpart):
                    terminator = candidate
                    break
            if is_file:
                pending.append(line)
                pending_len += len(line)
                if pending_len > 4096:
                    yield "".join(pending)
                    pending = []
                    pending_len = 0
        if pending_len > 0:
            yield "".join(pending)

    # Done with all parts.
    yield ""