-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Copy pathread_emails.py
140 lines (131 loc) · 6.12 KB
/
read_emails.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import sys
# for encoding/decoding messages in base64
from base64 import urlsafe_b64decode
from common import gmail_authenticate, search_messages
def get_size_format(b, factor=1024, suffix="B"):
    """
    Scale bytes to its proper byte format
    e.g:
        1253656 => '1.20MB'
        1253656678 => '1.17GB'

    b: the byte count (int or float); a negative count keeps its sign.
    factor: divisor applied between two consecutive units.
    suffix: text appended right after the unit prefix.
    Returns the scaled value with two decimals, followed by unit and suffix.
    """
    for unit in ("", "K", "M", "G", "T", "P", "E", "Z"):
        # compare on magnitude, otherwise any negative count would stop
        # at the very first unit without being scaled
        if abs(b) < factor:
            return f"{b:.2f}{unit}{suffix}"
        b /= factor
    # still too large after every listed unit: report it in the last one
    return f"{b:.2f}Y{suffix}"
def clean(text):
    """
    Make `text` usable as a folder name: every character that is not
    alphanumeric is swapped for an underscore, the others are kept as they are.
    """
    pieces = []
    for char in text:
        if not char.isalnum():
            char = "_"
        pieces.append(char)
    return "".join(pieces)
def _safe_filename(name):
    # reduce a file name taken from the email to its last path component,
    # so that it cannot point outside of the folder we save into
    name = os.path.basename((name or "").replace("\\", "/"))
    return "" if name in (".", "..") else name


def parse_parts(service, parts, folder_name, message):
    """
    Utility function that parses the content of an email partition

    Goes through `parts` (a list of message part dicts), calling itself again
    on any nested parts, and for each part:
      - text/plain: decodes the inline data and prints it
      - text/html: saves the inline data under `folder_name`
        (as index.html when the part has no file name)
      - anything else: when a Content-Disposition header contains "attachment",
        saves the content under `folder_name`, downloading it with `service`
        when the part only carries an attachment ID

    service: the Gmail API service, only used to download attachments.
    parts: list of part dicts; nothing is done when it is None or empty.
    folder_name: an existing folder where the files get saved.
    message: dict with the "id" of the message the parts belong to.
    """
    if not parts:
        return
    for part in parts:
        # the file name comes from the email, never trust it as a path
        filename = _safe_filename(part.get("filename"))
        mime_type = part.get("mimeType")
        # a missing body / headers entry is treated as an empty one
        body = part.get("body") or {}
        data = body.get("data")
        file_size = body.get("size")
        part_headers = part.get("headers") or []
        if part.get("parts"):
            # recursively call this function when we see that a part
            # has parts inside
            parse_parts(service, part.get("parts"), folder_name, message)
        if mime_type == "text/plain":
            # if the email part is text plain
            if data:
                # the text is not guaranteed to be UTF-8, do not crash on it
                text = urlsafe_b64decode(data).decode(errors="replace")
                print(text)
        elif mime_type == "text/html":
            # if the email part is an HTML content, save the HTML file;
            # a part without inline data has nothing to decode
            if data:
                if not filename:
                    filename = "index.html"
                filepath = os.path.join(folder_name, filename)
                print("Saving HTML to", filepath)
                with open(filepath, "wb") as f:
                    f.write(urlsafe_b64decode(data))
        else:
            # attachment other than a plain text or HTML
            # (header names are case-insensitive)
            is_attachment = any(
                (part_header.get("name") or "").lower() == "content-disposition"
                and "attachment" in (part_header.get("value") or "")
                for part_header in part_headers
            )
            if not is_attachment or not filename:
                # not an attachment, or no usable name to save it under
                continue
            print("Saving the file:", filename, "size:", get_size_format(file_size or 0))
            attachment_id = body.get("attachmentId")
            if attachment_id:
                # we get the attachment ID
                # and make another request to get the attachment itself
                attachment = service.users().messages() \
                    .attachments().get(id=attachment_id, userId='me', messageId=message['id']).execute()
                data = attachment.get("data")
            # without an attachment ID, `data` still holds the inline content
            if data:
                filepath = os.path.join(folder_name, filename)
                with open(filepath, "wb") as f:
                    f.write(urlsafe_b64decode(data))
def _make_unique_folder(base_name):
    # create `base_name`, or `base_name_1`, `base_name_2`, ... when the name
    # is already taken, and return the name that was actually created
    candidate = base_name
    counter = 0
    while True:
        try:
            # try to create it right away instead of checking first
            os.mkdir(candidate)
        except FileExistsError:
            counter += 1
            candidate = f"{base_name}_{counter}"
        else:
            return candidate


def read_message(service, message):
    """
    This function takes Gmail API `service` and the given `message` (a dict holding the message `id`)
    and does the following:
        - Downloads the content of the email
        - Prints email basic information (To, From, Subject & Date) and plain/text parts
        - Creates a folder for each email based on the subject
          (a number is added next to the name when that folder already exists)
        - Downloads text/html content (if available) and saves it under the folder created as index.html
        - Downloads any file that is attached to the email and saves it in the folder created
    Emails with no subject (or an empty one) all go to a folder named "email".
    """
    msg = service.users().messages().get(userId='me', id=message['id'], format='full').execute()
    # parts can be the message body, or attachments
    payload = msg['payload']
    headers = payload.get("headers") or []
    # an email that is not multipart has no "parts": its content is on the
    # payload itself, so we parse the payload as the only part
    parts = payload.get("parts") or [payload]
    folder_name = None
    # this section prints email basic info & creates a folder for the email
    for header in headers:
        name = (header.get("name") or "").lower()
        value = header.get("value")
        if name == "from":
            # we print the From address
            print("From:", value)
        elif name == "to":
            # we print the To address
            print("To:", value)
        elif name == "subject":
            subject_folder = clean(value or "")
            # only one folder per email, and an empty subject cannot name a folder
            if folder_name is None and subject_folder:
                # make a directory with the name of the subject,
                # we also handle emails with the same subject name
                folder_name = _make_unique_folder(subject_folder)
            print("Subject:", value)
        elif name == "date":
            # we print the date when the message was sent
            print("Date:", value)
    if folder_name is None:
        # if the email does not have a subject, then make a folder with "email" name
        # since folders are created based on subjects
        folder_name = "email"
        os.makedirs(folder_name, exist_ok=True)
    parse_parts(service, parts, folder_name, message)
    print("="*50)
if __name__ == "__main__":
    # the search query is required, stop with a usage message when it is missing
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <search query>")
    service = gmail_authenticate()
    # get emails that match the query you specify from the command lines
    results = search_messages(service, sys.argv[1])
    print(f"Found {len(results)} results.")
    # for each email matched, read it (output plain/text to console & save HTML and attachments)
    for msg in results:
        read_message(service, msg)