Skip to content

Commit

Permalink
request emoji from cdnUrl
Browse files Browse the repository at this point in the history
  • Loading branch information
ppwwyyxx committed Jun 18, 2016
1 parent 049e3c3 commit 1344589
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 34 deletions.
2 changes: 1 addition & 1 deletion dump-html.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_args():
output_file = args.output

parser = WeChatDBParser(args.db)
res = Resource(args.res, args.avt)
res = Resource(parser, args.res, args.avt)

if name and name in parser.msgs_by_chat:
msgs = parser.msgs_by_chat[name]
Expand Down
10 changes: 1 addition & 9 deletions wechat/libchathelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,7 @@ def _get_image(self, msg):
return img, 'jpeg'
elif msg.type == TYPE_EMOJI:
md5 = msg.imgPath
if md5 in self.parser.internal_emojis:
emoji_img, format = self.res.get_internal_emoji(
self.parser.internal_emojis[md5])
else:
if md5 in self.parser.emojis:
group, _ = self.parser.emojis[md5]
else:
group = None
emoji_img, format = self.res.get_emoji(md5, group)
emoji_img, format = self.res.get_emoji_by_md5(md5)
return emoji_img, format
elif msg.type == TYPE_CUSTOM_EMOJI:
pq = PyQuery(msg.content)
Expand Down
20 changes: 12 additions & 8 deletions wechat/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def __init__(self, db_fname):
self.cc = self.db_conn.cursor()
self.contacts = {}
self.msgs_by_chat = defaultdict(list)
self.emojis = {}
self.emoji_groups = {}
self.emoji_url = {}
self.internal_emojis = {}
self._parse()

Expand All @@ -59,7 +60,7 @@ def _parse_msg(self):
SELECT {} FROM message
""".format(','.join(WeChatDBParser.FIELDS)))
for row in db_msgs:
values = self._parse_row(row)
values = self._parse_msg_row(row)
if not values:
continue
msg = WeChatMsg(values)
Expand Down Expand Up @@ -96,16 +97,18 @@ def _find_msg_by_type(self, msgs=None):
def _parse_emoji(self):
# wechat provided emojis
emojiinfo_q = self.cc.execute(
""" SELECT md5, desc, groupid FROM EmojiInfoDesc """)
""" SELECT md5, groupid FROM EmojiInfoDesc """)
for row in emojiinfo_q:
md5, desc, group = row
self.emojis[md5] = (group, desc)
md5, group = row
self.emoji_groups[md5] = group

NEEDED_EMOJI_CATALOG = [49, 50, 17]
emojiinfo_q = self.cc.execute(
""" SELECT md5, catalog, name FROM EmojiInfo WHERE name <> ''""")
""" SELECT md5, catalog, name, cdnUrl FROM EmojiInfo""")
for row in emojiinfo_q:
md5, catalog, name = row
md5, catalog, name, cdnUrl = row
if cdnUrl:
self.emoji_url[md5] = cdnUrl
if catalog not in NEEDED_EMOJI_CATALOG:
continue
self.internal_emojis[md5] = name
Expand All @@ -119,7 +122,8 @@ def _parse(self):
self._parse_emoji()

# process the values in a row
def _parse_row(self, row):
def _parse_msg_row(self, row):
""" parse a record of message into my format"""
values = dict(zip(WeChatDBParser.FIELDS, row))
if values['content']:
values['content'] = ensure_unicode(values['content'])
Expand Down
15 changes: 4 additions & 11 deletions wechat/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def __init__(self, parser, res=None):
self.time_html = open(TIME_HTML_FILE).read()
self.parser = parser
self.res = res
if self.res is None:
logger.warn("Resource Directory not given. Images / Voice Message won't be displayed.")
assert self.res is not None, \
"Resource Directory not given. Cannot render HTML."
self.smiley = SmileyProvider()

css_files = glob.glob(os.path.join(LIB_PATH, 'static/*.css'))
Expand Down Expand Up @@ -124,14 +124,7 @@ def fallback():
return template.format(**format_dict)
elif msg.type == TYPE_EMOJI:
md5 = msg.imgPath
if md5 in self.parser.internal_emojis:
emoji_img, format = self.res.get_internal_emoji(self.parser.internal_emojis[md5])
else:
if md5 in self.parser.emojis:
group, _ = self.parser.emojis[md5]
else:
group = None
emoji_img, format = self.res.get_emoji(md5, group)
emoji_img, format = self.res.get_emoji_by_md5(md5)
format_dict['emoji_format'] = format
format_dict['emoji_img'] = emoji_img
return template.format(**format_dict)
Expand Down Expand Up @@ -184,7 +177,7 @@ def prepare_avatar_css(self, talkers):
css = avatar_tpl.format(name='me', avatar=my_avatar)

for talker in talkers:
avatar = self.res.get_avatar(self.parser.contacts_rev[talker])
avatar = self.res.get_contact_avatar(talker)
css += avatar_tpl.format(name=talker, avatar=avatar)
self.css_string.append(css)

Expand Down
74 changes: 69 additions & 5 deletions wechat/res.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
logger = logging.getLogger(__name__)
import imghdr
from multiprocessing import Pool

import atexit
import cPickle as pickle
import requests

from .avatar import AvatarReader
from common.textutil import md5, get_file_b64
Expand All @@ -32,15 +34,47 @@

JPEG_QUALITY = 50

class EmojiCache(object):
    """Pickle-backed cache of emojis downloaded from a URL.

    ``self.dic`` maps an emoji md5 to a ``(base64 image data, format)``
    tuple. The whole dict is persisted to ``fname`` after every
    successful download.
    """

    def __init__(self, fname):
        """Load the cache from ``fname`` if that file exists, else start empty.

        :param fname: path of the pickle file backing this cache
        """
        self.fname = fname
        if os.path.isfile(fname):
            # Read in binary mode to match the 'wb' used by flush(), and use
            # a context manager so the handle is closed instead of leaked.
            # NOTE(review): unpickling can execute arbitrary code; only point
            # this at a cache file written by this program.
            with open(fname, 'rb') as f:
                self.dic = pickle.load(f)
        else:
            self.dic = {}

    def query(self, md5):
        """Return the cached ``(b64 img, format)`` for ``md5``.

        :returns: ``(None, None)`` when the md5 is not in the cache
        """
        return self.dic.get(md5, (None, None))

    def fetch(self, md5, url):
        """Download the emoji at ``url``, cache it under ``md5`` and persist.

        :returns: ``(b64 img, format)`` on success; ``(None, None)`` if the
            request or the image decoding raised (the error is logged).
        """
        try:
            logger.info("Requesting emoji {} from {} ...".format(md5, url))
            r = requests.get(url).content
            # Opening the payload as an image both validates it and tells us
            # the format to record next to the encoded bytes.
            im = Image.open(cStringIO.StringIO(r))
            fmt = im.format.lower()
            ret = (base64.b64encode(r), fmt)
            self.dic[md5] = ret
            self.flush()
            return ret
        except Exception:
            # Best effort: a failed download must not abort the caller.
            logger.exception("Error processing emoji from {}".format(url))
            return None, None

    def flush(self):
        """Write the whole cache dict to ``self.fname``."""
        with open(self.fname, 'wb') as f:
            pickle.dump(self.dic, f)

class Resource(object):
""" multimedia resources in chat"""
def __init__(self, res_dir, avt_db):
def __init__(self, parser, res_dir, avt_db,
emoji_cache_file='emoji.cache'):
def check(subdir):
assert os.path.isdir(os.path.join(res_dir, subdir)), \
"No such directory: {}".format(subdir)
[check(k) for k in ['', AVATAR_DIRNAME, IMG_DIRNAME, EMOJI_DIRNAME, VOICE_DIRNAME]]

self.emoji_cache = EmojiCache(emoji_cache_file)
self.res_dir = res_dir
self.parser = parser
self.voice_cache_idx = {}
self.img_dir = os.path.join(res_dir, IMG_DIRNAME)
self.voice_dir = os.path.join(res_dir, VOICE_DIRNAME)
Expand Down Expand Up @@ -71,6 +105,7 @@ def cache_voice_mp3(self, msgs):
voice_paths = [msg.imgPath for msg in msgs if msg.type == TYPE_SPEAK]
self.voice_cache_idx = {k: idx for idx, k in enumerate(voice_paths)}
pool = Pool(3)
atexit.register(lambda x: x.terminate(), pool)
self.voice_cache = [pool.apply_async(parse_wechat_audio_file,
(self.get_voice_filename(k),)) for k in voice_paths]
# single-threaded version, for debug
Expand All @@ -87,6 +122,9 @@ def get_avatar(self, username):
jpeg_str = buf.getvalue()
return base64.b64encode(jpeg_str)

def get_contact_avatar(self, nickname):
    """Return the avatar of the contact known by ``nickname``.

    The nickname is translated through ``self.parser.contacts_rev`` and the
    resulting value is handed to :meth:`get_avatar`, whose result is
    returned unchanged.
    """
    username = self.parser.contacts_rev[nickname]
    return self.get_avatar(username)

def _get_img_file(self, fnames):
""" fnames: a list of filename to search for
return (filename, filename) of (big, small) image.
Expand Down Expand Up @@ -128,7 +166,10 @@ def name_is_thumbnail(name):


def get_img(self, fnames):
""" return two base64 jpg string"""
"""
:params fnames: possible file paths
:returns: two base64 jpg string
"""
fnames = [k for k in fnames if k] # filter out empty string
big_file, small_file = self._get_img_file(fnames)

Expand All @@ -147,7 +188,7 @@ def get_jpg_b64(img_file):
return big_file
return get_jpg_b64(small_file)

def get_emoji(self, md5, pack_id):
def _get_res_emoji(self, md5, pack_id):
path = self.emoji_dir
if pack_id:
path = os.path.join(path, pack_id)
Expand All @@ -170,8 +211,31 @@ def try_use(f):
return get_file_b64(f), imghdr.what(f)
return None, None

def get_internal_emoji(self, fname):
def _get_internal_emoji(self, fname):
    """Load the emoji file ``fname`` from ``INTERNAL_EMOJI_DIR``.

    :returns: (b64 img, format), where the format is whatever
        ``imghdr.what`` reports for the file
    """
    emoji_path = os.path.join(INTERNAL_EMOJI_DIR, fname)
    encoded = get_file_b64(emoji_path)
    return encoded, imghdr.what(emoji_path)

def get_emoji_by_md5(self, md5):
    """Find the image for the emoji identified by ``md5``.

    An md5 listed in ``self.parser.internal_emojis`` is loaded with
    ``_get_internal_emoji`` only. Any other md5 is looked up, in order, in
    the emoji cache, in the resource directory (``_get_res_emoji``) and
    finally by downloading from the URL in ``self.parser.emoji_url``.

    :returns: (b64 img, format), or ``(None, None)`` with a logged warning
        when no source yields an image
    """
    if md5 in self.parser.internal_emojis:
        emoji_img, fmt = self._get_internal_emoji(
            self.parser.internal_emojis[md5])
        # Hand back the image that was just loaded; previously it was
        # discarded and every internal emoji was reported as missing.
        if fmt:
            return emoji_img, fmt
        logger.warning("Cannot get emoji {}".format(md5))
        return None, None

    # Cached downloads are the cheapest source, so check them first.
    emoji_img, fmt = self.emoji_cache.query(md5)
    if fmt:
        return emoji_img, fmt

    group = self.parser.emoji_groups.get(md5, None)
    emoji_img, fmt = self._get_res_emoji(md5, group)
    if fmt:
        return emoji_img, fmt

    # Last resort: download from the recorded URL, if there is one.
    url = self.parser.emoji_url.get(md5, None)
    if url:
        emoji_img, fmt = self.emoji_cache.fetch(md5, url)
        if fmt:
            return emoji_img, fmt

    logger.warning("Cannot get emoji {} in {}".format(md5, group))
    return None, None


0 comments on commit 1344589

Please sign in to comment.