Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

python2 -> python3 #33

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 28 additions & 20 deletions filehunt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# filehunt: general file searching library for use by PANhunt and PassHunt
# By BB

import os, sys, zipfile, re, datetime, cStringIO, argparse, time, hashlib, unicodedata, codecs
import os, sys, zipfile, re, datetime, io, argparse, time, hashlib, unicodedata, codecs
import colorama
import progressbar
import pst # MS-PST files
Expand Down Expand Up @@ -37,10 +37,8 @@ def __init__(self, filename, file_dir):
self.type = None
self.matches = []

def __lt__(self, other):
    """Order file objects case-insensitively by their path.

    Python 3 never calls __cmp__ and has no cmp() builtin, so ordering is
    expressed through __lt__ only.
    """
    return self.path.lower() < other.path.lower()

def set_file_stats(self):

Expand Down Expand Up @@ -74,7 +72,7 @@ def size_friendly(self):
def set_error(self, error_msg):
    """Record an error against this file and report it on stdout in red.

    error_msg may be a string or an exception object (callers pass
    sys.exc_info()[1]); it is formatted with %s together with self.path.
    """
    self.errors.append(error_msg)
    # unicode2ascii returns ASCII bytes on Python 3. Decode them back to str
    # and go through print() so the message shares the text layer (and any
    # stdout wrapper) with the rest of the output instead of being written
    # raw to sys.stdout.buffer.
    message = unicode2ascii('ERROR %s on %s' % (error_msg, self.path)).decode('ascii')
    print(colorama.Fore.RED + message + colorama.Fore.WHITE)


def check_regexs(self, regexs, search_extensions):
Expand All @@ -94,7 +92,7 @@ def check_regexs(self, regexs, search_extensions):

elif self.type == 'TEXT':
try:
file_text = read_file(self.path, 'rb')
file_text = read_file(self.path, 'r')
self.check_text_regexs(file_text, regexs, '')
#except WindowsError:
# self.set_error(sys.exc_info()[1])
Expand Down Expand Up @@ -145,7 +143,7 @@ def check_pst_regexs(self, regexs, search_extensions, hunt_type, gauge_update_fu
if message.Subject:
message_path = os.path.join(folder.path, message.Subject)
else:
message_path = os.path.join(folder.path, u'[NoSubject]')
message_path = os.path.join(folder.path, '[NoSubject]')
if message.Body:
self.check_text_regexs(message.Body, regexs, message_path)
if message.HasAttachments:
Expand Down Expand Up @@ -185,7 +183,7 @@ def check_attachment_regexs(self, attachment, regexs, search_extensions, sub_pat
if attachment_ext in search_extensions['ZIP']:
if attachment.data:
try:
memory_zip = cStringIO.StringIO()
memory_zip = io.StringIO()
memory_zip.write(attachment.data)
zf = zipfile.ZipFile(memory_zip)
self.check_zip_regexs(zf, regexs, search_extensions, os.path.join(sub_path, attachment.Filename))
Expand All @@ -212,7 +210,7 @@ def check_zip_regexs(self, zf, regexs, search_extensions, sub_path):
for file_in_zip in files_in_zip:
if get_ext(file_in_zip) in search_extensions['ZIP']: # nested zip file
try:
memory_zip = cStringIO.StringIO()
memory_zip = io.StringIO()
memory_zip.write(zf.open(file_in_zip).read())
nested_zf = zipfile.ZipFile(memory_zip)
self.check_zip_regexs(nested_zf, regexs, search_extensions, os.path.join(sub_path, decode_zip_filename(file_in_zip)))
Expand All @@ -222,13 +220,13 @@ def check_zip_regexs(self, zf, regexs, search_extensions, sub_path):
elif get_ext(file_in_zip) in search_extensions['TEXT']: #normal doc
try:
file_text = zf.open(file_in_zip).read()
self.check_text_regexs(file_text, regexs, os.path.join(sub_path, decode_zip_filename(file_in_zip)))
self.check_text_regexs(decode_zip_text(file_text), regexs, os.path.join(sub_path, decode_zip_filename(file_in_zip)))
except: # RuntimeError: # e.g. zip needs password
self.set_error(sys.exc_info()[1])
else: # SPECIAL
try:
if get_ext(file_in_zip) == '.msg':
memory_msg = cStringIO.StringIO()
memory_msg = io.StringIO()
memory_msg.write(zf.open(file_in_zip).read())
msg = msmsg.MSMSG(memory_msg)
if msg.validMSG:
Expand All @@ -253,10 +251,10 @@ def find_all_files_in_directory(AFileClass, root_dir, excluded_directories, sear

global TEXT_FILE_SIZE_LIMIT

all_extensions = [ext for ext_list in search_extensions.values() for ext in ext_list]
all_extensions = [ext for ext_list in list(search_extensions.values()) for ext in ext_list]

extension_types = {}
for ext_type, ext_list in search_extensions.iteritems():
for ext_type, ext_list in search_extensions.items():
for ext in ext_list:
extension_types[ext] = ext_type

Expand Down Expand Up @@ -379,9 +377,13 @@ def load_object(fn):


def read_file(fn, open_mode="r"):
    """Return the entire contents of the file named fn.

    The file is first read using open_mode. If that read fails while
    decoding text (UnicodeDecodeError), the file is read again as bytes and
    decoded as UTF-8 with undecodable bytes dropped, so a str is still
    returned.
    """
    try:
        with open(fn, open_mode) as f:
            return f.read()
    except UnicodeDecodeError:
        # Only decoding failures take the fallback; other errors (missing
        # file, permissions, bad mode) propagate to the caller unchanged.
        with open(fn, "rb") as f:
            return f.read().decode("utf-8", "ignore")


Expand Down Expand Up @@ -421,13 +423,19 @@ def unicode2ascii(unicode_str):
return unicodedata.normalize('NFKD', unicode_str).encode('ascii','ignore')


def decode_zip_filename(instr):
    """Return a zip member name as text.

    str values (including str subclasses) are returned unchanged; any other
    value is treated as a byte string and decoded as cp437.
    """
    if isinstance(instr, str):
        return instr
    return instr.decode('cp437')

def decode_zip_text(instr):
    """Return the content of a zip member as text.

    str values are returned unchanged. Byte strings are decoded as UTF-8;
    bytes that are not valid UTF-8 are dropped instead of raising, the same
    policy read_file applies in its fallback, so members in other encodings
    are still searched rather than skipped with an error.
    """
    if isinstance(instr, str):
        return instr
    return instr.decode('utf-8', 'ignore')

def get_ext(file_name):

Expand Down
45 changes: 22 additions & 23 deletions msmsg.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_stream(self, sector, size):

def __repr__(self):
    """Return the entries as comma-separated 'index:entry' pairs, both in hex."""
    # enumerate yields the index alongside each entry, so no separate
    # range list has to be built and zipped.
    return ', '.join(['%s:%s' % (hex(sector), hex(entry)) for sector, entry in enumerate(self.entries)])



Expand Down Expand Up @@ -102,7 +102,7 @@ def get_stream(self, sector, size):

def __repr__(self):
    """Return the entries as comma-separated 'index:entry' pairs, both in hex."""
    # enumerate yields the index alongside each entry, so no separate
    # range list has to be built and zipped.
    return ', '.join(['%s:%s' % (hex(sector), hex(entry)) for sector, entry in enumerate(self.entries)])



Expand Down Expand Up @@ -133,7 +133,7 @@ def set_entry_children(self, dir_entry):
child_ids_queue.append(dir_entry.ChildID)
while child_ids_queue:
child_entry = self.entries[child_ids_queue.pop()]
if child_entry.Name in dir_entry.childs.keys():
if child_entry.Name in list(dir_entry.childs.keys()):
raise MSGException('Directory Entry Name already in children dictionary')
dir_entry.childs[child_entry.Name] = child_entry
if child_entry.SiblingID != DirectoryEntry.NOSTREAM:
Expand All @@ -156,7 +156,7 @@ def get_directory_sector(self, sector):

def __repr__(self):
    """Return the repr of every entry in self.entries, separated by ', '."""
    return ', '.join(repr(entry) for entry in self.entries)



Expand Down Expand Up @@ -199,9 +199,8 @@ def __init__(self, mscfb, bytes):
self.childs = {}


def __lt__(self, other):
    """Order entries by Name.

    Python 3 never calls __cmp__ and has no cmp() builtin, so ordering is
    expressed through __lt__ only; list_children sorts entries with it.
    """
    return self.Name < other.Name


def get_data(self):
Expand All @@ -222,7 +221,7 @@ def list_children(self, level=0, expand=False):
for child_entry in sorted(self.childs.values()):
line_sfx = ''
if child_entry.ObjectType == DirectoryEntry.OBJECT_STORAGE:
line_sfx = '(%s)' % len(child_entry.childs.keys())
line_sfx = '(%s)' % len(list(child_entry.childs.keys()))
s += '%s %s %s\n' % (line_pfx, child_entry.Name, line_sfx)
if expand:
s += child_entry.list_children(level+1, expand)
Expand All @@ -231,7 +230,7 @@ def list_children(self, level=0, expand=False):

def __repr__(self):
    """Return a one-line summary of the entry.

    Format: Name, then in parentheses the ObjectType followed by SiblingID,
    RightSiblingID, ChildID, StartingSectorLocation and StreamSize in hex.
    """
    return '%s (%s, %s, %s, %s, %s, %s)' % (
        self.Name,
        self.ObjectType,
        hex(self.SiblingID),
        hex(self.RightSiblingID),
        hex(self.ChildID),
        hex(self.StartingSectorLocation),
        hex(self.StreamSize),
    )



Expand All @@ -240,7 +239,7 @@ class MSCFB:
def __init__(self, cfb_file):
"""cfb_file is unicode or string filename or a file object"""

if isinstance(cfb_file, str) or isinstance(cfb_file, unicode):
if isinstance(cfb_file, str) or isinstance(cfb_file, str):
self.fd = open(cfb_file,'rb')
else:
self.fd = cfb_file
Expand Down Expand Up @@ -330,22 +329,22 @@ def __init__(self, msmsg, parent_dir_entry, header_size):
property_entries_count = (len(bytes) - header_size) / 16
for i in range(property_entries_count):
prop_entry = PropertyEntry(self.msmsg, parent_dir_entry, bytes[header_size + i*16: header_size + i*16 + 16])
if prop_entry in self.properties.keys():
if prop_entry in list(self.properties.keys()):
raise MSGException('PropertyID already in properties dictionary')
self.properties[prop_entry.PropertyID] = prop_entry


def getval(self, prop_id):
    """Return the value of the property stored under prop_id, or None if absent."""
    # Test membership on the dict itself; materialising the keys into a
    # list first turns a hash lookup into a linear scan.
    if prop_id in self.properties:
        return self.properties[prop_id].value
    return None


def __repr__(self):
    """Return the repr of every property in self.properties, one per line."""
    return '\n'.join(repr(prop) for prop in self.properties.values())



Expand Down Expand Up @@ -389,7 +388,7 @@ def __init__(self, msmsg, parent_dir_entry, bytes):

def __repr__(self):
    """Return 'tag=value': PropertyTag in hex and the repr of the value."""
    return '%s=%r' % (hex(self.PropertyTag), self.value)



Expand Down Expand Up @@ -723,7 +722,7 @@ def set_recipients(self):
recipient_dir_index = 0
while True:
recipient_dir_name = '__recip_version1.0_#%s' % zeropadhex(recipient_dir_index, 8)
if recipient_dir_name in self.root_dir_entry.childs.keys():
if recipient_dir_name in list(self.root_dir_entry.childs.keys()):
recipient_dir_entry = self.root_dir_entry.childs[recipient_dir_name]
rps = PropertyStream(self, recipient_dir_entry, PropertyStream.RECIP_OR_ATTACH_HEADER_SIZE)
recipient = Recipient(rps)
Expand All @@ -739,7 +738,7 @@ def set_attachments(self):
attachment_dir_index = 0
while True:
attachment_dir_name = '__attach_version1.0_#%s' % zeropadhex(attachment_dir_index, 8)
if attachment_dir_name in self.root_dir_entry.childs.keys():
if attachment_dir_name in list(self.root_dir_entry.childs.keys()):
attachment_dir_entry = self.root_dir_entry.childs[attachment_dir_name]
aps = PropertyStream(self, attachment_dir_entry, PropertyStream.RECIP_OR_ATTACH_HEADER_SIZE)
attachment = Attachment(aps)
Expand Down Expand Up @@ -870,27 +869,27 @@ def size_friendly(size):
def test_status_msg(msg_file):
    """Open msg_file as an MSMSG and print its compound-file directory."""
    msg = MSMSG(msg_file)
    try:
        print(msg.cfb.directory)
    finally:
        # Close the message even if printing the directory raises.
        msg.close()


def test_folder_msgs(test_folder):

global error_log_list

s = u''
s = ''
for msg_filepath in [os.path.join(test_folder, filename) for filename in os.listdir(test_folder) if os.path.isfile(os.path.join(test_folder, filename)) and os.path.splitext(filename.lower())[1] == '.msg']:
#try:
s += u'Opening %s\n' % msg_filepath
s += 'Opening %s\n' % msg_filepath
error_log_list = []
msg = MSMSG(msg_filepath)
#s += u'MajorVersion: %s, FATSectors: %s, MiniFATSectors: %s, DIFATSectors %s\n' % (msg.cfb.MajorVersion, msg.cfb.FATSectors, msg.cfb.MiniFATSectors, msg.cfb.DIFATSectors)
#s += u'MiniStreamSectorLocation: %s, MiniStreamSize: %s\n' % (hex(msg.cfb.MiniStreamSectorLocation), msg.cfb.MiniStreamSize)
#s += u'\n' + msg.cfb.directory.entries[0].list_children(level=0, expand=True)
#s += u'\n' + msg.prop_stream.__repr__()
s += u'Recipients: %s\n' % u', '.join([recip.__repr__() for recip in msg.recipients])
s += u'Attachments: %s\n' % u', '.join([attach.__repr__() for attach in msg.attachments])
s += u'Subject: %s\nBody: %s\n' % (msg.Subject.__repr__(), msg.Body.__repr__())
s += 'Recipients: %s\n' % ', '.join([recip.__repr__() for recip in msg.recipients])
s += 'Attachments: %s\n' % ', '.join([attach.__repr__() for attach in msg.attachments])
s += 'Subject: %s\nBody: %s\n' % (msg.Subject.__repr__(), msg.Body.__repr__())
s += '\n\n\n'
# dump attachments:
if False:
Expand All @@ -917,6 +916,6 @@ def test_folder_msgs(test_folder):

if __name__=="__main__":

test_folder = u'D:\\'
test_folder = 'D:\\'
#test_status_msg(test_folder+'test.msg')
#test_folder_msgs(test_folder)
Loading