Skip to content

Commit

Permalink
可检测中文名文件的文件类型
Browse files Browse the repository at this point in the history
  • Loading branch information
qux-bbb committed Mar 20, 2022
1 parent dd29b91 commit d9578a0
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
1 change: 1 addition & 0 deletions tests/test_data/中文名测试.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
测试中文名称文件的文件类型识别
19 changes: 19 additions & 0 deletions tests/test_filetype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# coding:utf8

from pathlib import Path
from xanalyzer.file import FileAnalyzer

cur_dir_path = Path(__file__).parent


def test_common_filetype():
    """Check the type string FileAnalyzer reports for the bundled UPX-packed sample."""
    expected_type = 'PE32 executable (console) Intel 80386, for MS Windows, UPX compressed'
    # Sample lives next to this test module under test_data/.
    sample_path = cur_dir_path.joinpath('test_data', 'Hello_upx.exe_')
    detected_type = FileAnalyzer(sample_path).get_type()
    assert detected_type == expected_type


def test_chinese_name_filetype():
    """Check that type detection works for a file whose name contains Chinese characters."""
    chinese_name_path = cur_dir_path / 'test_data' / '中文名测试.txt'
    file_analyzer = FileAnalyzer(chinese_name_path)
    # A single get_type() call is enough; the earlier duplicate call discarded
    # its result and only repeated the file read.
    assert file_analyzer.get_type() == 'UTF-8 Unicode text, with no line terminators'
7 changes: 6 additions & 1 deletion xanalyzer/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ def __init__(self, file_path):
self.file_type = self.get_type()

def get_type(self):
    """Return the type description of the file at ``self.file_path``.

    The file is read in binary mode and its bytes are passed to
    ``magic.from_buffer``; the value returned by that call is returned
    unchanged.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # magic.from_file cannot read a file through a path containing Chinese
    # characters, so for now read the bytes ourselves and use
    # magic.from_buffer instead.
    with open(self.file_path, 'rb') as the_file:
        # The context manager closes the handle even if read() raises.
        the_content = the_file.read()
    return magic.from_buffer(the_content)

@staticmethod
def get_windows_style_file_size(tmp_size):
Expand Down

0 comments on commit d9578a0

Please sign in to comment.