diff --git a/archinfo/__init__.py b/archinfo/__init__.py index 7972ad29..e3564186 100644 --- a/archinfo/__init__.py +++ b/archinfo/__init__.py @@ -7,64 +7,62 @@ __version__ = "9.2.72.dev0" -from .types import RegisterOffset, TmpVar, RegisterName from .arch import ( - Endness, - Register, Arch, - register_arch, ArchNotFound, + Register, + all_arches, arch_from_id, - reverse_ends, get_host_arch, - all_arches, + register_arch, + reverse_ends, ) -from .defines import defines -from .arch_amd64 import ArchAMD64 -from .arch_x86 import ArchX86 -from .arch_arm import ArchARM, ArchARMEL, ArchARMHF, ArchARMCortexM from .arch_aarch64 import ArchAArch64 +from .arch_amd64 import ArchAMD64 +from .arch_arm import ArchARM, ArchARMCortexM, ArchARMEL, ArchARMHF from .arch_avr import ArchAVR8 -from .arch_ppc32 import ArchPPC32 -from .arch_ppc64 import ArchPPC64 from .arch_mips32 import ArchMIPS32 from .arch_mips64 import ArchMIPS64 -from .arch_soot import ArchSoot -from .archerror import ArchError -from .arch_s390x import ArchS390X from .arch_pcode import ArchPcode +from .arch_ppc32 import ArchPPC32 +from .arch_ppc64 import ArchPPC64 from .arch_riscv64 import ArchRISCV64 - +from .arch_s390x import ArchS390X +from .arch_soot import ArchSoot +from .arch_x86 import ArchX86 +from .archerror import ArchError +from .defines import defines +from .types import Endness, RegisterName, RegisterOffset, TmpVar __all__ = [ - "RegisterOffset", - "TmpVar", - "RegisterName", - "Endness", - "Register", "Arch", - "register_arch", - "ArchNotFound", - "arch_from_id", - "reverse_ends", - "get_host_arch", - "all_arches", - "defines", + "ArchAArch64", "ArchAMD64", - "ArchX86", "ArchARM", + "ArchARMCortexM", "ArchARMEL", "ArchARMHF", - "ArchARMCortexM", - "ArchAArch64", "ArchAVR8", - "ArchPPC32", - "ArchPPC64", + "ArchError", "ArchMIPS32", "ArchMIPS64", - "ArchSoot", - "ArchError", - "ArchS390X", + "ArchNotFound", + "ArchPPC32", + "ArchPPC64", "ArchPcode", "ArchRISCV64", + "ArchS390X", + "ArchSoot", + "ArchX86", + "Endness", + "Register", + "RegisterName", + "RegisterOffset", + "TmpVar", + "all_arches", + "arch_from_id", + "defines", + "get_host_arch", + "register_arch", + "reverse_ends", ] diff --git a/archinfo/arch.py b/archinfo/arch.py index f0a496ee..3ffd2bf0 100644 --- a/archinfo/arch.py +++ b/archinfo/arch.py @@ -1,14 +1,15 @@ +import copy import logging -from typing import Dict, List, Tuple, Optional -import struct as _struct import platform as _platform import re +import struct as _struct +from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union + +from archinfo.types import RegisterName, RegisterOffset -from archinfo.types import RegisterOffset, RegisterName from .archerror import ArchError from .tls import TLSArchInfo - -import copy +from .types import Endness log = logging.getLogger("archinfo.arch") log.addHandler(logging.NullHandler()) @@ -34,19 +35,6 @@ _keystone = None -class Endness: # pylint: disable=no-init - """Endness specifies the byte order for integer values - - :cvar LE: little endian, least significant byte is stored at lowest address - :cvar BE: big endian, most significant byte is stored at lowest address - :cvar ME: Middle-endian. Yep. - """ - - LE = "Iend_LE" - BE = "Iend_BE" - ME = "Iend_ME" - - class Register: """ A collection of information about a register. Each different architecture @@ -180,8 +168,8 @@ class Arch: """ byte_width = 8 - instruction_endness = "Iend_BE" - elf_tls: TLSArchInfo = None + instruction_endness = Endness.BE + elf_tls: Optional[TLSArchInfo] = None dwarf_registers: List[str] = [] def __init__(self, endness, instruction_endness=None): @@ -425,18 +413,18 @@ def struct_fmt(self, size=None, signed=False, endness=None): def _get_register_dict(self) -> Dict[RegisterName, Tuple[RegisterOffset, int]]: res = {} - for r in self.register_list: - if r.vex_offset is None: + for register in self.register_list: + if register.vex_offset is None: continue - res[r.name] = (r.vex_offset, r.size) - for i in r.alias_names: - res[i] = (r.vex_offset, r.size) - for reg, offset, size in r.subregisters: - res[reg] = (r.vex_offset + offset, size) + res[register.name] = (register.vex_offset, register.size) + for i in register.alias_names: + res[i] = (register.vex_offset, register.size) + for reg, offset, size in register.subregisters: + res[reg] = (register.vex_offset + offset, size) return res # e.g. sizeof['int'] = 32 - sizeof = {} + sizeof: Dict[str, int] = {} @property def capstone(self): @@ -527,7 +515,7 @@ def translate_dynamic_tag(self, tag): log.error("Please look up and add dynamic tag type %#x for %s", tag, self.name) return tag - def translate_symbol_type(self, tag): + def translate_symbol_type(self, tag: Union[str, int]): try: return self.symbol_type_translation[tag] except KeyError: @@ -682,22 +670,22 @@ def keystone_support(self): return self.ks_arch is not None - address_types = (int,) - function_address_types = (int,) + address_types: Tuple[type, ...] = (int,) + function_address_types: Tuple[type, ...] = (int,) # various names name: str - vex_arch = None - qemu_name = None - ida_processor = None - linux_name = None - triplet = None + vex_arch: Optional[str] = None + qemu_name: Optional[str] = None + ida_processor: Optional[str] = None + linux_name: Optional[str] = None + triplet: Optional[str] = None # instruction stuff - max_inst_bytes = None + max_inst_bytes: int ret_instruction = b"" nop_instruction = b"" - instruction_alignment = None + instruction_alignment: Optional[int] = None # register offsets ip_offset: Optional[RegisterOffset] = None @@ -711,18 +699,19 @@ def keystone_support(self): vex_conditional_helpers = False # memory stuff - bits = None + bits: int memory_endness = Endness.LE + default_endness = Endness.LE register_endness = Endness.LE - stack_change = None + stack_change: int # is it safe to cache IRSBs? cache_irsb = True branch_delay_slot = False - function_prologs = set() - function_epilogs = set() + function_prologs: Set[bytes] = set() + function_epilogs: Set[bytes] = set() # Capstone stuff cs_arch = None @@ -738,13 +727,13 @@ def keystone_support(self): uc_arch = None uc_mode = None uc_const = None - uc_prefix = None + uc_prefix: Optional[str] = None uc_regs = None artificial_registers_offsets = None - artificial_registers = None - cpu_flag_register_offsets_and_bitmasks_map = None - reg_blacklist = None - reg_blacklist_offsets = None + artificial_registers: Set[RegisterName] + cpu_flag_register_offsets_and_bitmasks_map: Dict + reg_blacklist: List[str] = [] + reg_blacklist_offsets: List[RegisterOffset] = [] vex_to_unicorn_map = None vex_cc_regs = None @@ -758,28 +747,28 @@ def keystone_support(self): # Register information register_list: List[Register] = [] - default_register_values = [] - entry_register_values = {} - default_symbolic_registers = [] + default_register_values: List = [] + entry_register_values: Dict[str, Any] = {} # I don't know the value type + default_symbolic_registers: List[RegisterName] = [] registers: Dict[RegisterName, Tuple[RegisterOffset, int]] = {} register_names: Dict[RegisterOffset, RegisterName] = {} - argument_registers = set() - argument_register_positions = {} - persistent_regs = [] - concretize_unique_registers = ( - set() - ) # this is a list of registers that should be concretized, if unique, at the end of each block - - lib_paths = [] - reloc_s_a = [] - reloc_b_a = [] - reloc_s = [] - reloc_copy = [] - reloc_tls_mod_id = [] - reloc_tls_doffset = [] - reloc_tls_offset = [] - dynamic_tag_translation = {} - symbol_type_translation = {} + argument_registers: Set[RegisterOffset] = set() + argument_register_positions: Dict[RegisterName, int] = {} + persistent_regs: List[RegisterName] = [] + # this is a set of registers that should be concretized, if unique, at the end of each block + concretize_unique_registers: Set[RegisterOffset] = set() + + lib_paths: List[str] = [] + # TODO: reloc_* are just guesses, couldn't find docs or usage + reloc_s_a: List[Any] = [] + reloc_b_a: List[Any] = [] + reloc_s: List[Any] = [] + reloc_copy: List[Any] = [] + reloc_tls_mod_id: List[Any] = [] + reloc_tls_doffset: List[Any] = [] + reloc_tls_offset: List[Any] = [] + dynamic_tag_translation: Dict[int, str] = {} + symbol_type_translation: Dict[Union[int, str], str] = {} got_section_name = "" vex_archinfo = None @@ -799,7 +788,7 @@ def _append_arch_unique(my_arch: Arch) -> bool: return True -def register_arch(regexes, bits, endness, my_arch): +def register_arch(regexes: List[str], bits: int, endness: Endness, my_arch: Type[Arch]): """ Register a new architecture. Architectures are loaded by their string name using ``arch_from_id()``, and @@ -825,15 +814,15 @@ def register_arch(regexes, bits, endness, my_arch): re.compile(rx) except re.error as e: raise ValueError("Invalid Regular Expression %s" % rx) from e - # if not isinstance(my_arch,Arch): - # raise TypeError("Arch must be a subclass of archinfo.Arch") + if not issubclass(my_arch, Arch): + raise TypeError("Arch must be a subclass of archinfo.Arch") if not isinstance(bits, int): raise TypeError("Bits must be an int") if endness is not None: - if endness not in (Endness.BE, Endness.LE, Endness.ME, "any"): + if endness not in (Endness.BE, Endness.LE, Endness.ME, Endness.ANY): raise TypeError("Endness must be Endness.BE, Endness.LE, or 'any'") arch_id_map.append((regexes, bits, endness, my_arch)) - if endness == "any": + if endness == Endness.ANY: _append_arch_unique(my_arch(Endness.BE)) _append_arch_unique(my_arch(Endness.LE)) else: @@ -844,7 +833,7 @@ class ArchNotFound(Exception): pass -def arch_from_id(ident, endness="any", bits="") -> Arch: +def arch_from_id(ident: str, endness=Endness.ANY, bits="") -> Arch: """ Take our best guess at the arch referred to by the given identifier, and return an instance of its class. @@ -873,14 +862,14 @@ def arch_from_id(ident, endness="any", bits="") -> Arch: elif "be" in endness: endness = Endness.BE elif "l" in endness: - endness = "unsure" + endness = Endness.UNSURE elif "b" in endness: - endness = "unsure" + endness = Endness.UNSURE else: - endness = "unsure" + endness = Endness.UNSURE ident = ident.lower() cls = None - aendness = "" + aendness = None for arxs, abits, aendness, acls in arch_id_map: found_it = False for rx in arxs: @@ -891,17 +880,17 @@ def arch_from_id(ident, endness="any", bits="") -> Arch: continue if bits and bits != abits: continue - if aendness == "any" or endness == aendness or endness == "unsure": + if aendness == Endness.ANY or endness == aendness or endness == Endness.UNSURE: cls = acls break if not cls: raise ArchNotFound( f"Can't find architecture info for architecture {ident} with {repr(bits)} bits and {endness} endness" ) - if endness == "unsure": - if aendness == "any": + if endness == Endness.UNSURE: + if aendness == Endness.ANY: # We really don't care, use default - return cls() + return cls(cls.default_endness) else: # We're expecting the ident to pick the endness. # ex. 'armeb' means obviously this is Iend_BE diff --git a/archinfo/arch_aarch64.py b/archinfo/arch_aarch64.py index 6b52d374..4e57b8df 100644 --- a/archinfo/arch_aarch64.py +++ b/archinfo/arch_aarch64.py @@ -1,4 +1,6 @@ -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -33,7 +35,7 @@ def __init__(self, endness=Endness.LE): linux_name = "aarch64" triplet = "aarch64-linux-gnueabihf" max_inst_bytes = 4 - ret_offset = 16 + ret_offset = RegisterOffset(16) vex_conditional_helpers = True syscall_num_offset = 80 call_pushes_ret = False @@ -56,8 +58,6 @@ def __init__(self, endness=Endness.LE): ret_instruction = b"\xC0\x03\x5F\xD6" # ret nop_instruction = b"\x1F\x20\x03\xD5" # nop - function_prologs = set() - function_epilogs = set() instruction_alignment = 4 register_list = [ Register( @@ -527,4 +527,4 @@ def __init__(self, endness=Endness.LE): ] -register_arch([r".*arm64.*|.*aarch64*"], 64, "any", ArchAArch64) +register_arch([r".*arm64.*|.*aarch64*"], 64, Endness.ANY, ArchAArch64) diff --git a/archinfo/arch_amd64.py b/archinfo/arch_amd64.py index 338b20e8..cf49abfb 100644 --- a/archinfo/arch_amd64.py +++ b/archinfo/arch_amd64.py @@ -1,6 +1,8 @@ -from .arch import Arch, register_arch, Endness, Register -from .tls import TLSArchInfo +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .archerror import ArchError +from .tls import TLSArchInfo try: import capstone as _capstone @@ -23,14 +25,14 @@ _pyvex = None -_NATIVE_FUNCTION_PROLOGS = [ +_NATIVE_FUNCTION_PROLOGS = { rb"\x55\x48\x89\xe5", # push rbp; mov rbp, rsp rb"\x48[\x83,\x81]\xec[\x00-\xff]", # sub rsp, xxx -] +} # every function prolog can potentially be prefixed with endbr64 _endbr64 = b"\xf3\x0f\x1e\xfa" -_prefixed = [(_endbr64 + prolog) for prolog in _NATIVE_FUNCTION_PROLOGS] -_FUNCTION_PROLOGS = _prefixed + _NATIVE_FUNCTION_PROLOGS +_prefixed = {(_endbr64 + prolog) for prolog in _NATIVE_FUNCTION_PROLOGS} +_FUNCTION_PROLOGS = _prefixed | _NATIVE_FUNCTION_PROLOGS class ArchAMD64(Arch): @@ -148,7 +150,7 @@ def _configure_keystone(self): linux_name = "x86_64" triplet = "x86_64-linux-gnu" max_inst_bytes = 15 - ret_offset = 16 + ret_offset = RegisterOffset(16) vex_conditional_helpers = True syscall_num_offset = 16 call_pushes_ret = True diff --git a/archinfo/arch_arm.py b/archinfo/arch_arm.py index d251270d..2962682b 100644 --- a/archinfo/arch_arm.py +++ b/archinfo/arch_arm.py @@ -1,6 +1,8 @@ import logging -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo log = logging.getLogger("archinfo.arch_arm") @@ -178,8 +180,8 @@ def is_thumb(self, addr): # pylint:disable=unused-argument linux_name = "arm" triplet = "arm-linux-gnueabihf" max_inst_bytes = 4 - ret_offset = 8 - fp_ret_offset = 8 + ret_offset = RegisterOffset(8) + fp_ret_offset = RegisterOffset(8) vex_conditional_helpers = True syscall_num_offset = 36 call_pushes_ret = False @@ -341,7 +343,7 @@ class ArchARMHF(ArchARM): name = "ARMHF" triplet = "arm-linux-gnueabihf" ld_linux_name = "ld-linux-armhf.so.3" - fp_ret_offset = 128 # s0 + fp_ret_offset = RegisterOffset(128) # s0 class ArchARMEL(ArchARM): @@ -388,7 +390,7 @@ class ArchARMCortexM(ArchARMEL): # These are the standard THUMB prologs. We leave these off for other ARMs due to their length # For CM, we assume the FPs are OK, as they are almost guaranteed to appear all over the place - function_prologs = {} + function_prologs = set() thumb_prologs = {rb"[\x00-\xff]\xb5", rb"\x2d\xe9[\x00-\xff][\x00-\xff]"} # push {xxx,lr} # push.w {xxx, lr} function_epilogs = { @@ -549,8 +551,8 @@ def __init__(self, *args, **kwargs): # TODO: Add.... the NVIC? to SimOS -register_arch([r".*cortexm|.*cortex\-m.*|.*v7\-m.*"], 32, "any", ArchARMCortexM) -register_arch([r".*armhf.*"], 32, "any", ArchARMHF) +register_arch([r".*cortexm|.*cortex\-m.*|.*v7\-m.*"], 32, Endness.ANY, ArchARMCortexM) +register_arch([r".*armhf.*"], 32, Endness.ANY, ArchARMHF) register_arch([r".*armeb|.*armbe"], 32, Endness.BE, ArchARM) register_arch([r".*armel|arm.*"], 32, Endness.LE, ArchARMEL) -register_arch([r".*arm.*|.*thumb.*"], 32, "any", ArchARM) +register_arch([r".*arm.*|.*thumb.*"], 32, Endness.ANY, ArchARM) diff --git a/archinfo/arch_avr.py b/archinfo/arch_avr.py index afd8a1b2..fa421b0e 100644 --- a/archinfo/arch_avr.py +++ b/archinfo/arch_avr.py @@ -1,4 +1,4 @@ -from .arch import Arch, register_arch, Endness +from .arch import Arch, Endness, register_arch from .archerror import ArchError from .tls import TLSArchInfo diff --git a/archinfo/arch_mips32.py b/archinfo/arch_mips32.py index 9f4a0df7..5bfdb917 100644 --- a/archinfo/arch_mips32.py +++ b/archinfo/arch_mips32.py @@ -1,4 +1,4 @@ -from .arch import Arch, register_arch, Endness, Register +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -38,6 +38,7 @@ def __init__(self, endness=Endness.BE): bits = 32 vex_arch = "VexArchMIPS32" name = "MIPS32" + default_endness = Endness.BE ida_processor = "mipsb" qemu_name = "mipsel" linux_name = "mipsel" # ??? diff --git a/archinfo/arch_mips64.py b/archinfo/arch_mips64.py index a7d3e914..4c777082 100644 --- a/archinfo/arch_mips64.py +++ b/archinfo/arch_mips64.py @@ -1,4 +1,6 @@ -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -30,12 +32,13 @@ def __init__(self, endness=Endness.BE): bits = 64 vex_arch = "VexArchMIPS64" name = "MIPS64" + default_endness = Endness.BE qemu_name = "mips64el" ida_processor = "mips64" linux_name = "mips64el" # ??? triplet = "mips64el-linux-gnu" max_inst_bytes = 4 - ret_offset = 32 + ret_offset = RegisterOffset(32) syscall_register_offset = 16 call_pushes_ret = False stack_change = -8 @@ -191,4 +194,4 @@ def __init__(self, endness=Endness.BE): register_arch([r".*mipsel.*|.*mips64el|.*mipsel64"], 64, Endness.LE, ArchMIPS64) -register_arch([r".*mips64.*|.*mips.*"], 64, "any", ArchMIPS64) +register_arch([r".*mips64.*|.*mips.*"], 64, Endness.ANY, ArchMIPS64) diff --git a/archinfo/arch_pcode.py b/archinfo/arch_pcode.py index 47c4ed23..453f3f75 100644 --- a/archinfo/arch_pcode.py +++ b/archinfo/arch_pcode.py @@ -1,17 +1,20 @@ import logging from typing import Union +from .arch import Arch, Endness, Register +from .archerror import ArchError +from .tls import TLSArchInfo +from .types import RegisterOffset + try: import pypcode -except ImportError: - pypcode = None -from .arch import Arch, Endness, Register -from .tls import TLSArchInfo -from .archerror import ArchError + _has_pypcode = True +except ImportError: + _has_pypcode = False -log = logging.getLogger("__name__") +log = logging.getLogger(__name__) class ArchPcode(Arch): @@ -21,12 +24,14 @@ class ArchPcode(Arch): """ def __init__(self, language: Union["pypcode.ArchLanguage", str]): - if pypcode is None: + if not _has_pypcode: raise ArchError("pypcode not installed") if isinstance(language, str): language = self._get_language_by_id(language) + assert isinstance(language, pypcode.ArchLanguage) + self.name = language.id self.pcode_arch = language.id self.description = language.description @@ -42,7 +47,7 @@ def __init__(self, language: Union["pypcode.ArchLanguage", str]): # Get program counter register pc_offset = None - pc_tag = language.pspec.find("programcounter") + pc_tag = language.pspec.find("programcounter") if language.pspec is not None else None if pc_tag is not None: pc_reg = pc_tag.attrib.get("register", None) if pc_reg is not None: @@ -59,7 +64,7 @@ def __init__(self, language: Union["pypcode.ArchLanguage", str]): pc_offset = 0x80000000 sp_offset = None - ret_offset = None + ret_offset = RegisterOffset(0) if len(language.cspecs): def find_matching_cid(language, desired): @@ -96,17 +101,17 @@ def find_matching_cid(language, desired): output_register_tag = output_tag.find("register") if output_register_tag is not None: output_reg = output_register_tag.attrib["name"] - ret_offset = ctx.registers[output_reg].offset + ret_offset = RegisterOffset(ctx.registers[output_reg].offset) if sp_offset is None: log.warning("Unknown stack pointer register offset?") sp_offset = 0x80000008 self.instruction_alignment = 1 - self.ip_offset = pc_offset - self.sp_offset = sp_offset - self.bp_offset = sp_offset - self.ret_offset = ret_offset + self.ip_offset = RegisterOffset(pc_offset) + self.sp_offset = RegisterOffset(sp_offset) + self.bp_offset = RegisterOffset(sp_offset) + self.ret_offset = RegisterOffset(ret_offset) self.register_list = list(archinfo_regs.values()) self.initial_sp = (0x8000 << (self.bits - 16)) - 1 self.linux_name = "" # FIXME @@ -114,16 +119,18 @@ def find_matching_cid(language, desired): # TODO: Replace the following hardcoded function prologues by data sourced from patterns.xml if "PowerPC:BE" in self.name: - self.function_prologs = [ + self.function_prologs = { # stwu r1, xx(r1); mfspr rx, lr b"\x94\x21[\xc0-\xff][\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xa0\xb0\xc0\xd0\xe0\xf0]" b"[\x7c-\x7f][\x08\x28\x48\x68\x88\xa8\xc8\xe8]\x02\xa6", - ] + } super().__init__(endness=self.endness, instruction_endness=self.instruction_endness) @staticmethod def _get_language_by_id(lang_id) -> "pypcode.ArchLanguage": + if not _has_pypcode: + raise ArchError("pypcode not installed") for arch in pypcode.Arch.enumerate(): for lang in arch.languages: if lang.id == lang_id: diff --git a/archinfo/arch_ppc32.py b/archinfo/arch_ppc32.py index 3a4cdf1f..8d8228cd 100644 --- a/archinfo/arch_ppc32.py +++ b/archinfo/arch_ppc32.py @@ -1,4 +1,6 @@ -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -59,7 +61,7 @@ def __init__(self, endness=Endness.LE): max_inst_bytes = 4 # https://www.ibm.com/developerworks/community/forums/html/topic?id=77777777-0000-0000-0000-000013836863 # claims that r15 is the base pointer but that is NOT what I see in practice - ret_offset = 28 + ret_offset = RegisterOffset(28) syscall_num_offset = 16 call_pushes_ret = False stack_change = -4 @@ -318,5 +320,5 @@ def __init__(self, endness=Endness.LE): ] -register_arch([r".*p\w*pc.*be"], 32, "Iend_BE", ArchPPC32) -register_arch([r".*p\w*pc.*"], 32, "any", ArchPPC32) +register_arch([r".*p\w*pc.*be"], 32, Endness.BE, ArchPPC32) +register_arch([r".*p\w*pc.*"], 32, Endness.ANY, ArchPPC32) diff --git a/archinfo/arch_ppc64.py b/archinfo/arch_ppc64.py index 4ec927a0..e567aa0e 100644 --- a/archinfo/arch_ppc64.py +++ b/archinfo/arch_ppc64.py @@ -1,4 +1,6 @@ -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -83,7 +85,7 @@ def __init__(self, endness=Endness.LE): triplet = "powerpc64le-linux-gnu" linux_name = "ppc750" max_inst_bytes = 4 - ret_offset = 40 + ret_offset = RegisterOffset(40) syscall_num_offset = 16 call_pushes_ret = False stack_change = -8 @@ -394,5 +396,5 @@ def __init__(self, endness=Endness.LE): ] -register_arch([r".*p\w*pc.*be"], 64, "Iend_BE", ArchPPC64) -register_arch([r".*p\w*pc.*"], 64, "any", ArchPPC64) +register_arch([r".*p\w*pc.*be"], 64, Endness.BE, ArchPPC64) +register_arch([r".*p\w*pc.*"], 64, Endness.ANY, ArchPPC64) diff --git a/archinfo/arch_riscv64.py b/archinfo/arch_riscv64.py index 9e6e2312..e5b65f19 100644 --- a/archinfo/arch_riscv64.py +++ b/archinfo/arch_riscv64.py @@ -1,4 +1,6 @@ -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .tls import TLSArchInfo try: @@ -36,7 +38,7 @@ def __init__(self, endness=Endness.LE): linux_name = "riscv64" triplet = "riscv64-linux-gnu" max_inst_bytes = 4 - ret_offset = 4 + ret_offset = RegisterOffset(4) vex_conditional_helpers = True syscall_num_offset = 132 call_pushes_ret = False @@ -147,4 +149,4 @@ def __init__(self, endness=Endness.LE): elf_tls = TLSArchInfo(1, 32, [], [0], [], 0, 0) -register_arch([r".*riscv.*"], 64, "any", ArchRISCV64) +register_arch([r".*riscv.*"], 64, Endness.ANY, ArchRISCV64) diff --git a/archinfo/arch_s390x.py b/archinfo/arch_s390x.py index 88659618..8cc7abc3 100644 --- a/archinfo/arch_s390x.py +++ b/archinfo/arch_s390x.py @@ -13,7 +13,9 @@ except ImportError: _pyvex = None -from .arch import Arch, register_arch, Endness, Register +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .archerror import ArchError from .tls import TLSArchInfo @@ -43,11 +45,12 @@ def __init__(self, endness=Endness.BE): bits = 64 vex_arch = "VexArchS390X" # enum VexArch name = "S390X" + default_endness = Endness.BE qemu_name = "s390x" # target/s390x triplet = "s390x-linux-gnu" linux_name = "s390" # arch/s390 max_inst_bytes = 6 - ret_offset = 584 # offsetof(VexGuestS390XState, guest_r2) + ret_offset = RegisterOffset(584) # offsetof(VexGuestS390XState, guest_r2) syscall_num_offset = 576 # offsetof(VexGuestS390XState, guest_r1) call_pushes_ret = False stack_change = -8 diff --git a/archinfo/arch_x86.py b/archinfo/arch_x86.py index 6b9fae4a..c7447738 100644 --- a/archinfo/arch_x86.py +++ b/archinfo/arch_x86.py @@ -1,6 +1,8 @@ -from .arch import Arch, register_arch, Endness, Register -from .tls import TLSArchInfo +from archinfo.types import RegisterOffset + +from .arch import Arch, Endness, Register, register_arch from .archerror import ArchError +from .tls import TLSArchInfo try: import capstone as _capstone @@ -23,7 +25,7 @@ _pyvex = None -_NATIVE_FUNCTION_PROLOGS = [ +_NATIVE_FUNCTION_PROLOGS = { rb"\x8b\xff\x55\x8b\xec", # mov edi, edi; push ebp; mov ebp, esp rb"\x55\x8b\xec", # push ebp; mov ebp, esp rb"\x55\x89\xe5", # push ebp; mov ebp, esp @@ -35,11 +37,11 @@ # (push ebp; push eax; push edi; push ebx; push esi; push edx; push ecx) mov xxx, xxx rb"[\x50\x51\x52\x53\x55\x56\x57]{1,7}\x8b[\x00-\xff]{2}", rb"(\x81|\x83)\xec", # sub xxx %esp -] +} # every function prolog can potentially be prefixed with endbr32 _endbr32 = b"\xf3\x0f\x1e\xfb" -_prefixed = [(_endbr32 + prolog) for prolog in _NATIVE_FUNCTION_PROLOGS] -_FUNCTION_PROLOGS = _prefixed + _NATIVE_FUNCTION_PROLOGS +_prefixed = {(_endbr32 + prolog) for prolog in _NATIVE_FUNCTION_PROLOGS} +_FUNCTION_PROLOGS = _prefixed | _NATIVE_FUNCTION_PROLOGS class ArchX86(Arch): @@ -152,7 +154,7 @@ def _configure_keystone(self): triplet = "i386-linux-gnu" max_inst_bytes = 15 call_sp_fix = -4 - ret_offset = 8 + ret_offset = RegisterOffset(8) vex_conditional_helpers = True syscall_num_offset = 8 call_pushes_ret = True diff --git a/archinfo/types.py b/archinfo/types.py index c73c6fca..204dcaf1 100644 --- a/archinfo/types.py +++ b/archinfo/types.py @@ -1,9 +1,48 @@ +import sys from typing import NewType -RegisterOffset = NewType("RegisterOffset", int) +if sys.version_info < (3, 11): + from backports.strenum import StrEnum +else: + from enum import StrEnum + + +class RegisterOffset(int): + """A register offset is an integer that represents the offset of a register + in VEX's register memory space. + """ + + def __add__(self, other): + if isinstance(other, int): + return RegisterOffset(int(self) + other) + return NotImplemented + + TmpVar = NewType("TmpVar", int) # This causes too much issues as a NewType, sot is a simple alias instead # This means that is still legal to pass any str where a RegisterName is expected. # The downside is that PyCharm will show the type as `str` when displaying the signature RegisterName = str + + +class Endness(StrEnum): + """Endness specifies the byte order for integer values + + :cvar LE: little endian, least significant byte is stored at lowest address + :cvar BE: big endian, most significant byte is stored at lowest address + :cvar ME: Middle-endian. Yep. + """ + + LE = "Iend_LE" + BE = "Iend_BE" + ME = "Iend_ME" + ANY = "any" + UNSURE = "unsure" + + @staticmethod + def from_str(s: str) -> "Endness": + for e in Endness: + if e.value == s: + return e + raise ValueError("Unknown endness: %s" % s) diff --git a/pyproject.toml b/pyproject.toml index 780203a4..e1cebbfe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,3 +8,8 @@ target-version = ['py38'] [tool.ruff] line-length = 120 +select = [ + "E", + "F", + "I", +] diff --git a/setup.cfg b/setup.cfg index 45c76346..5ab3f9c5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,6 +18,8 @@ classifiers = [options] packages = find: +install_requires = + backports.strenum>=1.2.8;python_version<'3.11' python_requires = >=3.8 [options.extras_require] diff --git a/tests/test_amd64.py b/tests/test_amd64.py index 74aff7fd..aab5d4f8 100644 --- a/tests/test_amd64.py +++ b/tests/test_amd64.py @@ -1,7 +1,7 @@ import unittest -from archinfo.arch import Endness from archinfo import ArchAMD64, ArchError +from archinfo.arch import Endness class TestAmd64(unittest.TestCase): diff --git a/tests/test_pcode.py b/tests/test_pcode.py index 6d587093..15666b12 100644 --- a/tests/test_pcode.py +++ b/tests/test_pcode.py @@ -1,14 +1,14 @@ # pylint:disable=missing-class-docstring,no-self-use -import unittest import pickle +import unittest try: import pypcode except ImportError: pypcode = None +from archinfo import ArchError, ArchPcode from archinfo.arch import Endness -from archinfo import ArchPcode, ArchError @unittest.skipUnless(pypcode is not None, "pypcode not installed")