diff --git a/bindings/python/quokka/block.py b/bindings/python/quokka/block.py index d18e1ec..664fb63 100644 --- a/bindings/python/quokka/block.py +++ b/bindings/python/quokka/block.py @@ -235,7 +235,7 @@ def bytes(self) -> bytes: return b"" # Read all block at once - block_bytes = self.program.executable.read_byte( + block_bytes = self.program.executable.read_bytes( offset=file_offset, size=self.size, ) diff --git a/bindings/python/quokka/executable.py b/bindings/python/quokka/executable.py index c76eaa6..02bd359 100644 --- a/bindings/python/quokka/executable.py +++ b/bindings/python/quokka/executable.py @@ -58,7 +58,7 @@ def read(self, offset: int, size: int) -> bytes: """Read `size` at `offset` in the file. This method should not be used directly and considered as part of a private API. - The preferred method are read_byte / read_string . + The preferred methods are read_bytes / read_string. Arguments: offset: File offset @@ -77,14 +77,14 @@ def read(self, offset: int, size: int) -> bytes: def read_string(self, offset: int, size: Optional[int] = None) -> str: """Read a string in the file. - + If the size is not given, Quokka will try to read the string until the first null byte. That works only for null-terminated strings. If the string is null terminated, remove the trailing 0. Arguments: - offset: String file offset + offset: String file offset size: String size if known. 
Returns: @@ -99,14 +99,16 @@ def read_string(self, offset: int, size: Optional[int] = None) -> str: string = self.read(offset, size).decode("utf-8") except UnicodeDecodeError as exc: raise ValueError("Unable to read or decode the string.") from exc - + else: try: null_byte = self.content.index(b"\x00", offset) except ValueError as exc: - raise ValueError("String is not null-terminated and size was not given") from exc + raise ValueError( + "String is not null-terminated and size was not given" + ) from exc - string = self.content[offset: null_byte].decode("utf-8") + string = self.content[offset:null_byte].decode("utf-8") # FIX: When returning a single character string, it does not end with a '\0' if len(string) > 1 and string.endswith("\x00"): @@ -129,10 +131,11 @@ def read_data( Returns: The data value """ + # Read an int of size `read_size` def read_int(read_size: int) -> int: """Read an integer from the binary""" - return int.from_bytes(self.read_byte(offset, read_size), endianness) + return int.from_bytes(self.read_bytes(offset, read_size), endianness) endianness: Literal["big", "little"] if self.endianness == Endianness.BIG_ENDIAN: @@ -158,16 +161,16 @@ def read_int(read_size: int) -> int: return read_int(16 if size is None else size) elif data_type == DataType.FLOAT: s = 4 if size is None else size - return struct.unpack(f"{endianness_sign}f", self.read_byte(offset, s)) + return struct.unpack(f"{endianness_sign}f", self.read_bytes(offset, s)) elif data_type == DataType.DOUBLE: s = 8 if size is None else size - return struct.unpack(f"{endianness_sign}d", self.read_byte(offset, s)) + return struct.unpack(f"{endianness_sign}d", self.read_bytes(offset, s)) else: raise NotImplementedError( f"Cannot read {data_type}. DataType not implemented." ) - def read_byte(self, offset: int, size: int) -> bytes: + def read_bytes(self, offset: int, size: int) -> bytes: """Read one (or more) byte(s) in the file at `offset`. This is mostly used to read instructions. 
diff --git a/bindings/python/quokka/function.py b/bindings/python/quokka/function.py index 3eaa64c..23a8baa 100644 --- a/bindings/python/quokka/function.py +++ b/bindings/python/quokka/function.py @@ -708,6 +708,14 @@ def in_degree(self) -> int: """Function in degree""" return self[self.start].in_degree + @property + def blocks(self) -> dict[AddressT, quokka.Block]: + """Returns a dictionary which is used to reference all basic blocks + by their address. + Accessing this property will also load the CFG. + """ + return {addr: self.get_block(addr) for addr in self.graph.nodes} + def __hash__(self) -> int: # type: ignore """Hash value""" return self.start diff --git a/bindings/python/quokka/instruction.py b/bindings/python/quokka/instruction.py index 801c0b7..4c53d75 100644 --- a/bindings/python/quokka/instruction.py +++ b/bindings/python/quokka/instruction.py @@ -359,7 +359,7 @@ def bytes(self) -> bytes: except quokka.exc.NotInFileError: return b"" - return self.program.executable.read_byte( + return self.program.executable.read_bytes( offset=file_offset, size=self.size, ) diff --git a/bindings/python/quokka/program.py b/bindings/python/quokka/program.py index de7214f..89f0415 100644 --- a/bindings/python/quokka/program.py +++ b/bindings/python/quokka/program.py @@ -440,6 +440,21 @@ def iter_chunk( if chunk.chunk_type in chunk_types: yield chunk + def read_bytes(self, v_addr: AddressT, size: int) -> bytes: + """Read raw bytes from a virtual address + + Arguments: + v_addr: Virtual address of the data to read + size: Size of the data to read + + Returns: + The raw data at the specified address + """ + + if (offset := v_addr - self.base_address) < 0: + raise ValueError("Address outside virtual address space.") + return self.executable.read_bytes(offset, size) + def get_data(self, address: AddressT) -> quokka.Data: """Get data by address diff --git a/bindings/python/quokka/types.py b/bindings/python/quokka/types.py index b997b81..7571b02 100644 --- 
a/bindings/python/quokka/types.py +++ b/bindings/python/quokka/types.py @@ -48,6 +48,13 @@ RegType = enum.IntEnum +class RegAccessMode(enum.Enum): + """Register access mode""" + + READ = enum.auto() + WRITE = enum.auto() + ANY = enum.auto() + ReferenceTarget = Union[ "quokka.structure.Structure", "quokka.structure.StructureMember", diff --git a/bindings/python/quokka/utils.py b/bindings/python/quokka/utils.py index 588bf42..b1ff201 100644 --- a/bindings/python/quokka/utils.py +++ b/bindings/python/quokka/utils.py @@ -14,10 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import functools import hashlib import pathlib import logging +from collections.abc import Iterable +from typing import TYPE_CHECKING import quokka from quokka.analysis import ( @@ -34,7 +37,10 @@ ArchPPC64, ) -from quokka.types import Type +from quokka.types import Type, RegAccessMode + +if TYPE_CHECKING: + from quokka.instruction import Instruction logger = logging.getLogger() @@ -177,3 +183,44 @@ def parse_version(version: str) -> tuple[int, int, int]: ) return parsed + + +def find_register_access( + register: int | str, access_mode: RegAccessMode, instructions: Iterable[Instruction] +) -> Instruction | None: + """Traverse the list of instructions searching for the first one that accesses + the specified register with the required access mode. + + Arguments: + register: The identifier of the register we are targeting, that can either be + the capstone register ID (ex: capstone.x86_const.X86_REG_EAX) or the + register name (ex: "eax") + access_mode: The access mode to the register (read, write or any) + instructions: An iterable of instructions to analyze + + Returns: + The first instruction that accesses the register in the specified mode. + Return None if no such instruction is found. 
+ """ + + for instr in instructions: + # Retrieve the list of all registers read or modified by the instruction using capstone + regs_read, regs_write = instr.cs_inst.regs_access() + + # Remap registers to the correct type + if isinstance(register, str): + register = register.lower() + regs_read = [instr.cs_inst.reg_name(r) for r in regs_read] + regs_write = [instr.cs_inst.reg_name(r) for r in regs_write] + + # Check if it is accessing the target register in the correct mode + if ( + register in regs_write + and (access_mode == RegAccessMode.WRITE or access_mode == RegAccessMode.ANY) + ) or ( + register in regs_read + and (access_mode == RegAccessMode.READ or access_mode == RegAccessMode.ANY) + ): + return instr + + return None diff --git a/setup.py b/setup.py index f7ed749..b6d5b51 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ description="Quokka : A Fast and Accurate Binary Exporter", long_description=readme, long_description_content_type="text/markdown", + python_requires=">=3.8", packages=["quokka", "quokka.analysis", "quokka.backends"], package_dir={"": "bindings/python/"}, package_data={"quokka": ["*.pyi", "*.typed"]},