From 5d6c5505e5f0ef22673c012023e58300f58a9fda Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Sun, 20 Aug 2023 14:38:48 +0200 Subject: [PATCH 1/9] [API] Add read_byte from virtual address --- bindings/python/quokka/program.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/bindings/python/quokka/program.py b/bindings/python/quokka/program.py index de7214f..b14b37d 100644 --- a/bindings/python/quokka/program.py +++ b/bindings/python/quokka/program.py @@ -440,6 +440,21 @@ def iter_chunk( if chunk.chunk_type in chunk_types: yield chunk + def read_byte(self, v_addr: AddressT, size: int) -> bytes: + """Read raw bytes from a virtual address + + Arguments: + v_addr: Virtual address of the data to read + size: Size of the data to read + + Returns: + The raw data at the specified address + """ + + if (offset := v_addr - self.base_address) < 0: + raise ValueError("Address outside virtual address space.") + return self.executable.read_byte(offset, size) + def get_data(self, address: AddressT) -> quokka.Data: """Get data by address From 3b8b66ca26c949055365b1e4e485c201ceb04842 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Mon, 21 Aug 2023 16:08:08 +0200 Subject: [PATCH 2/9] Add utils.find_reg_access API --- bindings/python/quokka/types.py | 7 ++++++ bindings/python/quokka/utils.py | 41 ++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/bindings/python/quokka/types.py b/bindings/python/quokka/types.py index b997b81..7571b02 100644 --- a/bindings/python/quokka/types.py +++ b/bindings/python/quokka/types.py @@ -48,6 +48,13 @@ RegType = enum.IntEnum +class RegAccessMode(enum.Enum): + """Register access mode""" + + READ = enum.auto() + WRITE = enum.auto() + ANY = enum.auto() + ReferenceTarget = Union[ "quokka.structure.Structure", "quokka.structure.StructureMember", diff --git a/bindings/python/quokka/utils.py b/bindings/python/quokka/utils.py index 588bf42..feb8604 100644 --- a/bindings/python/quokka/utils.py 
+++ b/bindings/python/quokka/utils.py @@ -14,10 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import functools import hashlib import pathlib import logging +from collections.abc import Iterable +from typing import TYPE_CHECKING import quokka from quokka.analysis import ( @@ -34,7 +37,10 @@ ArchPPC64, ) -from quokka.types import Type +from quokka.types import Type, RegAccessMode + +if TYPE_CHECKING: + from quokka.instruction import Instruction logger = logging.getLogger() @@ -177,3 +183,36 @@ def parse_version(version: str) -> tuple[int, int, int]: ) return parsed + + +def find_reg_access( + reg_id: int, access_mode: RegAccessMode, instructions: Iterable[Instruction] +) -> Instruction | None: + """Traverse the list of instructions searching for the first one that access + the specified register with the required access mode. + + Arguments: + reg: The capstone register ID that we are targeting (ex: capstone.x86_const.X86_REG_EAX) + access_mode: The access mode to the register (read or write) + instructions: An iterable of instructions to analyze + + Returns: + The first instruction that access the register in the specified mode. + Return None if no such instruction is found. 
+ """ + + for instr in instructions: + # Retrieve the list of all registers read or modified by the instruction using capstone + regs_read, regs_write = instr.cs_inst.regs_access() + + # Check if it is accessing the target register in the correct mode + if ( + reg_id in regs_write + and (access_mode == RegAccessMode.WRITE or access_mode == RegAccessMode.ANY) + ) or ( + reg_id in regs_read + and (access_mode == RegAccessMode.READ or access_mode == RegAccessMode.ANY) + ): + return instr + + return None From cc1b9903704823954007c91a6dcd6618de25a8f6 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Tue, 22 Aug 2023 09:36:12 +0200 Subject: [PATCH 3/9] Add minimum python version 3.8 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f7ed749..b6d5b51 100644 --- a/setup.py +++ b/setup.py @@ -39,6 +39,7 @@ description="Quokka : A Fast and Accurate Binary Exporter", long_description=readme, long_description_content_type="text/markdown", + python_requires=">=3.8", packages=["quokka", "quokka.analysis", "quokka.backends"], package_dir={"": "bindings/python/"}, package_data={"quokka": ["*.pyi", "*.typed"]}, From 063db2e12851203723b5166b73c50f65e961f243 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Tue, 22 Aug 2023 09:40:20 +0200 Subject: [PATCH 4/9] Rename function to find_register_access. NFC. 
--- bindings/python/quokka/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bindings/python/quokka/utils.py b/bindings/python/quokka/utils.py index feb8604..6bca8bc 100644 --- a/bindings/python/quokka/utils.py +++ b/bindings/python/quokka/utils.py @@ -185,7 +185,7 @@ def parse_version(version: str) -> tuple[int, int, int]: return parsed -def find_reg_access( +def find_register_access( reg_id: int, access_mode: RegAccessMode, instructions: Iterable[Instruction] ) -> Instruction | None: """Traverse the list of instructions searching for the first one that access From 48e505cdea7720611d93f426b433c27193b6bd10 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Tue, 22 Aug 2023 10:46:32 +0200 Subject: [PATCH 5/9] find_register_access accepts either a register name or id --- bindings/python/quokka/utils.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/bindings/python/quokka/utils.py b/bindings/python/quokka/utils.py index 6bca8bc..b1ff201 100644 --- a/bindings/python/quokka/utils.py +++ b/bindings/python/quokka/utils.py @@ -186,13 +186,15 @@ def parse_version(version: str) -> tuple[int, int, int]: def find_register_access( - reg_id: int, access_mode: RegAccessMode, instructions: Iterable[Instruction] + register: int | str, access_mode: RegAccessMode, instructions: Iterable[Instruction] ) -> Instruction | None: """Traverse the list of instructions searching for the first one that access the specified register with the required access mode. 
Arguments: - reg: The capstone register ID that we are targeting (ex: capstone.x86_const.X86_REG_EAX) + register: The identifier of the register we are targeting, that can either be + the capstone register ID (ex: capstone.x86_const.X86_REG_EAX) or the + register name (ex: "eax") access_mode: The access mode to the register (read or write) instructions: An iterable of instructions to analyze @@ -205,12 +207,18 @@ def find_register_access( # Retrieve the list of all registers read or modified by the instruction using capstone regs_read, regs_write = instr.cs_inst.regs_access() + # Remap registers to the correct type + if isinstance(register, str): + register = register.lower() + regs_read = [instr.cs_inst.reg_name(r) for r in regs_read] + regs_write = [instr.cs_inst.reg_name(r) for r in regs_write] + # Check if it is accessing the target register in the correct mode if ( - reg_id in regs_write + register in regs_write and (access_mode == RegAccessMode.WRITE or access_mode == RegAccessMode.ANY) ) or ( - reg_id in regs_read + register in regs_read and (access_mode == RegAccessMode.READ or access_mode == RegAccessMode.ANY) ): return instr From a882f0afebe4ac4feb6c5bcb5d75bf761e9b7c5a Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Tue, 22 Aug 2023 10:52:04 +0200 Subject: [PATCH 6/9] Deprecate read_byte and use read_bytes --- bindings/python/quokka/block.py | 2 +- bindings/python/quokka/executable.py | 28 +++++++++++++++++---------- bindings/python/quokka/instruction.py | 2 +- bindings/python/quokka/program.py | 4 ++-- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bindings/python/quokka/block.py b/bindings/python/quokka/block.py index d18e1ec..664fb63 100644 --- a/bindings/python/quokka/block.py +++ b/bindings/python/quokka/block.py @@ -235,7 +235,7 @@ def bytes(self) -> bytes: return b"" # Read all block at once - block_bytes = self.program.executable.read_byte( + block_bytes = self.program.executable.read_bytes( offset=file_offset, size=self.size, ) 
diff --git a/bindings/python/quokka/executable.py b/bindings/python/quokka/executable.py index c76eaa6..662ef77 100644 --- a/bindings/python/quokka/executable.py +++ b/bindings/python/quokka/executable.py @@ -58,7 +58,7 @@ def read(self, offset: int, size: int) -> bytes: """Read `size` at `offset` in the file. This method should not be used directly and considered as part of a private API. - The preferred method are read_byte / read_string . + The preferred method are read_bytes / read_string . Arguments: offset: File offset @@ -77,14 +77,14 @@ def read(self, offset: int, size: int) -> bytes: def read_string(self, offset: int, size: Optional[int] = None) -> str: """Read a string in the file. - + If the size is not given, Quokka will try to read the string until the first null byte. That works only for null-terminated strings. If the string is null terminated, remove the trailing 0. Arguments: - offset: String file offset + offset: String file offset size: String size if known. Returns: @@ -99,14 +99,16 @@ def read_string(self, offset: int, size: Optional[int] = None) -> str: string = self.read(offset, size).decode("utf-8") except UnicodeDecodeError as exc: raise ValueError("Unable to read or decode the string.") from exc - + else: try: null_byte = self.content.index(b"\x00", offset) except ValueError as exc: - raise ValueError("String is not null-terminated and size was not given") from exc + raise ValueError( + "String is not null-terminated and size was not given" + ) from exc - string = self.content[offset: null_byte].decode("utf-8") + string = self.content[offset:null_byte].decode("utf-8") # FIX: When returning a single character string, it does not end with a '\0' if len(string) > 1 and string.endswith("\x00"): @@ -129,10 +131,11 @@ def read_data( Returns: The data value """ + # Read an int of size `read_size` def read_int(read_size: int) -> int: """Read an integer from the binary""" - return int.from_bytes(self.read_byte(offset, read_size), endianness) + 
return int.from_bytes(self.read_bytes(offset, read_size), endianness) endianness: Literal["big", "little"] if self.endianness == Endianness.BIG_ENDIAN: @@ -158,16 +161,16 @@ def read_int(read_size: int) -> int: return read_int(16 if size is None else size) elif data_type == DataType.FLOAT: s = 4 if size is None else size - return struct.unpack(f"{endianness_sign}f", self.read_byte(offset, s)) + return struct.unpack(f"{endianness_sign}f", self.read_bytes(offset, s)) elif data_type == DataType.DOUBLE: s = 8 if size is None else size - return struct.unpack(f"{endianness_sign}d", self.read_byte(offset, s)) + return struct.unpack(f"{endianness_sign}d", self.read_bytes(offset, s)) else: raise NotImplementedError( f"Cannot read {data_type}. DataType not implemented." ) - def read_byte(self, offset: int, size: int) -> bytes: + def read_bytes(self, offset: int, size: int) -> bytes: """Read one (or more) byte(s) in the file at `offset`. This is mostly used to read instructions. @@ -180,3 +183,8 @@ def read_byte(self, offset: int, size: int) -> bytes: The bytes values """ return self.read(offset, size) + + def read_byte(self, offset: int, size: int) -> bytes: + """Deprecated""" + DeprecationWarning("read_byte has been deprecated. 
Please use read_bytes") + return self.read_bytes(offset, size) diff --git a/bindings/python/quokka/instruction.py b/bindings/python/quokka/instruction.py index 801c0b7..4c53d75 100644 --- a/bindings/python/quokka/instruction.py +++ b/bindings/python/quokka/instruction.py @@ -359,7 +359,7 @@ def bytes(self) -> bytes: except quokka.exc.NotInFileError: return b"" - return self.program.executable.read_byte( + return self.program.executable.read_bytes( offset=file_offset, size=self.size, ) diff --git a/bindings/python/quokka/program.py b/bindings/python/quokka/program.py index b14b37d..89f0415 100644 --- a/bindings/python/quokka/program.py +++ b/bindings/python/quokka/program.py @@ -440,7 +440,7 @@ def iter_chunk( if chunk.chunk_type in chunk_types: yield chunk - def read_byte(self, v_addr: AddressT, size: int) -> bytes: + def read_bytes(self, v_addr: AddressT, size: int) -> bytes: """Read raw bytes from a virtual address Arguments: @@ -453,7 +453,7 @@ def read_byte(self, v_addr: AddressT, size: int) -> bytes: if (offset := v_addr - self.base_address) < 0: raise ValueError("Address outside virtual address space.") - return self.executable.read_byte(offset, size) + return self.executable.read_bytes(offset, size) def get_data(self, address: AddressT) -> quokka.Data: """Get data by address From 1d8830e8a53d3215a7fe4ae2f11c455693f3f0d5 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Tue, 22 Aug 2023 13:22:37 +0200 Subject: [PATCH 7/9] Raise deprecation warning --- bindings/python/quokka/executable.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bindings/python/quokka/executable.py b/bindings/python/quokka/executable.py index 662ef77..a3cb197 100644 --- a/bindings/python/quokka/executable.py +++ b/bindings/python/quokka/executable.py @@ -16,6 +16,7 @@ import pathlib import struct +import warnings from quokka.types import DataType, Endianness, Literal, Optional, Union @@ -185,6 +186,8 @@ def read_bytes(self, offset: int, size: int) -> bytes: 
return self.read(offset, size) def read_byte(self, offset: int, size: int) -> bytes: - """Deprecated""" - DeprecationWarning("read_byte has been deprecated. Please use read_bytes") + """Deprecated. Use read_bytes""" + warnings.warn( + "read_byte has been deprecated. Please use read_bytes", DeprecationWarning + ) return self.read_bytes(offset, size) From 4e831cfadd4974d3136ee0c67c2d651964d37105 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Wed, 23 Aug 2023 18:47:36 +0200 Subject: [PATCH 8/9] Add API `Function.blocks` --- bindings/python/quokka/function.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/bindings/python/quokka/function.py b/bindings/python/quokka/function.py index 3eaa64c..23a8baa 100644 --- a/bindings/python/quokka/function.py +++ b/bindings/python/quokka/function.py @@ -708,6 +708,14 @@ def in_degree(self) -> int: """Function in degree""" return self[self.start].in_degree + @property + def blocks(self) -> dict[AddressT, quokka.Block]: + """Returns a dictionary which is used to reference all basic blocks + by their address. + Calling this function will also load the CFG. 
+ """ + return {addr: self.get_block(addr) for addr in self.graph.nodes} + def __hash__(self) -> int: # type: ignore """Hash value""" return self.start From 051d29d57cb5e501ff74f842e3460b9c73c731d8 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 25 Aug 2023 18:11:59 +0200 Subject: [PATCH 9/9] Drop deprecated function --- bindings/python/quokka/executable.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/bindings/python/quokka/executable.py b/bindings/python/quokka/executable.py index a3cb197..02bd359 100644 --- a/bindings/python/quokka/executable.py +++ b/bindings/python/quokka/executable.py @@ -16,7 +16,6 @@ import pathlib import struct -import warnings from quokka.types import DataType, Endianness, Literal, Optional, Union @@ -184,10 +183,3 @@ def read_bytes(self, offset: int, size: int) -> bytes: The bytes values """ return self.read(offset, size) - - def read_byte(self, offset: int, size: int) -> bytes: - """Deprecated. Use read_bytes""" - warnings.warn( - "read_byte has been deprecated. Please use read_bytes", DeprecationWarning - ) - return self.read_bytes(offset, size)