Skip to content

Commit dfd137f

Browse files
authored
Refactor PyVEX to pass lint (#294)
* Refactor PyVEX to pass lint
* Re-add irop_enums_to_ints to pyvex namespace
* Replace log.trace with log.debug
* Register lifters in one place instead of relying on lifter import order
* Fix pylint errors
* Rename lift.py to lift_function.py
1 parent a973dc6 commit dfd137f

37 files changed

+706
-673
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ repos:
7878
rev: v1.10.0
7979
hooks:
8080
# Python
81-
# - id: python-use-type-annotations
81+
- id: python-use-type-annotations
8282
- id: python-no-log-warn
8383
# Documentation
8484
- id: rst-backticks
@@ -92,7 +92,7 @@ repos:
9292
- id: check-builtin-literals
9393
- id: check-docstring-first
9494

95-
# - repo: https://github.com/charliermarsh/ruff-pre-commit
96-
# rev: v0.0.230
97-
# hooks:
98-
# - id: ruff
95+
- repo: https://github.com/charliermarsh/ruff-pre-commit
96+
rev: v0.0.231
97+
hooks:
98+
- id: ruff

make_ffi.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
1-
import re
1+
import logging
22
import os
3-
import sys
4-
import cffi
5-
import subprocess
63
import platform
4+
import re
5+
import subprocess
6+
import sys
77

8-
import logging
8+
import cffi
99

10-
l = logging.getLogger("cffier")
11-
l.setLevel(logging.DEBUG)
10+
log = logging.getLogger("cffier")
11+
log.setLevel(logging.DEBUG)
1212

1313

1414
def find_good_scan(questionable):
@@ -18,7 +18,7 @@ def find_good_scan(questionable):
1818

1919
while len(questionable):
2020
ffi = cffi.FFI()
21-
l.debug("scan - trying %d good and %d questionable", len(known_good), len(questionable))
21+
log.debug("scan - trying %d good and %d questionable", len(known_good), len(questionable))
2222

2323
candidate = known_good + questionable[:end_line]
2424
failed_line = -1
@@ -98,7 +98,7 @@ def doit(vex_path):
9898
errs.append((" ".join(cmd), -1, "does not exist"))
9999
continue
100100
else:
101-
l.warning("failed commands:\n" + "\n".join("{} ({}) -- {}".format(*e) for e in errs))
101+
log.warning("failed commands:\n" + "\n".join("{} ({}) -- {}".format(*e) for e in errs))
102102
raise Exception(
103103
"Couldn't process pyvex headers."
104104
+ 'Please set CPP environmental variable to local path of "cpp".'
@@ -145,7 +145,5 @@ def get_guest_offsets(vex_path):
145145

146146

147147
if __name__ == "__main__":
148-
import sys
149-
150148
logging.basicConfig(level=logging.DEBUG)
151149
doit(sys.argv[1])

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,3 +10,12 @@ force-exclude = '''
1010
vex
1111
)/
1212
'''
13+
14+
[tool.ruff]
15+
line-length = 120
16+
select = [
17+
"E",
18+
"F",
19+
"I",
20+
"TID",
21+
]

pyvex/__init__.py

Lines changed: 43 additions & 85 deletions
Original file line number | Diff line number | Diff line change
@@ -4,96 +4,54 @@
44
"""
55
__version__ = "9.2.37.dev0"
66

7-
if bytes is str:
8-
raise Exception("This module is designed for python 3 only. Please install an older version to use python 2.")
9-
10-
import os
11-
import sys
12-
import hashlib
13-
import pickle
14-
import tempfile
15-
from typing import Any
16-
import cffi
17-
from .vex_ffi import ffi_str as _ffi_str
18-
19-
ffi = cffi.FFI()
20-
21-
import logging
22-
23-
logging.getLogger("pyvex").addHandler(logging.NullHandler())
24-
25-
26-
def _locate_lib(module: str, library: str) -> str:
27-
"""
28-
Attempt to find a native library without using pkg_resources, and only fall back to pkg_resources upon failures.
29-
This is because "import pkg_resources" is slow.
30-
31-
:return: The full path of the native library.
32-
"""
33-
base_dir = os.path.dirname(__file__)
34-
attempt = os.path.join(base_dir, library)
35-
if os.path.isfile(attempt):
36-
return attempt
37-
38-
import pkg_resources # pylint:disable=import-outside-toplevel
39-
40-
return pkg_resources.resource_filename(module, os.path.join("lib", library))
41-
42-
43-
def _parse_ffi_str():
44-
hash = hashlib.md5(_ffi_str.encode("utf-8")).hexdigest()
45-
cache_location = os.path.join(tempfile.gettempdir(), f"pyvex_ffi_parser_cache.{hash}")
46-
47-
if os.path.isfile(cache_location):
48-
# load the cache
49-
with open(cache_location, "rb") as f:
50-
cache = pickle.loads(f.read())
51-
ffi._parser._declarations = cache["_declarations"]
52-
ffi._parser._int_constants = cache["_int_constants"]
53-
else:
54-
ffi.cdef(_ffi_str)
55-
# cache the result
56-
cache = {
57-
"_declarations": ffi._parser._declarations,
58-
"_int_constants": ffi._parser._int_constants,
59-
}
60-
with open(cache_location, "wb") as f:
61-
f.write(pickle.dumps(cache))
62-
63-
64-
def _find_c_lib():
65-
# Load the c library for calling into VEX
66-
if sys.platform in ("win32", "cygwin"):
67-
library_file = "pyvex.dll"
68-
elif sys.platform == "darwin":
69-
library_file = "libpyvex.dylib"
70-
else:
71-
library_file = "libpyvex.so"
72-
73-
pyvex_path = _locate_lib(__name__, os.path.join("lib", library_file))
74-
# parse _ffi_str and use cache if possible
75-
_parse_ffi_str()
76-
# RTLD_GLOBAL used for sim_unicorn.so
77-
lib = ffi.dlopen(pyvex_path)
78-
if not lib.vex_init():
79-
raise ImportError("libvex failed to initialize")
80-
# this looks up all the definitions (wtf)
81-
dir(lib)
82-
return lib
83-
84-
85-
pvc = _find_c_lib() # type: Any # This should be properly typed, but this seems non trivial
86-
87-
# pylint: disable=wildcard-import
88-
from .enums import *
89-
from . import stmt, expr, const
7+
from . import const, expr, stmt
908
from .block import IRSB, IRTypeEnv
9+
from .const import get_type_size, get_type_spec_size, tag_to_const_class
10+
from .enums import (
11+
IRCallee,
12+
IRRegArray,
13+
VEXObject,
14+
default_vex_archinfo,
15+
get_enum_from_int,
16+
get_int_from_enum,
17+
irop_enums_to_ints,
18+
vex_endness_from_string,
19+
)
20+
from .errors import PyVEXError
9121
from .expr import get_op_retty
92-
from .const import tag_to_const_class, get_type_size, get_type_spec_size
9322
from .lifting import lift, lifters
94-
from .errors import PyVEXError
23+
from .native import ffi, pvc
9524

9625
# aliases....
9726
IRStmt = stmt
9827
IRExpr = expr
9928
IRConst = const
29+
30+
31+
__all__ = [
32+
"const",
33+
"expr",
34+
"stmt",
35+
"IRSB",
36+
"IRTypeEnv",
37+
"get_type_size",
38+
"get_type_spec_size",
39+
"irop_enums_to_ints",
40+
"tag_to_const_class",
41+
"IRCallee",
42+
"IRRegArray",
43+
"VEXObject",
44+
"default_vex_archinfo",
45+
"get_enum_from_int",
46+
"get_int_from_enum",
47+
"vex_endness_from_string",
48+
"PyVEXError",
49+
"get_op_retty",
50+
"lift",
51+
"lifters",
52+
"ffi",
53+
"pvc",
54+
"IRStmt",
55+
"IRExpr",
56+
"IRConst",
57+
]

pyvex/block.py

Lines changed: 23 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,18 @@
11
import copy
2-
import sys
32
import itertools
3+
import logging
44
from typing import List, Optional
55

6-
from pyvex.expr import IRExpr
7-
from pyvex.stmt import IRStmt
8-
9-
from . import VEXObject
106
from . import expr, stmt
117
from .const import get_type_size
12-
from .stmt import WrTmp, LoadG, LLSC, Dirty, CAS, get_enum_from_int, get_int_from_enum, Exit, IMark
13-
from .expr import RdTmp
148
from .data_ref import DataRef
9+
from .enums import VEXObject
1510
from .errors import SkipStatementsError
11+
from .expr import RdTmp
12+
from .native import pvc
13+
from .stmt import CAS, LLSC, Dirty, Exit, IMark, IRExpr, IRStmt, LoadG, WrTmp, get_enum_from_int, get_int_from_enum
1614

17-
18-
import logging
19-
20-
l = logging.getLogger("pyvex.block")
15+
log = logging.getLogger("pyvex.block")
2116

2217

2318
class IRSB(VEXObject):
@@ -88,8 +83,10 @@ def __init__(
8883
:type arch: :class:`archinfo.Arch`
8984
:param max_inst: The maximum number of instructions to lift. (See note below)
9085
:param max_bytes: The maximum number of bytes to use.
91-
:param num_inst: Replaces max_inst if max_inst is None. If set to None as well, no instruction limit is used.
92-
:param num_bytes: Replaces max_bytes if max_bytes is None. If set to None as well, no byte limit is used.
86+
:param num_inst: Replaces max_inst if max_inst is None. If set to None as well, no instruction limit
87+
is used.
88+
:param num_bytes: Replaces max_bytes if max_bytes is None. If set to None as well, no byte limit is
89+
used.
9390
:param bytes_offset: The offset into `data` to start lifting at. Note that for ARM THUMB mode, both
9491
`mem_addr` and `bytes_offset` must be odd (typically `bytes_offset` is set to 1).
9592
:param traceflags: The libVEX traceflags, controlling VEX debug prints.
@@ -117,10 +114,10 @@ def __init__(
117114
self.addr = mem_addr
118115
self.arch = arch
119116

120-
self.statements = [] # type: List[IRStmt]
121-
self.next = None # type: Optional[IRExpr]
117+
self.statements: List[IRStmt] = []
118+
self.next: Optional[IRExpr] = None
122119
self._tyenv = None
123-
self.jumpkind = None # type: Optional[str]
120+
self.jumpkind: Optional[str] = None
124121
self._direct_next = None
125122
self._size = None
126123
self._instructions = None
@@ -133,6 +130,8 @@ def __init__(
133130
# This is the slower path (because we need to call _from_py() to copy the content in the returned IRSB to
134131
# the current IRSB instance. You should always call `lift()` directly. This method is kept for compatibility
135132
# concerns.
133+
from pyvex.lifting import lift
134+
136135
irsb = lift(
137136
data,
138137
mem_addr,
@@ -225,8 +224,8 @@ def convert_tmp(tmp):
225224

226225
def convert_expr(expr_):
227226
"""
228-
Converts a VEX expression to use tmps in the appended-block instead of the appended-to-block. Used to prevent
229-
collisions in tmp numbers between the two blocks.
227+
Converts a VEX expression to use tmps in the appended-block instead of the appended-to-block. Used to
228+
prevent collisions in tmp numbers between the two blocks.
230229
:param tmp: The VEX expression to convert
231230
:vartype expr: :class:`IRExpr`
232231
"""
@@ -304,7 +303,7 @@ def typecheck(self):
304303
assert self.next is not None, "Missing next expression"
305304
assert self.jumpkind is not None, "Missing jumpkind"
306305

307-
# type assertions
306+
# Type assertions
308307
assert isinstance(self.next, expr.IRExpr), "Next expression is not an expression"
309308
assert type(self.jumpkind is str), "Jumpkind is not a string"
310309
assert self.jumpkind.startswith("Ijk_"), "Jumpkind is not a jumpkind enum"
@@ -316,7 +315,7 @@ def typecheck(self):
316315
assert isinstance(st, stmt.IRStmt), "Statement %d is not an IRStmt" % i
317316
try:
318317
assert st.typecheck(self.tyenv), "Statement %d failed to typecheck" % i
319-
except: # pylint: disable=bare-except
318+
except Exception: # pylint: disable=bare-except
320319
assert False, "Statement %d errored in typechecking" % i
321320

322321
if type(st) is stmt.NoOp:
@@ -331,7 +330,7 @@ def typecheck(self):
331330

332331
assert last_imark is not None, "No IMarks present in block"
333332
except AssertionError as e:
334-
l.debug(e.args[0])
333+
log.debug(e.args[0])
335334
return False
336335
return True
337336

@@ -433,7 +432,8 @@ def operations(self):
433432
@property
434433
def all_constants(self):
435434
"""
436-
Returns all constants in the block (including incrementing of the program counter) as :class:`pyvex.const.IRConst`.
435+
Returns all constants in the block (including incrementing of the program counter) as
436+
:class:`pyvex.const.IRConst`.
437437
"""
438438
return sum((e.constants for e in self.expressions), [])
439439

@@ -634,7 +634,7 @@ def lookup(self, tmp):
634634
Return the type of temporary variable `tmp` as an enum string
635635
"""
636636
if tmp < 0 or tmp > self.types_used:
637-
l.debug("Invalid temporary number %d", tmp)
637+
log.debug("Invalid temporary number %d", tmp)
638638
raise IndexError(tmp)
639639
return self.types[tmp]
640640

@@ -670,7 +670,3 @@ def typecheck(self):
670670
except ValueError:
671671
return False
672672
return True
673-
674-
675-
from . import pvc
676-
from .lifting import lift

0 commit comments

Comments
 (0)