Skip to content

Commit

Permalink
Merge pull request #70 from quarkslab/bindifffile-refactoring
Browse files Browse the repository at this point in the history
update .BinDiff file creation to comply with new API
  • Loading branch information
RobinDavid authored Jan 3, 2025
2 parents 66af6ee + 40f0110 commit 725ded3
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 32 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies = [
"scikit-learn",
"python-louvain",
"enum_tools",
"python-bindiff",
"python-bindiff>=0.3.1",
"python-binexport>=0.3.2",
"quokka-project",
"idascript",
Expand Down
93 changes: 62 additions & 31 deletions src/qbindiff/mapping/bindiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@

from __future__ import annotations

import hashlib
from collections import defaultdict
from collections.abc import Generator
from functools import lru_cache
from typing import TYPE_CHECKING
from pathlib import Path

# third-party imports
from bindiff import BindiffFile # type: ignore[import-untyped]
Expand Down Expand Up @@ -140,6 +142,43 @@ def compute_instruction_match(
yield from zip(primary_instr[k], secondary_instr[k])


def _compute_file_info(program: Program) -> dict:
    """
    Compute the BinExport file information required for filling
    a .BinDiff database.

    The hash is the SHA-256 digest of the executable when
    ``program.exec_path`` designates a regular file, and of the export
    file otherwise. Every counter is split between library and
    non-library functions, as reported by ``is_library()``.

    :param program: Binexport program
    :return: dict of data (file names, hash, and function / call /
             basic block / edge / instruction counts)
    :raises OSError: if neither the executable nor the export file
                     can be read
    """
    raw_exec_path = program.exec_path
    export_file = Path(program.export_path)

    # ``Path("")`` is the current directory: it *exists* but cannot be read
    # as a file. Checking ``is_file()`` (and guarding an empty / missing
    # path) makes such cases fall back to the export file instead of raising.
    exec_file = Path(raw_exec_path) if raw_exec_path else None
    if exec_file is not None and exec_file.is_file():
        hashed_file = exec_file
    else:
        hashed_file = export_file
    digest = hashlib.sha256(hashed_file.read_bytes()).hexdigest()

    # Counters indexed by "is this a library function?"
    funs = {True: 0, False: 0}
    bbs = {True: 0, False: 0}
    edges = {True: 0, False: 0}
    insts = {True: 0, False: 0}
    for fun in program:
        islib = fun.is_library()
        funs[islib] += 1
        bbs[islib] += len(fun.flowgraph.nodes)
        edges[islib] += len(fun.flowgraph.edges)
        insts[islib] += sum(len(bb.instructions) for bb in fun)

    return {
        "export_name": program.export_path,
        "hash": digest,
        "executable_name": program.exec_path,
        "functions": funs[False],
        "libfunctions": funs[True],
        "calls": len(program.callgraph.edges),
        "basicblocks": bbs[False],
        "libbasicblocks": bbs[True],
        "edges": edges[False],
        "libedges": edges[True],
        "instructions": insts[False],
        "libinstructions": insts[True],
    }


def export_to_bindiff(
filename: str, primary: Program, secondary: Program, mapping: Mapping
) -> None:
Expand All @@ -153,58 +192,50 @@ def export_to_bindiff(
"""
from qbindiff import __version__ # import the version here to avoid circular definition

def count_items(program: Program) -> tuple[int, int, int, int]:
fp, flib, bbs, inst = 0, 0, 0, 0
for f_addr, f in program.items():
fp += int(not (f.is_import()))
flib += int(f.is_import())
bbs += len(f)
inst += sum(len(x) for x in f)
return fp, flib, bbs, inst

binfile = BindiffFile.create(
filename,
primary.export_path,
secondary.export_path,
f"Qbindiff {__version__}",
"",
mapping.normalized_similarity,
0.0,
)

# Add the two files
infos_primary = _compute_file_info(primary)
binfile.add_file_matched(**infos_primary)

infos_secondary = _compute_file_info(secondary)
binfile.add_file_matched(**infos_secondary)

for m in mapping: # iterate all the matchs
with m.primary, m.secondary: # Do not unload basic blocks
# Add the function match
faddr1, faddr2 = m.primary.addr, m.secondary.addr

# Add the function match here to provide the same_bb_count
funentry_id = binfile.add_function_match(
faddr1,
faddr2,
m.primary.name,
m.secondary.name,
float(m.similarity),
float(m.confidence),
0,
)

# Compute the basic block match (bindiff style) and add it in database
same_bb_count = 0
bb_matches = compute_basic_block_match(m.primary, m.secondary)
for addr1, addr2 in bb_matches:
bb1, bb2 = m.primary[addr1], m.secondary[addr2]
same_bb_count += 1
entry_id = binfile.add_basic_block_match(faddr1, faddr2, addr1, addr2)
bbentry_id = binfile.add_basic_block_match(funentry_id, addr1, addr2)

# Compute the instruction match (bindiff style) and add it in database
for instr_addr1, instr_addr2 in compute_instruction_match(bb1, bb2):
binfile.add_instruction_match(entry_id, instr_addr1, instr_addr2)

# Add the function match here to provide the same_bb_count
binfile.add_function_match(
faddr1,
faddr2,
m.primary.name,
m.secondary.name,
float(m.similarity),
float(m.confidence),
same_bb_count,
)
binfile.add_instruction_match(bbentry_id, instr_addr1, instr_addr2)

# Update file infos about primary
f, lib, bbs, insts = count_items(primary)
binfile.update_file_infos(1, f, lib, bbs, insts)
# Update file infos about secondary
f, lib, bbs, insts = count_items(secondary)
binfile.update_file_infos(2, f, lib, bbs, insts)
# Update a-posteriori identical basic blocks count
binfile.update_samebb_function_match(funentry_id, same_bb_count)

# binfile.commit()
binfile.commit()

0 comments on commit 725ded3

Please sign in to comment.