From 708972bf4ec378b1c507ad2d8e2cb6fc5454a0b4 Mon Sep 17 00:00:00 2001 From: Lucas Date: Mon, 4 Nov 2024 18:38:07 -0300 Subject: [PATCH] Lazily calculate target pc --- examples/to_json.rs | 5 +++-- src/assembler.rs | 17 +++++++++++++---- src/disassembler.rs | 6 ++++-- src/elf.rs | 17 ++++++++--------- src/interpreter.rs | 11 ++++++++++- src/jit.rs | 15 ++++++++++++--- src/program.rs | 10 ++++++++++ src/static_analysis.rs | 13 +++++++------ src/verifier.rs | 5 +++-- tests/assembler.rs | 2 +- tests/execution.rs | 3 ++- tests/verifier.rs | 2 +- 12 files changed, 74 insertions(+), 32 deletions(-) diff --git a/examples/to_json.rs b/examples/to_json.rs index 0e2950aa..db2b35c5 100644 --- a/examples/to_json.rs +++ b/examples/to_json.rs @@ -38,7 +38,7 @@ fn to_json(program: &[u8]) -> String { let analysis = Analysis::from_executable(&executable).unwrap(); let mut json_insns = vec![]; - for insn in analysis.instructions.iter() { + for (pc, insn) in analysis.instructions.iter().enumerate() { json_insns.push(object!( "opc" => format!("{:#x}", insn.opc), // => insn.opc, "dst" => format!("{:#x}", insn.dst), // => insn.dst, @@ -53,7 +53,8 @@ fn to_json(program: &[u8]) -> String { // has no effect and the complete value is printed anyway. "imm" => format!("{:#x}", insn.imm as i32), // => insn.imm, "desc" => analysis.disassemble_instruction( - insn + insn, + pc ), )); } diff --git a/src/assembler.rs b/src/assembler.rs index ffdb3508..30263794 100644 --- a/src/assembler.rs +++ b/src/assembler.rs @@ -417,6 +417,11 @@ pub fn assemble( insn(opc, 0, 0, resolve_label(insn_ptr, &labels, label)?, 0) } (CallImm, [Integer(imm)]) => { + let instr_imm = if sbpf_version.static_syscalls() { + *imm + } else { + *imm + insn_ptr as i64 + 1 + }; let target_pc = *imm + insn_ptr as i64 + 1; let label = format!("function_{}", target_pc as usize); function_registry @@ -426,7 +431,7 @@ pub fn assemble( target_pc as usize, ) .map_err(|_| format!("Label hash collision {name}"))?; - insn(opc, 0, 1, 0, target_pc) + insn(opc, 0, 1, 0, instr_imm) } (CallReg, [Register(dst)]) => { if sbpf_version.callx_uses_src_reg() { @@ -461,10 +466,14 @@ pub fn assemble( (Syscall, [Integer(imm)]) => insn(opc, 0, 0, 0, *imm), (CallImm, [Label(label)]) => { let label: &str = label; - let target_pc = *labels + let mut target_pc = *labels .get(label) - .ok_or_else(|| format!("Label not found {label}"))?; - insn(opc, 0, 1, 0, target_pc as i64) + .ok_or_else(|| format!("Label not found {label}"))? + as i64; + if sbpf_version.static_syscalls() { + target_pc = target_pc - insn_ptr as i64 - 1; + } + insn(opc, 0, 1, 0, target_pc) } (Endian(size), [Register(dst)]) => insn(opc, *dst, 0, 0, size), (LoadDwImm, [Register(dst), Integer(imm)]) => { diff --git a/src/disassembler.rs b/src/disassembler.rs index fbfe2c58..fb7ec6e8 100644 --- a/src/disassembler.rs +++ b/src/disassembler.rs @@ -111,7 +111,8 @@ fn jmp_reg_str(name: &str, insn: &ebpf::Insn, cfg_nodes: &BTreeMap( - insn: &ebpf::Insn, + insn: &ebpf::Insn, + pc: usize, cfg_nodes: &BTreeMap, function_registry: &FunctionRegistry, loader: &BuiltinProgram, @@ -265,7 +266,8 @@ pub fn disassemble_instruction( ebpf::JSLE_IMM => { name = "jsle"; desc = jmp_imm_str(name, insn, cfg_nodes); }, ebpf::JSLE_REG => { name = "jsle"; desc = jmp_reg_str(name, insn, cfg_nodes); }, ebpf::CALL_IMM => { - let function_name = function_registry.lookup_by_key(insn.imm as u32).map(|(function_name, _)| String::from_utf8_lossy(function_name).to_string()); + let key = sbpf_version.calculate_call_imm_target_pc(pc, insn.imm); + let function_name = function_registry.lookup_by_key(key).map(|(function_name, _)| String::from_utf8_lossy(function_name).to_string()); let function_name = if let Some(function_name) = function_name { name = "call"; function_name diff --git a/src/elf.rs b/src/elf.rs index 640073c3..90cda2da 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -799,10 +799,7 @@ impl Executable { .ok_or(ElfError::ValueOutOfBounds)?; for i in 0..instruction_count { let insn = ebpf::get_insn(text_bytes, i); - if insn.opc == ebpf::CALL_IMM - && insn.imm != -1 - && !(sbpf_version.static_syscalls() && insn.src == 0) - { + if insn.opc == ebpf::CALL_IMM && insn.imm != -1 { let target_pc = (i as isize) .saturating_add(1) .saturating_add(insn.imm as isize); @@ -820,11 +817,13 @@ impl Executable { name.as_bytes(), target_pc as usize, )?; - let offset = i.saturating_mul(ebpf::INSN_SIZE).saturating_add(4); - let checked_slice = text_bytes - .get_mut(offset..offset.saturating_add(4)) - .ok_or(ElfError::ValueOutOfBounds)?; - LittleEndian::write_u32(checked_slice, key); + if !sbpf_version.static_syscalls() { + let offset = i.saturating_mul(ebpf::INSN_SIZE).saturating_add(4); + let checked_slice = text_bytes + .get_mut(offset..offset.saturating_add(4)) + .ok_or(ElfError::ValueOutOfBounds)?; + LittleEndian::write_u32(checked_slice, key); + } } } diff --git a/src/interpreter.rs b/src/interpreter.rs index 9a257608..75064d81 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -551,7 +551,16 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> { } if internal && !resolved { - if let Some((_function_name, target_pc)) = self.executable.get_function_registry().lookup_by_key(insn.imm as u32) { + if let Some((_function_name, target_pc)) = + self.executable + .get_function_registry() + .lookup_by_key( + self + .executable + .get_sbpf_version() + .calculate_call_imm_target_pc(self.reg[11] as usize, insn.imm) + ) + { resolved = true; // make BPF to BPF call diff --git a/src/jit.rs b/src/jit.rs index d0a17180..6e806426 100644 --- a/src/jit.rs +++ b/src/jit.rs @@ -727,9 +727,18 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> { } if internal { - if let Some((_function_name, target_pc)) = self.executable.get_function_registry().lookup_by_key(insn.imm as u32) { - self.emit_internal_call(Value::Constant64(target_pc as i64, true)); - resolved = true; + if let Some((_function_name, target_pc)) = + self.executable + .get_function_registry() + .lookup_by_key( + self + .executable + .get_sbpf_version() + .calculate_call_imm_target_pc(self.pc, insn.imm) + ) + { + self.emit_internal_call(Value::Constant64(target_pc as i64, true)); + resolved = true; } } diff --git a/src/program.rs b/src/program.rs index e40998a5..183b0589 100644 --- a/src/program.rs +++ b/src/program.rs @@ -81,6 +81,16 @@ impl SBPFVersion { self != SBPFVersion::V1 } + /// Calculate the target program counter for a CALL_IMM instruction depending on + /// the SBPF version. + pub fn calculate_call_imm_target_pc(self, pc: usize, imm: i64) -> u32 { + if self.static_syscalls() { + (pc as i64).saturating_add(imm).saturating_add(1) as u32 + } else { + imm as u32 + } + } + /// Move opcodes of memory instructions into ALU instruction classes pub fn move_memory_instruction_classes(self) -> bool { self != SBPFVersion::V1 diff --git a/src/static_analysis.rs b/src/static_analysis.rs index 77dde3b1..89fc8ab7 100644 --- a/src/static_analysis.rs +++ b/src/static_analysis.rs @@ -442,9 +442,10 @@ impl<'a> Analysis<'a> { } /// Generates assembler code for a single instruction - pub fn disassemble_instruction(&self, insn: &ebpf::Insn) -> String { + pub fn disassemble_instruction(&self, insn: &ebpf::Insn, pc: usize) -> String { disassemble_instruction( insn, + pc, &self.cfg_nodes, self.executable.get_function_registry(), self.executable.get_loader(), @@ -455,14 +456,14 @@ impl<'a> Analysis<'a> { /// Generates assembler code for the analyzed executable pub fn disassemble(&self, output: &mut W) -> std::io::Result<()> { let mut last_basic_block = usize::MAX; - for insn in self.instructions.iter() { + for (pc, insn) in self.instructions.iter().enumerate() { self.disassemble_label( output, Some(insn) == self.instructions.first(), insn.ptr, &mut last_basic_block, )?; - writeln!(output, " {}", self.disassemble_instruction(insn))?; + writeln!(output, " {}", self.disassemble_instruction(insn, pc))?; } Ok(()) } @@ -493,7 +494,7 @@ impl<'a> Analysis<'a> { index, &entry[0..11], pc, - self.disassemble_instruction(insn), + self.disassemble_instruction(insn, pc), )?; } Ok(()) @@ -545,9 +546,9 @@ impl<'a> Analysis<'a> { writeln!(output, " lbb_{} [label=<{}
>];", cfg_node_start, analysis.instructions[cfg_node.instructions.clone()].iter() - .map(|insn| { + .enumerate().map(|(pc, insn)| { let desc = analysis.disassemble_instruction( - insn + insn, pc ); if let Some(split_index) = desc.find(' ') { let mut rest = desc[split_index+1..].to_string(); diff --git a/src/verifier.rs b/src/verifier.rs index 3cbee58b..70cb0de4 100644 --- a/src/verifier.rs +++ b/src/verifier.rs @@ -391,10 +391,11 @@ impl Verifier for RequisiteVerifier { ebpf::JSLE_IMM => { check_jmp_offset(prog, insn_ptr, &function_range)?; }, ebpf::JSLE_REG => { check_jmp_offset(prog, insn_ptr, &function_range)?; }, ebpf::CALL_IMM if sbpf_version.static_syscalls() => { + let target_pc = sbpf_version.calculate_call_imm_target_pc(insn_ptr, insn.imm); check_call_target( - insn.imm as u32, + target_pc, function_registry, - VerifierError::InvalidFunction(insn.imm as usize) + VerifierError::InvalidFunction(target_pc as usize) )?; }, ebpf::CALL_IMM => {}, diff --git a/tests/assembler.rs b/tests/assembler.rs index 66df864d..45ce9931 100644 --- a/tests/assembler.rs +++ b/tests/assembler.rs @@ -152,7 +152,7 @@ fn test_call_reg() { fn test_call_imm() { assert_eq!( asm("call 299"), - Ok(vec![insn(0, ebpf::CALL_IMM, 0, 1, 0, 300)]) + Ok(vec![insn(0, ebpf::CALL_IMM, 0, 1, 0, 299)]) ); } diff --git a/tests/execution.rs b/tests/execution.rs index 54c1be01..4dab27af 100644 --- a/tests/execution.rs +++ b/tests/execution.rs @@ -2962,8 +2962,9 @@ fn test_err_exit_capped() { ); test_interpreter_and_jit_asm!( " - call 1 + call function_foo exit + function_foo: mov r0, r0 exit ", diff --git a/tests/verifier.rs b/tests/verifier.rs index dc846c58..be7320f9 100644 --- a/tests/verifier.rs +++ b/tests/verifier.rs @@ -348,7 +348,7 @@ fn test_verifier_err_unknown_opcode() { } #[test] -#[should_panic(expected = "InvalidFunction(1811268606)")] +#[should_panic(expected = "InvalidFunction(1811268607)")] fn test_verifier_unknown_sycall() { let prog = &[ 0x85, 0x00, 0x00, 0x00, 0xfe, 0xc3, 0xf5, 0x6b, // call 0x6bf5c3fe