From 8302f705aa44bd5e9e057d723f38dbe00e15fc31 Mon Sep 17 00:00:00 2001
From: condy
Date: Fri, 14 Feb 2020 21:21:59 +0800
Subject: [PATCH] Undump lua binary chunk

---
Note: this copy was rebuilt from a whitespace-collapsed extract. Stripped
generic parameters were restored from context and a few in-place fixes were
applied, so the blob hashes on the `index` lines no longer match.

 Cargo.lock           |   9 ++
 Cargo.toml           |   1 +
 src/binary/mod.rs    | 174 ++++++++++++++++++++++
 src/binary/reader.rs | 346 +++++++++++++++++++++++++++++++++++++++++++
 src/main.rs          |  13 ++
 5 files changed, 543 insertions(+)
 create mode 100644 src/binary/mod.rs
 create mode 100644 src/binary/reader.rs

diff --git a/Cargo.lock b/Cargo.lock
index f4a4bd7..8149b1d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,5 +1,14 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+[[package]]
+name = "byteorder"
+version = "1.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
+
 [[package]]
 name = "rua"
 version = "0.1.0"
+dependencies = [
+ "byteorder",
+]
diff --git a/Cargo.toml b/Cargo.toml
index cf3fc84..0108bf6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,3 +7,4 @@ edition = "2018"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+byteorder = "1.3.4"
\ No newline at end of file
diff --git a/src/binary/mod.rs b/src/binary/mod.rs
new file mode 100644
index 0000000..6808922
--- /dev/null
+++ b/src/binary/mod.rs
@@ -0,0 +1,174 @@
+use std::fs::File;
+use std::io::{self, BufReader};
+use std::mem;
+
+mod reader;
+use reader::Reader;
+
+const RUA_SIGNATURE: &[u8; 4] = b"\x1bLua";
+
+const RUA_MAJOR_VERSION: u8 = 5;
+const RUA_MINOR_VERSION: u8 = 3;
+const RUA_RELEASE_VERSION: u8 = 0;
+const RUA_VERSION: u8 = RUA_MAJOR_VERSION * 16 + RUA_MINOR_VERSION;
+
+const RUA_FORMAT: u8 = 0; // This is the official format
+const RUA_DATA: &[u8; 6] = b"\x19\x93\r\n\x1a\n"; // Lua 1.0 was released in 1993
+const RUA_INT_SIZE: u8 = mem::size_of::<i32>() as u8;
+const RUA_SIZET_SIZE: u8 = mem::size_of::<usize>() as u8;
+const RUA_INSTRUCTION_SIZE: u8 = mem::size_of::<u32>() as u8;
+const RUA_INTEGER_SIZE: u8 = mem::size_of::<i64>() as u8;
+const RUA_NUMBER_SIZE: u8 = mem::size_of::<f64>() as u8;
+const RUA_INTEGER_DATA: i64 = 0x5678;
+const RUA_NUMBER_DATA: f64 = 370.5;
+
+/// The constants in Rua
+#[derive(PartialEq, Debug)]
+pub enum Constant {
+    Nil,
+    Boolean(bool),
+    Number(f64),
+    Integer(i64),
+    Str(String),
+}
+
+/// The local variable in Rua
+#[derive(Eq, PartialEq, Debug, Hash)]
+pub struct LocalVariable {
+    pub name: String,
+    pub start_pc: u32,
+    pub end_pc: u32,
+}
+
+/// An upvalue descriptor, stored as two bytes (TODO: document `instack` and `idx`)
+#[derive(Eq, PartialEq, Debug, Hash)]
+pub struct UpValue {
+    pub instack: u8,
+    pub idx: u8,
+}
+
+/// The prototype of a function
+#[derive(PartialEq, Debug)]
+pub struct Prototype {
+    /// The name of the source in which the function was defined.
+    ///
+    /// If the function is an anonymous function, the source name is empty.
+    ///
+    /// If the source starts with "@", it means the binary chunk is indeed
+    /// compiled from the `Lua` source file. After removing the '@', the real
+    /// file name is obtained.
+    ///
+    /// If the source starts with "=", it has special meaning (e.g. "=stdin"
+    /// indicates that the binary chunk is compiled from standard input).
+    ///
+    /// If there is no "=", it indicates that the binary chunk was compiled from
+    /// a string provided by the programmer, and the source stores that string.
+    pub source: Option<String>,
+    /// The first line of the function
+    pub first_line: u32,
+    /// The last line of the function
+    pub last_line: u32,
+    /// The number of fixed parameters of the function. The fixed parameters
+    /// here are relative to the variadic length parameters (vararg).
+    pub params: u8,
+    /// Is it a variadic function?
+    pub variadic: u8,
+    /// TODO The number of registers
+    pub max_stack_size: u8,
+    /// The instructions table.
+    ///
+    /// Each instruction occupies 4 bytes.
+    pub instructions: Vec<u32>,
+    /// The constant table is used to store literals that appear in Lua code,
+    /// including `nil`, `boolean`, `integer`, `floating point number`, and
+    /// `string`.
+    ///
+    /// Each constant starts with a 1-byte tag to identify what type of constant
+    /// value is stored subsequently.
+    pub constants: Vec<Constant>,
+    /// TODO
+    ///
+    /// An `UpValue` takes 2 bytes
+    pub upvalues: Vec<UpValue>,
+    /// Sub-prototypes
+    pub protos: Vec<Prototype>,
+    /// The line information of each instruction.
+    pub line_infos: Vec<u32>,
+    /// The local variable table
+    pub local_vars: Vec<LocalVariable>,
+    /// TODO
+    ///
+    /// The name of an `UpValue`
+    pub upvalue_names: Vec<String>,
+}
+
+/// Undumps the precompiled chunk in `file`; a header mismatch currently panics (assertions).
+pub fn undump(file: File) -> io::Result<Prototype> {
+    let mut bufr = BufReader::new(file);
+    let mut reader = Reader::new(&mut bufr);
+
+    // Checks the magic number
+    assert_eq!(
+        reader.read_bytes(4)?,
+        RUA_SIGNATURE,
+        "not a precompiled chunk"
+    );
+
+    // Checks version
+    assert_eq!(reader.read_byte()?, RUA_VERSION, "version mismatch");
+
+    // Checks format
+    assert_eq!(reader.read_byte()?, RUA_FORMAT, "format mismatch");
+
+    // Checks data
+    assert_eq!(reader.read_bytes(6)?, RUA_DATA, "corrupted");
+
+    // Checks the size of int
+    assert_eq!(reader.read_byte()?, RUA_INT_SIZE, "sizeof(int) mismatch");
+
+    // Checks the size of size_t
+    assert_eq!(
+        reader.read_byte()?,
+        RUA_SIZET_SIZE,
+        "sizeof(size_t) mismatch"
+    );
+
+    // Checks the size of instruction
+    assert_eq!(
+        reader.read_byte()?,
+        RUA_INSTRUCTION_SIZE,
+        "sizeof(instruction) mismatch"
+    );
+
+    // Checks the size of Integer
+    assert_eq!(
+        reader.read_byte()?,
+        RUA_INTEGER_SIZE,
+        "sizeof(Integer) mismatch"
+    );
+
+    // Checks the size of Number
+    assert_eq!(
+        reader.read_byte()?,
+        RUA_NUMBER_SIZE,
+        "sizeof(Number) mismatch"
+    );
+
+    // Checks the endianness of Integer
+    assert_eq!(
+        reader.read_integer()?,
+        RUA_INTEGER_DATA,
+        "endianness mismatch"
+    );
+
+    // Checks the format of Number
+    assert!(
+        (reader.read_number()? - RUA_NUMBER_DATA).abs() < std::f64::EPSILON,
+        "float format mismatch"
+    );
+
+    // TODO: the byte read here (upvalue count of the main function) is discarded for now
+    let _upvalues = reader.read_byte()?;
+
+    reader.read_prototype()
+}
diff --git a/src/binary/reader.rs b/src/binary/reader.rs
new file mode 100644
index 0000000..ddcaef6
--- /dev/null
+++ b/src/binary/reader.rs
@@ -0,0 +1,346 @@
+use std::io;
+
+use byteorder::{NativeEndian, ReadBytesExt};
+
+use super::{Constant, LocalVariable, Prototype, UpValue};
+
+const TAG_NIL: u8 = 0x00;
+const TAG_BOOLEAN: u8 = 0x01;
+const TAG_NUMBER: u8 = 0x03;
+const TAG_INTEGER: u8 = 0x13;
+const TAG_SHORT_STR: u8 = 0x04;
+const TAG_LONG_STR: u8 = 0x14;
+
+/// A reader adaptor for a Rua binary chunk
+pub struct Reader<'a, T: ReadBytesExt> {
+    src: &'a mut T,
+}
+
+impl<'a, T: ReadBytesExt> Reader<'a, T> {
+    /// Constructs a `Reader` over any byte source such as a `File` or a `BufReader`.
+    pub fn new(src: &'a mut T) -> Self {
+        Self { src }
+    }
+
+    /// Returns 1 byte or yields an `io::Result::Err`
+    pub fn read_byte(&mut self) -> io::Result<u8> {
+        self.src.read_u8()
+    }
+
+    /// Returns exactly `n` bytes or yields an `io::Result::Err`
+    pub fn read_bytes(&mut self, n: usize) -> io::Result<Vec<u8>> {
+        // Zero-filled so that `read_exact` gets a buffer of exactly `n` bytes.
+        let mut buf = vec![0u8; n];
+        self.src.read_exact(&mut buf)?;
+        Ok(buf)
+    }
+
+    /// Returns a native-endian `u32` or yields an `io::Result::Err`
+    pub fn read_u32(&mut self) -> io::Result<u32> {
+        self.src.read_u32::<NativeEndian>()
+    }
+
+    /// Returns a native-endian `u64` or yields an `io::Result::Err`
+    pub fn read_u64(&mut self) -> io::Result<u64> {
+        self.src.read_u64::<NativeEndian>()
+    }
+
+    /// Returns an `Integer` (represented by i64) or yields an `io::Result::Err`
+    pub fn read_integer(&mut self) -> io::Result<i64> {
+        self.src.read_i64::<NativeEndian>()
+    }
+
+    /// Returns a `Number` (represented by f64) or yields an `io::Result::Err`
+    pub fn read_number(&mut self) -> io::Result<f64> {
+        self.src.read_f64::<NativeEndian>()
+    }
+
+    /// Returns a `String` (empty for the null string) or yields an `io::Result::Err`
+    pub fn read_string(&mut self) -> io::Result<String> {
+        Ok(self.read_string_impl()?.unwrap_or_default())
+    }
+
+    /// Reads a `u32` element count, then that many elements via `f`, into a `Vec`
+    pub fn read_vec<F, R>(&mut self, f: F) -> io::Result<Vec<R>>
+    where
+        F: Fn(&mut Self) -> io::Result<R>,
+    {
+        let n = self.read_u32()? as usize;
+        let mut vec = Vec::with_capacity(n);
+        for _i in 0..n {
+            vec.push(f(self)?);
+        }
+        Ok(vec)
+    }
+
+    /// Returns a [`Constant`] in Rua or yields an `io::Result::Err` (also for an unknown tag).
+    ///
+    /// It can be:
+    ///
+    /// - Nil
+    /// - Boolean
+    /// - Number
+    /// - Integer
+    /// - Str
+    ///
+    /// [`Constant`]: ../enum.Constant.html
+    pub fn read_constant(&mut self) -> io::Result<Constant> {
+        use Constant::*;
+
+        let tag = self.read_byte()?;
+        let c = match tag {
+            TAG_NIL => Nil,
+            TAG_BOOLEAN => Boolean(self.read_byte()? != 0),
+            TAG_NUMBER => Number(self.read_number()?),
+            TAG_INTEGER => Integer(self.read_integer()?),
+            TAG_SHORT_STR | TAG_LONG_STR => Str(self.read_string()?),
+            _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "corrupted!")),
+        };
+        Ok(c)
+    }
+
+    /// Returns an [`UpValue`] in Rua or yields an `io::Result::Err`
+    ///
+    /// [`UpValue`]: ../struct.UpValue.html
+    pub fn read_upvalue(&mut self) -> io::Result<UpValue> {
+        Ok(UpValue {
+            instack: self.read_byte()?,
+            idx: self.read_byte()?,
+        })
+    }
+
+    /// Returns a [`LocalVariable`] in Rua or yields an `io::Result::Err`
+    ///
+    /// [`LocalVariable`]: ../struct.LocalVariable.html
+    pub fn read_local_variable(&mut self) -> io::Result<LocalVariable> {
+        Ok(LocalVariable {
+            name: self.read_string()?,
+            start_pc: self.read_u32()?,
+            end_pc: self.read_u32()?,
+        })
+    }
+
+    /// Returns a [`Prototype`] in Rua or yields an `io::Result::Err`
+    ///
+    /// [`Prototype`]: ../struct.Prototype.html
+    pub fn read_prototype(&mut self) -> io::Result<Prototype> {
+        self.read_prototype_impl(None)
+    }
+
+    fn read_prototype_impl(&mut self, parent: Option<String>) -> io::Result<Prototype> {
+        let src = self.read_string_impl()?.or(parent);
+        Ok(Prototype {
+            source: src.clone(),
+            first_line: self.read_u32()?,
+            last_line: self.read_u32()?,
+            params: self.read_byte()?,
+            variadic: self.read_byte()?,
+            max_stack_size: self.read_byte()?,
+            instructions: self.read_vec(|r| r.read_u32())?,
+            constants: self.read_vec(|r| r.read_constant())?,
+            upvalues: self.read_vec(|r| r.read_upvalue())?,
+            protos: self.read_vec(|r| r.read_prototype_impl(src.clone()))?, // inherit source
+            line_infos: self.read_vec(|r| r.read_u32())?,
+            local_vars: self.read_vec(|r| r.read_local_variable())?,
+            upvalue_names: self.read_vec(|r| r.read_string())?,
+        })
+    }
+
+    fn read_string_impl(&mut self) -> io::Result<Option<String>> {
+        let mut sz = self.read_byte()? as usize;
+        if sz == 0xff {
+            sz = self.read_u64()? as usize;
+        }
+        if sz == 0 {
+            return Ok(None);
+        }
+        // The stored size counts one more than the payload; invalid UTF-8 is an error, not UB.
+        let raw = self.read_bytes(sz - 1)?;
+        String::from_utf8(raw).map(Some).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
+    }
+}
+
+#[cfg(all(target_arch = "x86_64", target_endian = "little"))]
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn reader_new() {
+        let mut src = b"0\n" as &[u8];
+        let _reader = Reader::new(&mut src);
+    }
+
+    #[test]
+    fn reader_read_byte() {
+        let mut src = b"\xff" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_byte().unwrap(), b'\xff');
+    }
+
+    #[test]
+    fn reader_read_bytes() {
+        let mut src = b"\xff\xfe\x00" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_bytes(2).unwrap(), vec![b'\xff', b'\xfe']);
+    }
+
+    #[test]
+    fn reader_read_u32() {
+        let mut src = b"\x00\x01\x02\x03" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_u32().unwrap(), 0x03020100);
+    }
+
+    #[test]
+    fn reader_read_u64() {
+        let mut src = b"\x00\x01\x02\x03\x04\x05\x06\x07" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_u64().unwrap(), 0x0706050403020100);
+    }
+
+    #[test]
+    fn reader_read_integer() {
+        let mut src = b"\x00\x01\x02\x03\x04\x05\x06\x07" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_integer().unwrap(), 0x0706050403020100);
+    }
+
+    #[test]
+    fn reader_read_number() {
+        let mut src = b"\x00\x00\x00\x00\x00\x28\x77\x40" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_number().unwrap(), 370.5);
+    }
+
+    #[test]
+    fn reader_read_string() {
+        // null string
+        let mut src = b"\x00" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_string().unwrap(), "");
+
+        // short string
+        let mut src = b"\x02A" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_string().unwrap(), "A");
+
+        // long string: 0xff marker, u64 size 0x100, then 255 bytes of payload
+        let mut src = io::Cursor::new([&b"\xff\x00\x01\x00\x00\x00\x00\x00\x00"[..], &[b'a'; 255]].concat());
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_string().unwrap(), "a".repeat(255));
+    }
+
+    #[test]
+    fn reader_read_vec() {
+        let mut src = b"\x03\x00\x00\x00bbb" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_vec(|r| r.read_byte()).unwrap(),
+            vec![b'b', b'b', b'b']
+        );
+    }
+
+    #[test]
+    fn reader_read_constant() {
+        let mut src = &[TAG_NIL] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_constant().unwrap(), Constant::Nil);
+
+        let mut src = &[TAG_BOOLEAN, b'\x01'] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_constant().unwrap(), Constant::Boolean(true));
+
+        let mut src = &[TAG_BOOLEAN, b'\x00'] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_constant().unwrap(), Constant::Boolean(false));
+
+        let mut src = &[
+            TAG_NUMBER, b'\x00', b'\x00', b'\x00', b'\x00', b'\x00', b'\x28', b'\x77', b'\x40',
+        ] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(reader.read_constant().unwrap(), Constant::Number(370.5));
+
+        let mut src = &[
+            TAG_INTEGER,
+            b'\x01',
+            b'\x02',
+            b'\x03',
+            b'\x04',
+            b'\x05',
+            b'\x06',
+            b'\x07',
+            b'\x08',
+        ] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_constant().unwrap(),
+            Constant::Integer(0x0807060504030201)
+        );
+
+        let mut src = &[TAG_SHORT_STR, b'\x00'] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_constant().unwrap(),
+            Constant::Str("".to_string())
+        );
+
+        let mut src = &[TAG_SHORT_STR, b'\x02', b'A'] as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_constant().unwrap(),
+            Constant::Str("A".to_string())
+        );
+    }
+
+    #[test]
+    fn reader_read_upvalue() {
+        let mut src = b"\x01\x02" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_upvalue().unwrap(),
+            UpValue {
+                instack: b'\x01',
+                idx: b'\x02',
+            }
+        );
+    }
+
+    #[test]
+    fn reader_read_local_variable() {
+        let mut src = b"\x00\x01\x00\x00\x00\x03\x00\x00\x00" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_local_variable().unwrap(),
+            LocalVariable {
+                name: "".to_string(),
+                start_pc: 0x01,
+                end_pc: 0x03,
+            }
+        );
+    }
+
+    #[test]
+    fn reader_read_prototype() {
+        let mut src = b"\x00\x00\x01\x02\x03\x01\x02\x03\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" as &[u8];
+        let mut reader = Reader::new(&mut src);
+        assert_eq!(
+            reader.read_prototype().unwrap(),
+            Prototype {
+                source: None,
+                first_line: 0x03020100,
+                last_line: 0x04030201,
+                params: b'\x00',
+                variadic: b'\x01',
+                max_stack_size: b'\x00',
+                instructions: Vec::new(),
+                constants: Vec::new(),
+                upvalues: Vec::new(),
+                protos: Vec::new(),
+                line_infos: Vec::new(),
+                local_vars: Vec::new(),
+                upvalue_names: Vec::new(),
+            }
+        );
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index e7a11a9..bb80de8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,16 @@
+use std::env;
+use std::fs::File;
+
+mod binary;
+use binary::undump;
+
 fn main() {
+    let mut args = env::args();
+    if args.len() > 1 {
+        let f = File::open(args.nth(1).unwrap()).expect("Failed to open file");
+        let proto = undump(f).expect("undump failed");
+        println!("proto = {:?}", proto);
+    }
+
     println!("Hello, world!");
 }