From 7c2ba7c41dfd9c296d53109840461590b3936283 Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Fri, 9 May 2025 19:06:37 +0100 Subject: [PATCH 1/5] add bounds checks to the emit_aux code --- backend/emitaux.ml | 42 +++++++++++++++++++++++--------- runtime/caml/frame_descriptors.h | 8 ++++++ 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/backend/emitaux.ml b/backend/emitaux.ml index 37fef20bd23..f6294c18aa0 100644 --- a/backend/emitaux.ml +++ b/backend/emitaux.ml @@ -209,6 +209,27 @@ type emit_frame_actions = } let emit_frames a = + let emit_i16 n = + if n < -0x8000 || n > 0x7FFF + then + Misc.fatal_errorf + "attempting to emit signed 16-bit integer %d out of range" n + else a.efa_16 n + in + let emit_u16 n = + if n < 0 || n > 0xFFFF + then + Misc.fatal_errorf + "attempting to emit unsigned 16-bit integer %d out of range" n + else a.efa_16 n + in + let emit_i32 n = + if n < -0x8000_0000 || n > 0x7FFF_FFFF + then + Misc.fatal_errorf + "attempting to emit signed 32-bit integer %d out of range" n + else a.efa_32 (Int32.of_int n) + in let filenames = Hashtbl.create 7 in let label_filename name = try Hashtbl.find filenames name @@ -244,7 +265,6 @@ let emit_frames a = Label_table.add debuginfos key lbl; lbl in - let emit_32 n = n |> Int32.of_int |> a.efa_32 in let emit_frame fd = let flags = get_flags fd.fd_debuginfo in a.efa_label_rel fd.fd_lbl 0l; @@ -252,12 +272,12 @@ let emit_frames a = below. *) if fd.fd_long then ( - a.efa_16 Flambda_backend_flags.max_long_frames_threshold; + emit_i16 Flambda_backend_flags.max_long_frames_threshold; a.efa_align 4); - let emit_16_or_32 = if fd.fd_long then emit_32 else a.efa_16 in - emit_16_or_32 (fd.fd_frame_size + flags); - emit_16_or_32 (List.length fd.fd_live_offset); - List.iter emit_16_or_32 fd.fd_live_offset; + let emit_signed_16_or_32 = if fd.fd_long then emit_i32 else emit_i16 in + emit_signed_16_or_32 (fd.fd_frame_size + flags); + emit_signed_16_or_32 (List.length fd.fd_live_offset); + List.iter emit_signed_16_or_32 fd.fd_live_offset; (match fd.fd_debuginfo with | _ when flags = 0 -> () | Dbg_other dbg -> @@ -284,7 +304,7 @@ let emit_frames a = List.iter (fun Cmm.{ alloc_dbg; _ } -> if is_none_dbg alloc_dbg - then a.efa_32 Int32.zero + then emit_i32 0 else a.efa_label_rel (label_debuginfos false alloc_dbg) Int32.zero) dbg)); a.efa_align Arch.size_addr @@ -295,9 +315,9 @@ let emit_frames a = in let emit_defname (_filename, defname, loc) (file_lbl, lbl) = let emit_loc (start_chr, end_chr, end_offset) = - a.efa_16 start_chr; - a.efa_16 end_chr; - a.efa_32 (Int32.of_int end_offset) + emit_u16 start_chr; + emit_u16 end_chr; + emit_i32 end_offset in (* These must be 32-bit aligned, both because they contain a 32-bit value, and because emit_debuginfo assumes the low 2 bits of their addresses are @@ -389,7 +409,7 @@ let emit_frames a = a.efa_label_rel (label_defname d.dinfo_file defname loc) (Int64.to_int32 info); - a.efa_32 (Int64.to_int32 (Int64.shift_right info 32)); + emit_i32 (Int32.to_int (Int64.to_int32 (Int64.shift_right info 32))); match rest with [] -> () | d :: rest -> emit false d rest in match rdbg with [] -> assert false | d :: rest -> emit rs d rest diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h index cfc9f7e262d..c7b8369279b 100644 --- a/runtime/caml/frame_descriptors.h +++ b/runtime/caml/frame_descriptors.h @@ -60,6 +60,14 @@ #define FRAME_RETURN_TO_C 0xFFFF #define FRAME_LONG_MARKER 0x7FFF + +/* CR sspies: The frame descriptions below are no longer accurate with respect to + whether the integers in the description are signed or unsigned. Due to recent compiler + changes, the live offsets can now be negative. To make the integer ranges explicit, + the frame table code in [emit_aux.ml] now contains explicit range checks. The fields + below, specifically, for frame_data, number of live offsets, and the live offsets + should now be signed. */ + typedef struct { int32_t retaddr_rel; /* offset of return address from &retaddr_rel */ uint16_t frame_data; /* frame size and various flags */ From 012c24d41cb027edd8b804670d8583a1380b0137 Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 13 May 2025 14:18:17 +0100 Subject: [PATCH 2/5] use signed number types and use unsigned integers for most fields of the frame descriptor --- backend/amd64/emit.ml | 17 ++++++++++++--- backend/emitaux.ml | 50 ++++++++++++++++++++++++++++++++----------- backend/emitaux.mli | 9 +++++--- 3 files changed, 58 insertions(+), 18 deletions(-) diff --git a/backend/amd64/emit.ml b/backend/amd64/emit.ml index fa64bdc79c5..a351be5a14e 100644 --- a/backend/amd64/emit.ml +++ b/backend/amd64/emit.ml @@ -2621,9 +2621,20 @@ let end_assembly () = emit_frames { efa_code_label = (fun l -> D.qword (ConstLabel (emit_label l))); efa_data_label = (fun l -> D.qword (ConstLabel (emit_label l))); - efa_8 = (fun n -> D.byte (const n)); - efa_16 = (fun n -> D.word (const n)); - efa_32 = (fun n -> D.long (const_32 n)); + (* Below, we emit constants of different sizes. The x86 emitter internally + uses int64 to represent their value and the directives [D.byte], ..., + [D.qword] control which assembly directive will be used to emit them. + Thus, even though we cast them to integers of larger sizes here (i.e., + [int64]), they do not take up more space than indicated by the + directive. *) + efa_i8 = (fun n -> D.byte (Const (Int64.of_int (Numbers.Int8.to_int n)))); + efa_i16 = + (fun n -> D.word (Const (Int64.of_int (Numbers.Int16.to_int n)))); + efa_i32 = (fun n -> D.long (Const (Int64.of_int32 n))); + efa_u8 = (fun n -> D.byte (Const (Int64.of_int (Numbers.Uint8.to_int n)))); + efa_u16 = + (fun n -> D.word (Const (Int64.of_int (Numbers.Uint16.to_int n)))); + efa_u32 = (fun n -> D.long (Const (Numbers.Uint32.to_int64 n))); efa_word = (fun n -> D.qword (const n)); efa_align = D.align ~data:true; efa_label_rel = diff --git a/backend/emitaux.ml b/backend/emitaux.ml index f6294c18aa0..0d0ea6a345d 100644 --- a/backend/emitaux.ml +++ b/backend/emitaux.ml @@ -198,9 +198,12 @@ let record_frame_descr ~label ~frame_size ~live_offset debuginfo = type emit_frame_actions = { efa_code_label : Label.t -> unit; efa_data_label : Label.t -> unit; - efa_8 : int -> unit; - efa_16 : int -> unit; - efa_32 : int32 -> unit; + efa_i8 : Numbers.Int8.t -> unit; + efa_i16 : Numbers.Int16.t -> unit; + efa_i32 : Int32.t -> unit; + efa_u8 : Numbers.Uint8.t -> unit; + efa_u16 : Numbers.Uint16.t -> unit; + efa_u32 : Numbers.Uint32.t -> unit; efa_word : int -> unit; efa_align : int -> unit; efa_label_rel : Label.t -> int32 -> unit; @@ -209,26 +212,40 @@ type emit_frame_actions = } let emit_frames a = + let emit_u8 n = + if n < 0 || n > 0xFF + then + Misc.fatal_errorf + "attempting to emit unsigned 8-bit integer %d out of range" n + else a.efa_u8 (Numbers.Uint8.of_nonnegative_int_exn n) + in let emit_i16 n = if n < -0x8000 || n > 0x7FFF then Misc.fatal_errorf "attempting to emit signed 16-bit integer %d out of range" n - else a.efa_16 n + else a.efa_i16 (Numbers.Int16.of_int_exn n) in let emit_u16 n = if n < 0 || n > 0xFFFF then Misc.fatal_errorf "attempting to emit unsigned 16-bit integer %d out of range" n - else a.efa_16 n + else a.efa_u16 (Numbers.Uint16.of_nonnegative_int_exn n) in let emit_i32 n = if n < -0x8000_0000 || n > 0x7FFF_FFFF then Misc.fatal_errorf "attempting to emit signed 32-bit integer %d out of range" n - else a.efa_32 (Int32.of_int n) + else a.efa_i32 (Int32.of_int n) + in + let emit_u32 n = + if n < 0 || n > 0xFFFF_FFFF + then + Misc.fatal_errorf + "attempting to emit unsigned 32-bit integer %d out of range" n + else a.efa_u32 (Numbers.Uint32.of_nonnegative_int_exn n) in let filenames = Hashtbl.create 7 in let label_filename name = @@ -272,12 +289,21 @@ let emit_frames a = below. *) if fd.fd_long then ( - emit_i16 Flambda_backend_flags.max_long_frames_threshold; + emit_u16 Flambda_backend_flags.max_long_frames_threshold; a.efa_align 4); let emit_signed_16_or_32 = if fd.fd_long then emit_i32 else emit_i16 in - emit_signed_16_or_32 (fd.fd_frame_size + flags); - emit_signed_16_or_32 (List.length fd.fd_live_offset); - List.iter emit_signed_16_or_32 fd.fd_live_offset; + let emit_unsigned_16_or_32 = if fd.fd_long then emit_u32 else emit_u16 in + let emit_live_offset n = + (* On runtime 4, the live offsets can be negative. As such, we emit them + as signed integers (and truncate the upper bound to 0x7f...ff); on + runtime 5 they are always unsigned. *) + if Config.runtime5 + then emit_unsigned_16_or_32 n + else emit_signed_16_or_32 n + in + emit_unsigned_16_or_32 (fd.fd_frame_size + flags); + emit_unsigned_16_or_32 (List.length fd.fd_live_offset); + List.iter emit_live_offset fd.fd_live_offset; (match fd.fd_debuginfo with | _ when flags = 0 -> () | Dbg_other dbg -> @@ -288,7 +314,7 @@ let emit_frames a = a.efa_label_rel (label_debuginfos true dbg) Int32.zero | Dbg_alloc dbg -> assert (List.length dbg < 256); - a.efa_8 (List.length dbg); + emit_u8 (List.length dbg); List.iter (fun Cmm.{ alloc_words; _ } -> (* Possible allocations range between 2 and 257 *) @@ -296,7 +322,7 @@ let emit_frames a = 2 <= alloc_words && alloc_words - 1 <= Config.max_young_wosize && Config.max_young_wosize <= 256); - a.efa_8 (alloc_words - 2)) + emit_u8 (alloc_words - 2)) dbg; if flags = 3 then ( diff --git a/backend/emitaux.mli b/backend/emitaux.mli index 3aa0ae31eca..6ef7c9bcfcb 100644 --- a/backend/emitaux.mli +++ b/backend/emitaux.mli @@ -100,9 +100,12 @@ val record_frame_descr : type emit_frame_actions = { efa_code_label : Label.t -> unit; efa_data_label : Label.t -> unit; - efa_8 : int -> unit; - efa_16 : int -> unit; - efa_32 : int32 -> unit; + efa_i8 : Numbers.Int8.t -> unit; + efa_i16 : Numbers.Int16.t -> unit; + efa_i32 : Int32.t -> unit; + efa_u8 : Numbers.Uint8.t -> unit; + efa_u16 : Numbers.Uint16.t -> unit; + efa_u32 : Numbers.Uint32.t -> unit; efa_word : int -> unit; efa_align : int -> unit; efa_label_rel : Label.t -> int32 -> unit; From 308a308e2754f8c5f165662e8b4897da5286101b Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 13 May 2025 14:18:30 +0100 Subject: [PATCH 3/5] update comment --- runtime/caml/frame_descriptors.h | 7 ------- runtime4/caml/stack.h | 4 ++++ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h index c7b8369279b..3e1424d1822 100644 --- a/runtime/caml/frame_descriptors.h +++ b/runtime/caml/frame_descriptors.h @@ -61,13 +61,6 @@ #define FRAME_LONG_MARKER 0x7FFF -/* CR sspies: The frame descriptions below are no longer accurate with respect to - whether the integers in the description are signed or unsigned. Due to recent compiler - changes, the live offsets can now be negative. To make the integer ranges explicit, - the frame table code in [emit_aux.ml] now contains explicit range checks. The fields - below, specifically, for frame_data, number of live offsets, and the live offsets - should now be signed. */ - typedef struct { int32_t retaddr_rel; /* offset of return address from &retaddr_rel */ uint16_t frame_data; /* frame size and various flags */ diff --git a/runtime4/caml/stack.h b/runtime4/caml/stack.h index 15877b6a4d2..44dd62581e6 100644 --- a/runtime4/caml/stack.h +++ b/runtime4/caml/stack.h @@ -86,6 +86,10 @@ struct caml_context { }; /* Structure of frame descriptors */ +/* Warning: The live offsets of frame descriptors are declared as unsigned integers below. + However, on runtime 4, they can also be negative, so values above 0x7f...ff should be + interpreted as negative. */ + typedef struct { int32_t retaddr_rel; unsigned short frame_size; From 59aafeb13a9eebad979652729ffaaa2884c0ebc9 Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Tue, 13 May 2025 14:22:59 +0100 Subject: [PATCH 4/5] update Arm --- backend/arm64/emit.ml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/arm64/emit.ml b/backend/arm64/emit.ml index c04d1780a48..cec8ea596a3 100644 --- a/backend/arm64/emit.ml +++ b/backend/arm64/emit.ml @@ -2219,10 +2219,12 @@ let end_assembly () = let lbl = label_to_asm_label ~section:Data lbl in D.type_label ~ty:Object lbl; D.label lbl); - efa_8 = (fun n -> D.uint8 (Numbers.Uint8.of_nonnegative_int_exn n)); - efa_16 = (fun n -> D.uint16 (Numbers.Uint16.of_nonnegative_int_exn n)); - (* CR sspies: for some reason, we can get negative numbers here *) - efa_32 = (fun n -> D.int32 n); + efa_i8 = (fun n -> D.int8 n); + efa_i16 = (fun n -> D.int16 n); + efa_i32 = (fun n -> D.int32 n); + efa_u8 = (fun n -> D.uint8 n); + efa_u16 = (fun n -> D.uint16 n); + efa_u32 = (fun n -> D.uint32 n); efa_word = (fun n -> D.targetint (Targetint.of_int_exn n)); efa_align = (fun n -> D.align ~bytes:n); efa_label_rel = From 6121838e8a01a68a8cb0bcb2b17cdc03a1819f8e Mon Sep 17 00:00:00 2001 From: Simon Spies Date: Wed, 14 May 2025 11:56:51 +0100 Subject: [PATCH 5/5] review --- backend/emitaux.ml | 46 +++++++++++++++++--------------- runtime/caml/frame_descriptors.h | 1 - 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/backend/emitaux.ml b/backend/emitaux.ml index 0d0ea6a345d..26ce15c7e66 100644 --- a/backend/emitaux.ml +++ b/backend/emitaux.ml @@ -212,40 +212,39 @@ type emit_frame_actions = } let emit_frames a = + (* The emit functions below perform bounds checks for the corresponding ranges + via the conversion functions that raise exceptions. [int32] does not have + such a function so we perform the check manually here. *) let emit_u8 n = - if n < 0 || n > 0xFF - then - Misc.fatal_errorf - "attempting to emit unsigned 8-bit integer %d out of range" n - else a.efa_u8 (Numbers.Uint8.of_nonnegative_int_exn n) + let n = Numbers.Uint8.of_nonnegative_int_exn n in + a.efa_u8 n + in + let[@warning "-26"] emit_i8 n = + (* unused, but here for completeness *) + let n = Numbers.Int8.of_int_exn n in + a.efa_i8 n in let emit_i16 n = - if n < -0x8000 || n > 0x7FFF - then - Misc.fatal_errorf - "attempting to emit signed 16-bit integer %d out of range" n - else a.efa_i16 (Numbers.Int16.of_int_exn n) + let n = Numbers.Int16.of_int_exn n in + a.efa_i16 n in let emit_u16 n = - if n < 0 || n > 0xFFFF - then - Misc.fatal_errorf - "attempting to emit unsigned 16-bit integer %d out of range" n - else a.efa_u16 (Numbers.Uint16.of_nonnegative_int_exn n) + let n = Numbers.Uint16.of_nonnegative_int_exn n in + a.efa_u16 n in let emit_i32 n = - if n < -0x8000_0000 || n > 0x7FFF_FFFF + let min_i32 = Int64.neg (Int64.shift_left 1L 31) (* -0x8000_0000 *) + and max_i32 = Int64.sub (Int64.shift_left 1L 31) 1L (* 0x7fff_ffff *) + and n_64 = Int64.of_int n in + if Int64.compare n_64 min_i32 < 0 || Int64.compare n_64 max_i32 > 0 then Misc.fatal_errorf "attempting to emit signed 32-bit integer %d out of range" n else a.efa_i32 (Int32.of_int n) in let emit_u32 n = - if n < 0 || n > 0xFFFF_FFFF - then - Misc.fatal_errorf - "attempting to emit unsigned 32-bit integer %d out of range" n - else a.efa_u32 (Numbers.Uint32.of_nonnegative_int_exn n) + let n = Numbers.Uint32.of_nonnegative_int_exn n in + a.efa_u32 n in let filenames = Hashtbl.create 7 in let label_filename name = @@ -435,7 +434,10 @@ let emit_frames a = a.efa_label_rel (label_defname d.dinfo_file defname loc) (Int64.to_int32 info); - emit_i32 (Int32.to_int (Int64.to_int32 (Int64.shift_right info 32))); + (* We use [efa_i32] directly here instead of [emit_i32] to avoid a + round-trip via [int], which would break on 32-bit platforms. The right + shift ensures that the integer is in range of [int32]. *) + a.efa_i32 (Int64.to_int32 (Int64.shift_right info 32)); match rest with [] -> () | d :: rest -> emit false d rest in match rdbg with [] -> assert false | d :: rest -> emit rs d rest diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h index 3e1424d1822..cfc9f7e262d 100644 --- a/runtime/caml/frame_descriptors.h +++ b/runtime/caml/frame_descriptors.h @@ -60,7 +60,6 @@ #define FRAME_RETURN_TO_C 0xFFFF #define FRAME_LONG_MARKER 0x7FFF - typedef struct { int32_t retaddr_rel; /* offset of return address from &retaddr_rel */ uint16_t frame_data; /* frame size and various flags */