From 7c2ba7c41dfd9c296d53109840461590b3936283 Mon Sep 17 00:00:00 2001
From: Simon Spies <sspies@janestreet.com>
Date: Fri, 9 May 2025 19:06:37 +0100
Subject: [PATCH 1/5] add bounds checks to the emit_aux code

---
 backend/emitaux.ml               | 42 +++++++++++++++++++++++---------
 runtime/caml/frame_descriptors.h |  8 ++++++
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/backend/emitaux.ml b/backend/emitaux.ml
index 37fef20bd23..f6294c18aa0 100644
--- a/backend/emitaux.ml
+++ b/backend/emitaux.ml
@@ -209,6 +209,27 @@ type emit_frame_actions =
   }
 
 let emit_frames a =
+  let emit_i16 n =
+    if n < -0x8000 || n > 0x7FFF
+    then
+      Misc.fatal_errorf
+        "attempting to emit signed 16-bit integer %d out of range" n
+    else a.efa_16 n
+  in
+  let emit_u16 n =
+    if n < 0 || n > 0xFFFF
+    then
+      Misc.fatal_errorf
+        "attempting to emit unsigned 16-bit integer %d out of range" n
+    else a.efa_16 n
+  in
+  let emit_i32 n =
+    if n < -0x8000_0000 || n > 0x7FFF_FFFF
+    then
+      Misc.fatal_errorf
+        "attempting to emit signed 32-bit integer %d out of range" n
+    else a.efa_32 (Int32.of_int n)
+  in
   let filenames = Hashtbl.create 7 in
   let label_filename name =
     try Hashtbl.find filenames name
@@ -244,7 +265,6 @@ let emit_frames a =
       Label_table.add debuginfos key lbl;
       lbl
   in
-  let emit_32 n = n |> Int32.of_int |> a.efa_32 in
   let emit_frame fd =
     let flags = get_flags fd.fd_debuginfo in
     a.efa_label_rel fd.fd_lbl 0l;
@@ -252,12 +272,12 @@ let emit_frames a =
        below. *)
     if fd.fd_long
     then (
-      a.efa_16 Flambda_backend_flags.max_long_frames_threshold;
+      emit_i16 Flambda_backend_flags.max_long_frames_threshold;
       a.efa_align 4);
-    let emit_16_or_32 = if fd.fd_long then emit_32 else a.efa_16 in
-    emit_16_or_32 (fd.fd_frame_size + flags);
-    emit_16_or_32 (List.length fd.fd_live_offset);
-    List.iter emit_16_or_32 fd.fd_live_offset;
+    let emit_signed_16_or_32 = if fd.fd_long then emit_i32 else emit_i16 in
+    emit_signed_16_or_32 (fd.fd_frame_size + flags);
+    emit_signed_16_or_32 (List.length fd.fd_live_offset);
+    List.iter emit_signed_16_or_32 fd.fd_live_offset;
     (match fd.fd_debuginfo with
     | _ when flags = 0 -> ()
     | Dbg_other dbg ->
@@ -284,7 +304,7 @@ let emit_frames a =
         List.iter
           (fun Cmm.{ alloc_dbg; _ } ->
             if is_none_dbg alloc_dbg
-            then a.efa_32 Int32.zero
+            then emit_i32 0
             else a.efa_label_rel (label_debuginfos false alloc_dbg) Int32.zero)
           dbg));
     a.efa_align Arch.size_addr
@@ -295,9 +315,9 @@ let emit_frames a =
   in
   let emit_defname (_filename, defname, loc) (file_lbl, lbl) =
     let emit_loc (start_chr, end_chr, end_offset) =
-      a.efa_16 start_chr;
-      a.efa_16 end_chr;
-      a.efa_32 (Int32.of_int end_offset)
+      emit_u16 start_chr;
+      emit_u16 end_chr;
+      emit_i32 end_offset
     in
     (* These must be 32-bit aligned, both because they contain a 32-bit value,
        and because emit_debuginfo assumes the low 2 bits of their addresses are
@@ -389,7 +409,7 @@ let emit_frames a =
       a.efa_label_rel
         (label_defname d.dinfo_file defname loc)
         (Int64.to_int32 info);
-      a.efa_32 (Int64.to_int32 (Int64.shift_right info 32));
+      emit_i32 (Int32.to_int (Int64.to_int32 (Int64.shift_right info 32)));
       match rest with [] -> () | d :: rest -> emit false d rest
     in
     match rdbg with [] -> assert false | d :: rest -> emit rs d rest
diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h
index cfc9f7e262d..c7b8369279b 100644
--- a/runtime/caml/frame_descriptors.h
+++ b/runtime/caml/frame_descriptors.h
@@ -60,6 +60,14 @@
 #define FRAME_RETURN_TO_C 0xFFFF
 #define FRAME_LONG_MARKER 0x7FFF
 
+
+/* CR sspies: The frame descriptions below are no longer accurate with respect to
+   whether the integers in the description are signed or unsigned. Due to recent compiler
+   changes, the live offsets can now be negative. To make the integer ranges explicit,
+   the frame table code in [emit_aux.ml] now contains explicit range checks. The fields
+   below, specifically, for frame_data, number of live offsets, and the live offsets
+   should now be signed.  */
+
 typedef struct {
   int32_t retaddr_rel; /* offset of return address from &retaddr_rel */
   uint16_t frame_data; /* frame size and various flags */

From 012c24d41cb027edd8b804670d8583a1380b0137 Mon Sep 17 00:00:00 2001
From: Simon Spies <sspies@janestreet.com>
Date: Tue, 13 May 2025 14:18:17 +0100
Subject: [PATCH 2/5] use signed number types and use unsigned integers for
 most fields of the frame descriptor

---
 backend/amd64/emit.ml | 17 ++++++++++++---
 backend/emitaux.ml    | 50 ++++++++++++++++++++++++++++++++-----------
 backend/emitaux.mli   |  9 +++++---
 3 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/backend/amd64/emit.ml b/backend/amd64/emit.ml
index fa64bdc79c5..a351be5a14e 100644
--- a/backend/amd64/emit.ml
+++ b/backend/amd64/emit.ml
@@ -2621,9 +2621,20 @@ let end_assembly () =
   emit_frames
     { efa_code_label = (fun l -> D.qword (ConstLabel (emit_label l)));
       efa_data_label = (fun l -> D.qword (ConstLabel (emit_label l)));
-      efa_8 = (fun n -> D.byte (const n));
-      efa_16 = (fun n -> D.word (const n));
-      efa_32 = (fun n -> D.long (const_32 n));
+      (* Below, we emit constants of different sizes. The x86 emitter internally
+         uses int64 to represent their value and the directives [D.byte], ...,
+         [D.qword] control which assembly directive will be used to emit them.
+         Thus, even though we cast them to integers of larger sizes here (i.e.,
+         [int64]), they do not take up more space than indicated by the
+         directive. *)
+      efa_i8 = (fun n -> D.byte (Const (Int64.of_int (Numbers.Int8.to_int n))));
+      efa_i16 =
+        (fun n -> D.word (Const (Int64.of_int (Numbers.Int16.to_int n))));
+      efa_i32 = (fun n -> D.long (Const (Int64.of_int32 n)));
+      efa_u8 = (fun n -> D.byte (Const (Int64.of_int (Numbers.Uint8.to_int n))));
+      efa_u16 =
+        (fun n -> D.word (Const (Int64.of_int (Numbers.Uint16.to_int n))));
+      efa_u32 = (fun n -> D.long (Const (Numbers.Uint32.to_int64 n)));
       efa_word = (fun n -> D.qword (const n));
       efa_align = D.align ~data:true;
       efa_label_rel =
diff --git a/backend/emitaux.ml b/backend/emitaux.ml
index f6294c18aa0..0d0ea6a345d 100644
--- a/backend/emitaux.ml
+++ b/backend/emitaux.ml
@@ -198,9 +198,12 @@ let record_frame_descr ~label ~frame_size ~live_offset debuginfo =
 type emit_frame_actions =
   { efa_code_label : Label.t -> unit;
     efa_data_label : Label.t -> unit;
-    efa_8 : int -> unit;
-    efa_16 : int -> unit;
-    efa_32 : int32 -> unit;
+    efa_i8 : Numbers.Int8.t -> unit;
+    efa_i16 : Numbers.Int16.t -> unit;
+    efa_i32 : Int32.t -> unit;
+    efa_u8 : Numbers.Uint8.t -> unit;
+    efa_u16 : Numbers.Uint16.t -> unit;
+    efa_u32 : Numbers.Uint32.t -> unit;
     efa_word : int -> unit;
     efa_align : int -> unit;
     efa_label_rel : Label.t -> int32 -> unit;
@@ -209,26 +212,40 @@ type emit_frame_actions =
   }
 
 let emit_frames a =
+  let emit_u8 n =
+    if n < 0 || n > 0xFF
+    then
+      Misc.fatal_errorf
+        "attempting to emit unsigned 8-bit integer %d out of range" n
+    else a.efa_u8 (Numbers.Uint8.of_nonnegative_int_exn n)
+  in
   let emit_i16 n =
     if n < -0x8000 || n > 0x7FFF
     then
       Misc.fatal_errorf
         "attempting to emit signed 16-bit integer %d out of range" n
-    else a.efa_16 n
+    else a.efa_i16 (Numbers.Int16.of_int_exn n)
   in
   let emit_u16 n =
     if n < 0 || n > 0xFFFF
     then
       Misc.fatal_errorf
         "attempting to emit unsigned 16-bit integer %d out of range" n
-    else a.efa_16 n
+    else a.efa_u16 (Numbers.Uint16.of_nonnegative_int_exn n)
   in
   let emit_i32 n =
     if n < -0x8000_0000 || n > 0x7FFF_FFFF
     then
       Misc.fatal_errorf
         "attempting to emit signed 32-bit integer %d out of range" n
-    else a.efa_32 (Int32.of_int n)
+    else a.efa_i32 (Int32.of_int n)
+  in
+  let emit_u32 n =
+    if n < 0 || n > 0xFFFF_FFFF
+    then
+      Misc.fatal_errorf
+        "attempting to emit unsigned 32-bit integer %d out of range" n
+    else a.efa_u32 (Numbers.Uint32.of_nonnegative_int_exn n)
   in
   let filenames = Hashtbl.create 7 in
   let label_filename name =
@@ -272,12 +289,21 @@ let emit_frames a =
        below. *)
     if fd.fd_long
     then (
-      emit_i16 Flambda_backend_flags.max_long_frames_threshold;
+      emit_u16 Flambda_backend_flags.max_long_frames_threshold;
       a.efa_align 4);
     let emit_signed_16_or_32 = if fd.fd_long then emit_i32 else emit_i16 in
-    emit_signed_16_or_32 (fd.fd_frame_size + flags);
-    emit_signed_16_or_32 (List.length fd.fd_live_offset);
-    List.iter emit_signed_16_or_32 fd.fd_live_offset;
+    let emit_unsigned_16_or_32 = if fd.fd_long then emit_u32 else emit_u16 in
+    let emit_live_offset n =
+      (* On runtime 4, the live offsets can be negative. As such, we emit them
+         as signed integers (and truncate the upper bound to 0x7f...ff); on
+         runtime 5 they are always unsigned. *)
+      if Config.runtime5
+      then emit_unsigned_16_or_32 n
+      else emit_signed_16_or_32 n
+    in
+    emit_unsigned_16_or_32 (fd.fd_frame_size + flags);
+    emit_unsigned_16_or_32 (List.length fd.fd_live_offset);
+    List.iter emit_live_offset fd.fd_live_offset;
     (match fd.fd_debuginfo with
     | _ when flags = 0 -> ()
     | Dbg_other dbg ->
@@ -288,7 +314,7 @@ let emit_frames a =
       a.efa_label_rel (label_debuginfos true dbg) Int32.zero
     | Dbg_alloc dbg ->
       assert (List.length dbg < 256);
-      a.efa_8 (List.length dbg);
+      emit_u8 (List.length dbg);
       List.iter
         (fun Cmm.{ alloc_words; _ } ->
           (* Possible allocations range between 2 and 257 *)
@@ -296,7 +322,7 @@ let emit_frames a =
             2 <= alloc_words
             && alloc_words - 1 <= Config.max_young_wosize
             && Config.max_young_wosize <= 256);
-          a.efa_8 (alloc_words - 2))
+          emit_u8 (alloc_words - 2))
         dbg;
       if flags = 3
       then (
diff --git a/backend/emitaux.mli b/backend/emitaux.mli
index 3aa0ae31eca..6ef7c9bcfcb 100644
--- a/backend/emitaux.mli
+++ b/backend/emitaux.mli
@@ -100,9 +100,12 @@ val record_frame_descr :
 type emit_frame_actions =
   { efa_code_label : Label.t -> unit;
     efa_data_label : Label.t -> unit;
-    efa_8 : int -> unit;
-    efa_16 : int -> unit;
-    efa_32 : int32 -> unit;
+    efa_i8 : Numbers.Int8.t -> unit;
+    efa_i16 : Numbers.Int16.t -> unit;
+    efa_i32 : Int32.t -> unit;
+    efa_u8 : Numbers.Uint8.t -> unit;
+    efa_u16 : Numbers.Uint16.t -> unit;
+    efa_u32 : Numbers.Uint32.t -> unit;
     efa_word : int -> unit;
     efa_align : int -> unit;
     efa_label_rel : Label.t -> int32 -> unit;

From 308a308e2754f8c5f165662e8b4897da5286101b Mon Sep 17 00:00:00 2001
From: Simon Spies <sspies@janestreet.com>
Date: Tue, 13 May 2025 14:18:30 +0100
Subject: [PATCH 3/5] update comment

---
 runtime/caml/frame_descriptors.h | 7 -------
 runtime4/caml/stack.h            | 4 ++++
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h
index c7b8369279b..3e1424d1822 100644
--- a/runtime/caml/frame_descriptors.h
+++ b/runtime/caml/frame_descriptors.h
@@ -61,13 +61,6 @@
 #define FRAME_LONG_MARKER 0x7FFF
 
 
-/* CR sspies: The frame descriptions below are no longer accurate with respect to
-   whether the integers in the description are signed or unsigned. Due to recent compiler
-   changes, the live offsets can now be negative. To make the integer ranges explicit,
-   the frame table code in [emit_aux.ml] now contains explicit range checks. The fields
-   below, specifically, for frame_data, number of live offsets, and the live offsets
-   should now be signed.  */
-
 typedef struct {
   int32_t retaddr_rel; /* offset of return address from &retaddr_rel */
   uint16_t frame_data; /* frame size and various flags */
diff --git a/runtime4/caml/stack.h b/runtime4/caml/stack.h
index 15877b6a4d2..44dd62581e6 100644
--- a/runtime4/caml/stack.h
+++ b/runtime4/caml/stack.h
@@ -86,6 +86,10 @@ struct caml_context {
 };
 
 /* Structure of frame descriptors */
+/* Warning: The live offsets of frame descriptors are declared as unsigned integers below.
+   However, on runtime 4, they can also be negative, so values above 0x7f...ff should be
+   interpreted as negative.  */
+
 typedef struct {
   int32_t retaddr_rel;
   unsigned short frame_size;

From 59aafeb13a9eebad979652729ffaaa2884c0ebc9 Mon Sep 17 00:00:00 2001
From: Simon Spies <sspies@janestreet.com>
Date: Tue, 13 May 2025 14:22:59 +0100
Subject: [PATCH 4/5] update Arm

---
 backend/arm64/emit.ml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/backend/arm64/emit.ml b/backend/arm64/emit.ml
index c04d1780a48..cec8ea596a3 100644
--- a/backend/arm64/emit.ml
+++ b/backend/arm64/emit.ml
@@ -2219,10 +2219,12 @@ let end_assembly () =
           let lbl = label_to_asm_label ~section:Data lbl in
           D.type_label ~ty:Object lbl;
           D.label lbl);
-      efa_8 = (fun n -> D.uint8 (Numbers.Uint8.of_nonnegative_int_exn n));
-      efa_16 = (fun n -> D.uint16 (Numbers.Uint16.of_nonnegative_int_exn n));
-      (* CR sspies: for some reason, we can get negative numbers here *)
-      efa_32 = (fun n -> D.int32 n);
+      efa_i8 = (fun n -> D.int8 n);
+      efa_i16 = (fun n -> D.int16 n);
+      efa_i32 = (fun n -> D.int32 n);
+      efa_u8 = (fun n -> D.uint8 n);
+      efa_u16 = (fun n -> D.uint16 n);
+      efa_u32 = (fun n -> D.uint32 n);
       efa_word = (fun n -> D.targetint (Targetint.of_int_exn n));
       efa_align = (fun n -> D.align ~bytes:n);
       efa_label_rel =

From 6121838e8a01a68a8cb0bcb2b17cdc03a1819f8e Mon Sep 17 00:00:00 2001
From: Simon Spies <sspies@janestreet.com>
Date: Wed, 14 May 2025 11:56:51 +0100
Subject: [PATCH 5/5] review

---
 backend/emitaux.ml               | 46 +++++++++++++++++---------------
 runtime/caml/frame_descriptors.h |  1 -
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/backend/emitaux.ml b/backend/emitaux.ml
index 0d0ea6a345d..26ce15c7e66 100644
--- a/backend/emitaux.ml
+++ b/backend/emitaux.ml
@@ -212,40 +212,39 @@ type emit_frame_actions =
   }
 
 let emit_frames a =
+  (* The emit functions below perform bounds checks for the corresponding ranges
+     via the conversion functions that raise exceptions. [int32] does not have
+     such a function so we perform the check manually here. *)
   let emit_u8 n =
-    if n < 0 || n > 0xFF
-    then
-      Misc.fatal_errorf
-        "attempting to emit unsigned 8-bit integer %d out of range" n
-    else a.efa_u8 (Numbers.Uint8.of_nonnegative_int_exn n)
+    let n = Numbers.Uint8.of_nonnegative_int_exn n in
+    a.efa_u8 n
+  in
+  let[@warning "-26"] emit_i8 n =
+    (* unused, but here for completeness *)
+    let n = Numbers.Int8.of_int_exn n in
+    a.efa_i8 n
   in
   let emit_i16 n =
-    if n < -0x8000 || n > 0x7FFF
-    then
-      Misc.fatal_errorf
-        "attempting to emit signed 16-bit integer %d out of range" n
-    else a.efa_i16 (Numbers.Int16.of_int_exn n)
+    let n = Numbers.Int16.of_int_exn n in
+    a.efa_i16 n
   in
   let emit_u16 n =
-    if n < 0 || n > 0xFFFF
-    then
-      Misc.fatal_errorf
-        "attempting to emit unsigned 16-bit integer %d out of range" n
-    else a.efa_u16 (Numbers.Uint16.of_nonnegative_int_exn n)
+    let n = Numbers.Uint16.of_nonnegative_int_exn n in
+    a.efa_u16 n
   in
   let emit_i32 n =
-    if n < -0x8000_0000 || n > 0x7FFF_FFFF
+    let min_i32 = Int64.neg (Int64.shift_left 1L 31) (* -0x8000_0000 *)
+    and max_i32 = Int64.sub (Int64.shift_left 1L 31) 1L (* 0x7fff_ffff *)
+    and n_64 = Int64.of_int n in
+    if Int64.compare n_64 min_i32 < 0 || Int64.compare n_64 max_i32 > 0
     then
       Misc.fatal_errorf
         "attempting to emit signed 32-bit integer %d out of range" n
     else a.efa_i32 (Int32.of_int n)
   in
   let emit_u32 n =
-    if n < 0 || n > 0xFFFF_FFFF
-    then
-      Misc.fatal_errorf
-        "attempting to emit unsigned 32-bit integer %d out of range" n
-    else a.efa_u32 (Numbers.Uint32.of_nonnegative_int_exn n)
+    let n = Numbers.Uint32.of_nonnegative_int_exn n in
+    a.efa_u32 n
   in
   let filenames = Hashtbl.create 7 in
   let label_filename name =
@@ -435,7 +434,10 @@ let emit_frames a =
       a.efa_label_rel
         (label_defname d.dinfo_file defname loc)
         (Int64.to_int32 info);
-      emit_i32 (Int32.to_int (Int64.to_int32 (Int64.shift_right info 32)));
+      (* We use [efa_i32] directly here instead of [emit_i32] to avoid a
+         round-trip via [int], which would break on 32-bit platforms. The right
+         shift ensures that the integer is in range of [int32]. *)
+      a.efa_i32 (Int64.to_int32 (Int64.shift_right info 32));
       match rest with [] -> () | d :: rest -> emit false d rest
     in
     match rdbg with [] -> assert false | d :: rest -> emit rs d rest
diff --git a/runtime/caml/frame_descriptors.h b/runtime/caml/frame_descriptors.h
index 3e1424d1822..cfc9f7e262d 100644
--- a/runtime/caml/frame_descriptors.h
+++ b/runtime/caml/frame_descriptors.h
@@ -60,7 +60,6 @@
 #define FRAME_RETURN_TO_C 0xFFFF
 #define FRAME_LONG_MARKER 0x7FFF
 
-
 typedef struct {
   int32_t retaddr_rel; /* offset of return address from &retaddr_rel */
   uint16_t frame_data; /* frame size and various flags */