From 6883d5b3fddd1316a5b752f29640b2d9ea3c352e Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Tue, 18 Mar 2025 16:36:56 +0000 Subject: [PATCH 1/4] arm64 float32 regs --- backend/arm64/cfg_selection.ml | 3 +- backend/arm64/proc.ml | 91 +++++++++++++++++--------------- backend/arm64/selection_utils.ml | 6 +-- 3 files changed, 53 insertions(+), 47 deletions(-) diff --git a/backend/arm64/cfg_selection.ml b/backend/arm64/cfg_selection.ml index 89de872bc44..5d744944f24 100644 --- a/backend/arm64/cfg_selection.ml +++ b/backend/arm64/cfg_selection.ml @@ -159,8 +159,9 @@ class selector = | [Cop (Cmulf Float64, args, _); arg] -> specific Inegmulsubf, arg :: args | _ -> super#select_operation op args dbg ~label_after) + | Cpackf32 -> specific (Isimd Zip1_f32), args (* Recognize floating-point square root *) - | Cextcall { func = "sqrt" } -> specific Isqrtf, args + | Cextcall { func = "sqrt" | "sqrtf" } -> specific Isqrtf, args | Cextcall { func; builtin = true; _ } -> ( match Simd_selection.select_operation_cfg func args with | Some (op, args) -> Basic (Op op), args diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 0fb2c21c22e..e85a99f2512 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -59,53 +59,50 @@ let float_reg_name = "d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23"; "d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |] +let float32_reg_name = + [| "s0"; "s1"; "s2"; "s3"; "s4"; "s5"; "s6"; "s7"; + "s8"; "s9"; "s10"; "s11"; "s12"; "s13"; "s14"; "s15"; + "s16"; "s17"; "s18"; "s19"; "s20"; "s21"; "s22"; "s23"; + "s24"; "s25"; "s26"; "s27"; "s28"; "s29"; "s30"; "s31" |] + let num_register_classes = 2 let register_class r = match (r.typ : Cmm.machtype_component) with | Val | Int | Addr -> 0 - | Float -> 1 | Vec128 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" - | Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" | 
Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" + | Float | Float32 -> 1 let num_stack_slot_classes = 2 let stack_slot_class typ = match (typ : Cmm.machtype_component) with | Val | Int | Addr -> 0 - | Float -> 1 | Vec128 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" - | Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" | Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" + | Float | Float32 -> 1 let types_are_compatible left right = match left.typ, right.typ with | (Int | Val | Addr), (Int | Val | Addr) - | Float, Float -> - true - | Float32, _ | _, Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" + | Float, Float -> true + | Float32, Float32 -> true | Vec128, _ | _, Vec128 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" | Valx2, _ | _, Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" - | (Int | Val | Addr | Float), _ -> false + | (Int | Val | Addr | Float | Float32), _ -> false let stack_class_tag c = match c with @@ -129,12 +126,13 @@ let register_name ty r = (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" | Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" + float32_reg_name.(r - first_available_register.(1)) | Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" +(* CR gyorsh for xclerc: [rotate_registers] used in [coloring] on Mach, + but not in IRC on CFG. Are we dropping an optimization here? 
*) let rotate_registers = true (* Representation of hard registers by pseudo-registers *) @@ -146,15 +144,17 @@ let hard_int_reg = done; v -let hard_float_reg = +let hard_float_reg_gen kind = let v = Array.make 32 Reg.dummy in for i = 0 to 31 do - v.(i) <- Reg.at_location Float (Reg(100 + i)) + v.(i) <- Reg.at_location kind (Reg(100 + i)) done; v +let hard_float_reg = hard_float_reg_gen Float +let hard_float32_reg = hard_float_reg_gen Float32 let all_phys_regs = - Array.append hard_int_reg hard_float_reg + Array.concat [hard_int_reg; hard_float_reg; hard_float32_reg; ] let precolored_regs = let phys_regs = Reg.set_of_array all_phys_regs in @@ -167,19 +167,15 @@ let phys_reg ty n = | Vec128 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" - | Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" | Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" + | Float32 -> hard_float32_reg.(n - 100) let gc_regs_offset _ = - (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: gc_reg_offset unreachable" let reg_x8 = phys_reg Int 8 -let reg_d7 = phys_reg Float 107 let stack_slot slot ty = Reg.at_location ty (Stack slot) @@ -198,16 +194,19 @@ let loc_int last_int make_stack int ofs = ofs := !ofs + size_int; l end -let loc_float last_float make_stack float ofs = +let loc_float_gen kind size last_float make_stack float ofs = if !float <= last_float then begin - let l = phys_reg Float !float in + let l = phys_reg kind !float in incr float; l end else begin - ofs := Misc.align !ofs size_float; - let l = stack_slot (make_stack !ofs) Float in - ofs := !ofs + size_float; l + ofs := Misc.align !ofs size; + let l = stack_slot (make_stack !ofs) kind in + ofs := !ofs + size; l end +let loc_float = loc_float_gen Float Arch.size_float +(* float32 slots still take up a full word *) +let loc_float32 = loc_float_gen Float32 Arch.size_float let loc_int32 last_int make_stack int ofs = if !int <= last_int then 
begin let l = phys_reg Int !int in @@ -234,8 +233,7 @@ let calling_conventions (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" | Float32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" + loc.(i) <- loc_float32 last_float make_stack float ofs | Valx2 -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" @@ -305,8 +303,7 @@ let external_calling_conventions (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got vec128 register" | XFloat32 -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 register" + loc.(i) <- [| loc_float32 last_float make_stack float ofs |] end) ty_args; (loc, Misc.align !ofs 16) (* keep stack 16-aligned *) @@ -350,13 +347,25 @@ let domainstate_ptr_dwarf_register_number = 28 let destroyed_at_c_noalloc_call = (* x19-x28, d8-d15 preserved *) - Array.append - (Array.of_list (List.map (phys_reg Int) - [0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15])) - (Array.of_list (List.map (phys_reg Float) - [100;101;102;103;104;105;106;107; - 116;117;118;119;120;121;122;123; - 124;125;126;127;128;129;130;131])) + let int_regs_destroyed_at_c_noalloc_call = + [| 0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15 |] + in + let float_regs_destroyed_at_c_noalloc_call = + [|100;101;102;103;104;105;106;107; + 116;117;118;119;120;121;122;123; + 124;125;126;127;128;129;130;131|] + in + Array.concat [ + Array.map (phys_reg Int) int_regs_destroyed_at_c_noalloc_call; + Array.map (phys_reg Float) float_regs_destroyed_at_c_noalloc_call; + Array.map (phys_reg Float32) float_regs_destroyed_at_c_noalloc_call; + ] + +(* CSE needs to know that all versions of neon are destroyed. 
*) +let destroy_neon_reg n = + [| phys_reg Float (100 + n); phys_reg Float32 (100 + n); |] + +let destroy_neon_reg7 = destroy_neon_reg 7 let destroyed_at_raise = all_phys_regs @@ -366,8 +375,6 @@ let destroyed_at_pushtrap = [| |] let destroyed_at_alloc_or_poll = [| reg_x8 |] -let destroy_neon_reg7 = [| reg_d7 |] - let destroyed_at_basic (basic : Cfg_intf.S.basic) = match basic with | Reloadretaddr -> diff --git a/backend/arm64/selection_utils.ml b/backend/arm64/selection_utils.ml index bb731579904..712c6806c0a 100644 --- a/backend/arm64/selection_utils.ml +++ b/backend/arm64/selection_utils.ml @@ -31,15 +31,13 @@ let is_offset chunk n = (* 12 bits unsigned, scaled by chunk size *) | Byte_unsigned | Byte_signed -> n < 0x1000 | Sixteen_unsigned | Sixteen_signed -> n land 1 = 0 && n lsr 1 < 0x1000 - | Thirtytwo_unsigned | Thirtytwo_signed | Single { reg = Float64 } -> + | Thirtytwo_unsigned | Thirtytwo_signed + | Single { reg = Float64 | Float32 } -> n land 3 = 0 && n lsr 2 < 0x1000 | Word_int | Word_val | Double -> n land 7 = 0 && n lsr 3 < 0x1000 | Onetwentyeight_aligned | Onetwentyeight_unaligned -> (* CR mslater: (SIMD) arm64 *) Misc.fatal_error "arm64: got 128 bit memory chunk" - | Single { reg = Float32 } -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "arm64: got float32 memory chunk" let is_logical_immediate_int n = Arch.is_logical_immediate (Nativeint.of_int n) From fd2c0953bc637e0e0e28327ef0c34ae3912afba8 Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Tue, 18 Mar 2025 17:18:40 +0000 Subject: [PATCH 2/4] arm64 float32 emit --- backend/arm64/emit.mlp | 88 +++++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp index dcb150fd3e4..f57c1217fc6 100644 --- a/backend/arm64/emit.mlp +++ b/backend/arm64/emit.mlp @@ -317,6 +317,11 @@ let is_immediate_float bits = let mant = Int64.logand bits 0xF_FFFF_FFFF_FFFFL in 
exp >= -3 && exp <= 4 && Int64.logand mant 0xF_0000_0000_0000L = mant +let is_immediate_float32 bits = + let exp = (Int32.(to_int (shift_right_logical bits 23)) land 0xFF) - 127 in (* binary32: 8-bit exponent field, bias 127 *) + let mant = Int32.logand bits 0x7F_FFFFl in + exp >= -3 && exp <= 4 && Int32.logand mant 0x78_0000l = mant + (* Adjust sp (up or down) by the given byte amount *) let emit_stack_adjustment n = @@ -703,9 +708,7 @@ module BR = Branch_relaxation.Make (struct | Lop (Move | Spill | Reload) -> 1 | Lop (Const_int n) -> num_instructions_for_intconst n - | Lop (Const_float32 _) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" + | Lop (Const_float32 _) -> 2 | Lop (Const_float _) -> 2 | Lop (Const_symbol _) -> 2 | Lop (Const_vec128 _) -> @@ -755,22 +758,19 @@ module BR = Branch_relaxation.Make (struct | Lop (Begin_region | End_region) -> 1 | Lop (Intop (Icomp _)) -> 2 | Lop (Floatop (Float64, Icompf _)) -> 2 - | Lop (Floatop (Float32, Icompf _)) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" + | Lop (Floatop (Float32, Icompf _)) -> 2 | Lop (Intop_imm (Icomp _, _)) -> 2 | Lop (Intop Imod) -> 2 | Lop (Intop (Imulh _)) -> 1 | Lop (Intop (Iclz _)) -> 1 | Lop (Intop (Ictz _)) -> 2 - | Lop (Floatop (Float64, (Iabsf | Inegf)) | Specific Isqrtf) -> 1 - | Lop (Floatop (Float32, (Iabsf | Inegf))) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" | Lop (Intop (Iadd|Isub|Imul|Idiv|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr|Ipopcnt)) -> 1 | Lop (Intop_imm ((Iadd|Isub|Imul|Idiv|Imod|Imulh _|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr | Iclz _ | Ictz _ |Ipopcnt),_)) -> 1 + | Lop (Floatop (Float64, (Iabsf | Inegf))) -> 1 + | Lop (Floatop (Float32, (Iabsf | Inegf))) -> 1 + | Lop (Specific Isqrtf) -> 1 | Lop (Reinterpret_cast (Value_of_int | Int_of_value | Float_of_int64 | Int64_of_float)) -> 1 | Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 | @@ -788,10 +788,9 
@@ module BR = Branch_relaxation.Make (struct | Lop (Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) -> (* CR mslater: (SIMD) arm64 *) Misc.fatal_error "SIMD is not supported on this architecture" - | Lop (Floatop (Float64, (Iaddf | Isubf | Imulf | Idivf)) | Specific Inegmulf) -> 1 - | Lop (Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" + | Lop (Floatop (Float64, (Iaddf | Isubf | Imulf | Idivf))) -> 1 + | Lop (Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> 1 + | Lop (Specific Inegmulf) -> 1 | Lop (Opaque) -> 0 | Lop (Specific (Imuladdf | Inegmuladdf | Imulsubf | Inegmulsubf)) -> 1 | Lop (Specific (Ishiftarith _)) -> 1 @@ -1030,7 +1029,7 @@ let emit_instr i = move i.arg.(0) i.res.(0) | Lop(Specific Imove32) -> let src = i.arg.(0) and dst = i.res.(0) in - if src.loc <> dst.loc then begin + if not (Reg.same_loc src dst) then begin match (src, dst) with | {loc = Reg _}, {loc = Reg _} -> ` mov {emit_wreg dst}, {emit_wreg src}\n` @@ -1045,9 +1044,17 @@ let emit_instr i = end | Lop(Const_int n) -> emit_intconst i.res.(0) n - | Lop (Const_float32 _) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" + | Lop (Const_float32 f) -> + DSL.check_reg Float32 i.res.(0); + if f = 0l then + ` fmov {emit_reg i.res.(0)}, wzr\n` + else if is_immediate_float32 f then + ` fmov {emit_reg i.res.(0)}, #{emit_printf "%.7f" (Int32.float_of_bits f)}\n` + else begin + (* float32 constants still take up 8 bytes; we load the lower half. 
*) + let lbl = float_literal (Int64.of_int32 f) in + emit_load_literal i.res.(0) lbl + end | Lop(Const_float f) -> if f = 0L then ` fmov {emit_reg i.res.(0)}, xzr\n` @@ -1134,6 +1141,7 @@ let emit_instr i = | Thirtytwo_signed -> ` ldrsw {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Single { reg = Float64 } -> + DSL.check_reg Float dst; ` ldr s7, {emit_addressing addressing_mode base}\n`; ` fcvt {emit_reg dst}, s7\n` | Word_int | Word_val -> @@ -1146,8 +1154,8 @@ let emit_instr i = | Double -> ` ldr {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Single { reg = Float32 } -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 memory chunk" + DSL.check_reg Float32 dst; + ` ldr {emit_reg dst}, {emit_addressing addressing_mode base}\n` | Onetwentyeight_aligned | Onetwentyeight_unaligned -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got 128 bit memory chunk" @@ -1171,6 +1179,7 @@ let emit_instr i = | Thirtytwo_unsigned | Thirtytwo_signed -> ` str {emit_wreg src}, {emit_addressing addr base}\n` | Single { reg = Float64 } -> + DSL.check_reg Float src; ` fcvt s7, {emit_reg src}\n`; ` str s7, {emit_addressing addr base}\n`; | Word_int | Word_val -> @@ -1180,8 +1189,8 @@ let emit_instr i = | Double -> ` str {emit_reg src}, {emit_addressing addr base}\n` | Single { reg = Float32 } -> - (* CR mslater: (float32) arm64 *) - fatal_error "arm64: got float32 memory chunk" + DSL.check_reg Float32 src; + ` str {emit_reg src}, {emit_addressing addr base}\n` | Onetwentyeight_aligned | Onetwentyeight_unaligned -> (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got 128 bit memory chunk" @@ -1213,9 +1222,10 @@ let emit_instr i = let comp = name_for_float_comparison cmp in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` cset {emit_reg i.res.(0)}, {emit_string comp}\n` - | Lop(Floatop(Float32, Icompf _)) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" + | 
Lop(Floatop(Float32, Icompf cmp)) -> + let comp = name_for_float_comparison cmp in + ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; + ` cset {emit_reg i.res.(0)}, {emit_string comp}\n` | Lop(Intop_imm(Icomp cmp, n)) -> emit_cmpimm i.arg.(0) n; ` cset {emit_reg i.res.(0)}, {emit_string (name_for_comparison cmp)}\n` @@ -1240,25 +1250,19 @@ let emit_instr i = | Lop(Intop_imm(op, n)) -> let instr = name_for_int_operation op in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, #{emit_int n}\n` - | Lop(Floatop (Float32, (Iabsf | Inegf))) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" - | Lop(Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" | Lop(Specific Isqrtf) -> ` fsqrt {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop(Floatop ((Float64), Iabsf)) -> + | Lop(Floatop ((Float32 | Float64), Iabsf)) -> ` fabs {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop(Floatop ((Float64), Inegf)) -> + | Lop(Floatop ((Float32 | Float64), Inegf)) -> ` fneg {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop(Floatop ((Float64), Iaddf)) -> + | Lop(Floatop ((Float32 | Float64), Iaddf)) -> ` fadd {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` - | Lop(Floatop ((Float64), Isubf)) -> + | Lop(Floatop ((Float32 | Float64), Isubf)) -> ` fsub {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` - | Lop(Floatop ((Float64), Imulf)) -> + | Lop(Floatop ((Float32 | Float64), Imulf)) -> ` fmul {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` - | Lop(Floatop ((Float64), Idivf)) -> + | Lop(Floatop ((Float32 | Float64), Idivf)) -> ` fdiv {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` | Lop(Specific Inegmulf) -> ` fnmul {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n` @@ -1341,10 +1345,7 @@ let emit_instr i = let 
comp = name_for_comparison cmp in emit_cmpimm i.arg.(0) n; ` csel {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_string comp}\n` - | Ifloattest (Float32, _cmp) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" - | Ifloattest (Float64, cmp) -> + | Ifloattest ((Float32 | Float64), cmp) -> let comp = name_for_float_comparison cmp in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` csel {emit_reg i.res.(0)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(3)}, {emit_string comp}\n` @@ -1377,10 +1378,7 @@ let emit_instr i = emit_cmpimm i.arg.(0) n; let comp = name_for_comparison cmp in ` b.{emit_string comp} {emit_label lbl}\n` - | Ifloattest (Float32, _cmp) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" - | Ifloattest (Float64, cmp) -> + | Ifloattest ((Float32 | Float64), cmp) -> let comp = name_for_float_comparison cmp in ` fcmp {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}\n`; ` b.{emit_string comp} {emit_label lbl}\n` From c2d3de79517fbf9930cf218f7fd036ffef69d296 Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Thu, 20 Mar 2025 13:58:07 +0000 Subject: [PATCH 3/4] remove unused rotate_registers --- backend/amd64/proc.ml | 4 ---- backend/arm64/proc.ml | 3 --- backend/proc.mli | 1 - 3 files changed, 8 deletions(-) diff --git a/backend/amd64/proc.ml b/backend/amd64/proc.ml index dc9d2f010ec..fb270fb5070 100644 --- a/backend/amd64/proc.ml +++ b/backend/amd64/proc.ml @@ -140,10 +140,6 @@ let register_name ty r = | Float | Float32 | Vec128 | Valx2 -> float_reg_name.(r - first_available_register.(1)) -(* Pack registers starting at %rax so as to reduce the number of REX - prefixes and thus improve code density *) -let rotate_registers = false - (* Representation of hard registers by pseudo-registers *) let hard_int_reg = diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 
e85a99f2512..71491f70024 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -131,9 +131,6 @@ let register_name ty r = (* CR mslater: (SIMD) arm64 *) fatal_error "arm64: got valx2 register" -(* CR gyorsh for xclerc: [rotate_registers] used in [coloring] on Mach, - but not in IRC on CFG. Are we dropping an optimization here? *) -let rotate_registers = true (* Representation of hard registers by pseudo-registers *) diff --git a/backend/proc.mli b/backend/proc.mli index 2a6982fb35a..4789bfd3345 100644 --- a/backend/proc.mli +++ b/backend/proc.mli @@ -26,7 +26,6 @@ val first_available_register: int array val register_name: Cmm.machtype_component -> int -> string val phys_reg: Cmm.machtype_component -> int -> Reg.t val gc_regs_offset : Reg.t -> int -val rotate_registers: bool val precolored_regs : unit -> Reg.Set.t (* The number of stack slot classes may differ from the number of register classes. From edcb49fcf4588f803d0e8e60a25e632d09a246ee Mon Sep 17 00:00:00 2001 From: Greta Yorsh <45005955+gretay-js@users.noreply.github.com> Date: Thu, 20 Mar 2025 14:09:32 +0000 Subject: [PATCH 4/4] Generalize [hard_float_reg_gen] to cover int_hard_reg and avoid magic constants: [hard_reg_gen] --- backend/arm64/proc.ml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/backend/arm64/proc.ml b/backend/arm64/proc.ml index 71491f70024..751404eb975 100644 --- a/backend/arm64/proc.ml +++ b/backend/arm64/proc.ml @@ -67,8 +67,8 @@ let float32_reg_name = let num_register_classes = 2 -let register_class r = - match (r.typ : Cmm.machtype_component) with +let register_class_of_machtype_component typ = + match (typ : Cmm.machtype_component) with | Val | Int | Addr -> 0 | Vec128 -> (* CR mslater: (SIMD) arm64 *) @@ -78,6 +78,9 @@ let register_class r = fatal_error "arm64: got valx2 register" | Float | Float32 -> 1 +let register_class r = + register_class_of_machtype_component r.typ + let num_stack_slot_classes = 2 let 
stack_slot_class typ = @@ -134,22 +137,19 @@ let register_name ty r = (* Representation of hard registers by pseudo-registers *) -let hard_int_reg = - let v = Array.make 28 Reg.dummy in - for i = 0 to 27 do - v.(i) <- Reg.at_location Int (Reg i) - done; - v -let hard_float_reg_gen kind = - let v = Array.make 32 Reg.dummy in - for i = 0 to 31 do - v.(i) <- Reg.at_location kind (Reg(100 + i)) +let hard_reg_gen typ n = + let reg_class = register_class_of_machtype_component typ in + let first = first_available_register.(reg_class) in + let v = Array.make n Reg.dummy in + for i = 0 to n - 1 do + v.(i) <- Reg.at_location typ (Reg(first + i)) done; - v +v -let hard_float_reg = hard_float_reg_gen Float -let hard_float32_reg = hard_float_reg_gen Float32 +let hard_int_reg = hard_reg_gen Int (Array.length int_reg_name) +let hard_float_reg = hard_reg_gen Float (Array.length float_reg_name) +let hard_float32_reg = hard_reg_gen Float32 (Array.length float32_reg_name) let all_phys_regs = Array.concat [hard_int_reg; hard_float_reg; hard_float32_reg; ]