Skip to content

Commit c7ec27e

Browse files
committed
arm64 float32 regs
1 parent 68c8f62 commit c7ec27e

File tree

3 files changed

+53
-47
lines changed

3 files changed

+53
-47
lines changed

backend/arm64/cfg_selection.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,9 @@ class selector =
159159
| [Cop (Cmulf Float64, args, _); arg] ->
160160
specific Inegmulsubf, arg :: args
161161
| _ -> super#select_operation op args dbg ~label_after)
162+
| Cpackf32 -> specific (Isimd Zip1_f32), args
162163
(* Recognize floating-point square root *)
163-
| Cextcall { func = "sqrt" } -> specific Isqrtf, args
164+
| Cextcall { func = "sqrt" | "sqrtf" } -> specific Isqrtf, args
164165
| Cextcall { func; builtin = true; _ } -> (
165166
match Simd_selection.select_operation_cfg func args with
166167
| Some (op, args) -> Basic (Op op), args

backend/arm64/proc.ml

Lines changed: 49 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -59,53 +59,50 @@ let float_reg_name =
5959
"d16"; "d17"; "d18"; "d19"; "d20"; "d21"; "d22"; "d23";
6060
"d24"; "d25"; "d26"; "d27"; "d28"; "d29"; "d30"; "d31" |]
6161

62+
let float32_reg_name =
63+
[| "s0"; "s1"; "s2"; "s3"; "s4"; "s5"; "s6"; "s7";
64+
"s8"; "s9"; "s10"; "s11"; "s12"; "s13"; "s14"; "s15";
65+
"s16"; "s17"; "s18"; "s19"; "s20"; "s21"; "s22"; "s23";
66+
"s24"; "s25"; "s26"; "s27"; "s28"; "s29"; "s30"; "s31" |]
67+
6268
let num_register_classes = 2
6369

6470
let register_class r =
6571
match (r.typ : Cmm.machtype_component) with
6672
| Val | Int | Addr -> 0
67-
| Float -> 1
6873
| Vec128 ->
6974
(* CR mslater: (SIMD) arm64 *)
7075
fatal_error "arm64: got vec128 register"
71-
| Float32 ->
72-
(* CR mslater: (float32) arm64 *)
73-
fatal_error "arm64: got float32 register"
7476
| Valx2 ->
7577
(* CR mslater: (SIMD) arm64 *)
7678
fatal_error "arm64: got valx2 register"
79+
| Float | Float32 -> 1
7780

7881
let num_stack_slot_classes = 2
7982

8083
let stack_slot_class typ =
8184
match (typ : Cmm.machtype_component) with
8285
| Val | Int | Addr -> 0
83-
| Float -> 1
8486
| Vec128 ->
8587
(* CR mslater: (SIMD) arm64 *)
8688
fatal_error "arm64: got vec128 register"
87-
| Float32 ->
88-
(* CR mslater: (float32) arm64 *)
89-
fatal_error "arm64: got float32 register"
9089
| Valx2 ->
9190
(* CR mslater: (SIMD) arm64 *)
9291
fatal_error "arm64: got valx2 register"
92+
| Float | Float32 -> 1
9393

9494
let types_are_compatible left right =
9595
match left.typ, right.typ with
9696
| (Int | Val | Addr), (Int | Val | Addr)
97-
| Float, Float ->
98-
true
99-
| Float32, _ | _, Float32 ->
100-
(* CR mslater: (float32) arm64 *)
101-
fatal_error "arm64: got float32 register"
97+
| Float, Float -> true
98+
| Float32, Float32 -> true
10299
| Vec128, _ | _, Vec128 ->
103100
(* CR mslater: (SIMD) arm64 *)
104101
fatal_error "arm64: got vec128 register"
105102
| Valx2, _ | _, Valx2 ->
106103
(* CR mslater: (SIMD) arm64 *)
107104
fatal_error "arm64: got valx2 register"
108-
| (Int | Val | Addr | Float), _ -> false
105+
| (Int | Val | Addr | Float | Float32), _ -> false
109106

110107
let stack_class_tag c =
111108
match c with
@@ -129,12 +126,13 @@ let register_name ty r =
129126
(* CR mslater: (SIMD) arm64 *)
130127
fatal_error "arm64: got vec128 register"
131128
| Float32 ->
132-
(* CR mslater: (float32) arm64 *)
133-
fatal_error "arm64: got float32 register"
129+
float32_reg_name.(r - first_available_register.(1))
134130
| Valx2 ->
135131
(* CR mslater: (SIMD) arm64 *)
136132
fatal_error "arm64: got valx2 register"
137133

134+
(* CR gyorsh for xclerc: [rotate_registers] used in [coloring] on Mach,
135+
but not in IRC on CFG. Are we dropping an optimization here? *)
138136
let rotate_registers = true
139137

140138
(* Representation of hard registers by pseudo-registers *)
@@ -146,15 +144,17 @@ let hard_int_reg =
146144
done;
147145
v
148146

149-
let hard_float_reg =
147+
let hard_float_reg_gen kind =
150148
let v = Array.make 32 Reg.dummy in
151149
for i = 0 to 31 do
152-
v.(i) <- Reg.at_location Float (Reg(100 + i))
150+
v.(i) <- Reg.at_location kind (Reg(100 + i))
153151
done;
154152
v
155153

154+
let hard_float_reg = hard_float_reg_gen Float
155+
let hard_float32_reg = hard_float_reg_gen Float32
156156
let all_phys_regs =
157-
Array.append hard_int_reg hard_float_reg
157+
Array.concat [hard_int_reg; hard_float_reg; hard_float32_reg; ]
158158

159159
let precolored_regs =
160160
let phys_regs = Reg.set_of_array all_phys_regs in
@@ -167,19 +167,15 @@ let phys_reg ty n =
167167
| Vec128 ->
168168
(* CR mslater: (SIMD) arm64 *)
169169
fatal_error "arm64: got vec128 register"
170-
| Float32 ->
171-
(* CR mslater: (float32) arm64 *)
172-
fatal_error "arm64: got float32 register"
173170
| Valx2 ->
174171
(* CR mslater: (SIMD) arm64 *)
175172
fatal_error "arm64: got valx2 register"
173+
| Float32 -> hard_float32_reg.(n - 100)
176174

177175
let gc_regs_offset _ =
178-
(* CR mslater: (SIMD) arm64 *)
179176
fatal_error "arm64: gc_reg_offset unreachable"
180177

181178
let reg_x8 = phys_reg Int 8
182-
let reg_d7 = phys_reg Float 107
183179

184180
let stack_slot slot ty =
185181
Reg.at_location ty (Stack slot)
@@ -198,16 +194,19 @@ let loc_int last_int make_stack int ofs =
198194
ofs := !ofs + size_int; l
199195
end
200196

201-
let loc_float last_float make_stack float ofs =
197+
let loc_float_gen kind size last_float make_stack float ofs =
202198
if !float <= last_float then begin
203-
let l = phys_reg Float !float in
199+
let l = phys_reg kind !float in
204200
incr float; l
205201
end else begin
206-
ofs := Misc.align !ofs size_float;
207-
let l = stack_slot (make_stack !ofs) Float in
208-
ofs := !ofs + size_float; l
202+
ofs := Misc.align !ofs size;
203+
let l = stack_slot (make_stack !ofs) kind in
204+
ofs := !ofs + size; l
209205
end
210206

207+
let loc_float = loc_float_gen Float Arch.size_float
208+
(* float32 stack slots are still aligned to, and advance the offset by, [Arch.size_float], i.e. they take up a full word *)
209+
let loc_float32 = loc_float_gen Float32 Arch.size_float
211210
let loc_int32 last_int make_stack int ofs =
212211
if !int <= last_int then begin
213212
let l = phys_reg Int !int in
@@ -234,8 +233,7 @@ let calling_conventions
234233
(* CR mslater: (SIMD) arm64 *)
235234
fatal_error "arm64: got vec128 register"
236235
| Float32 ->
237-
(* CR mslater: (float32) arm64 *)
238-
fatal_error "arm64: got float32 register"
236+
loc.(i) <- loc_float32 last_float make_stack float ofs
239237
| Valx2 ->
240238
(* CR mslater: (SIMD) arm64 *)
241239
fatal_error "arm64: got valx2 register"
@@ -305,8 +303,7 @@ let external_calling_conventions
305303
(* CR mslater: (SIMD) arm64 *)
306304
fatal_error "arm64: got vec128 register"
307305
| XFloat32 ->
308-
(* CR mslater: (float32) arm64 *)
309-
fatal_error "arm64: got float32 register"
306+
loc.(i) <- [| loc_float32 last_float make_stack float ofs |]
310307
end)
311308
ty_args;
312309
(loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
@@ -350,13 +347,25 @@ let domainstate_ptr_dwarf_register_number = 28
350347

351348
let destroyed_at_c_noalloc_call =
352349
(* x19-x28 and d8-d15 (together with their float32 views s8-s15) are preserved *)
353-
Array.append
354-
(Array.of_list (List.map (phys_reg Int)
355-
[0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15]))
356-
(Array.of_list (List.map (phys_reg Float)
357-
[100;101;102;103;104;105;106;107;
358-
116;117;118;119;120;121;122;123;
359-
124;125;126;127;128;129;130;131]))
350+
let int_regs_destroyed_at_c_noalloc_call =
351+
[| 0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15 |]
352+
in
353+
let float_regs_destroyed_at_c_noalloc_call =
354+
[|100;101;102;103;104;105;106;107;
355+
116;117;118;119;120;121;122;123;
356+
124;125;126;127;128;129;130;131|]
357+
in
358+
Array.concat [
359+
Array.map (phys_reg Int) int_regs_destroyed_at_c_noalloc_call;
360+
Array.map (phys_reg Float) float_regs_destroyed_at_c_noalloc_call;
361+
Array.map (phys_reg Float32) float_regs_destroyed_at_c_noalloc_call;
362+
]
363+
364+
(* CSE needs to know that every view of a neon register (both its Float and its Float32 pseudo-register) is destroyed. *)
365+
let destroy_neon_reg n =
366+
[| phys_reg Float (100 + n); phys_reg Float32 (100 + n); |]
367+
368+
let destroy_neon_reg7 = destroy_neon_reg 7
360369

361370
let destroyed_at_raise = all_phys_regs
362371

@@ -366,8 +375,6 @@ let destroyed_at_pushtrap = [| |]
366375

367376
let destroyed_at_alloc_or_poll = [| reg_x8 |]
368377

369-
let destroy_neon_reg7 = [| reg_d7 |]
370-
371378
let destroyed_at_basic (basic : Cfg_intf.S.basic) =
372379
match basic with
373380
| Reloadretaddr ->

backend/arm64/selection_utils.ml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,13 @@ let is_offset chunk n =
3131
(* 12 bits unsigned, scaled by chunk size *)
3232
| Byte_unsigned | Byte_signed -> n < 0x1000
3333
| Sixteen_unsigned | Sixteen_signed -> n land 1 = 0 && n lsr 1 < 0x1000
34-
| Thirtytwo_unsigned | Thirtytwo_signed | Single { reg = Float64 } ->
34+
| Thirtytwo_unsigned | Thirtytwo_signed
35+
| Single { reg = Float64 | Float32 } ->
3536
n land 3 = 0 && n lsr 2 < 0x1000
3637
| Word_int | Word_val | Double -> n land 7 = 0 && n lsr 3 < 0x1000
3738
| Onetwentyeight_aligned | Onetwentyeight_unaligned ->
3839
(* CR mslater: (SIMD) arm64 *)
3940
Misc.fatal_error "arm64: got 128 bit memory chunk"
40-
| Single { reg = Float32 } ->
41-
(* CR mslater: (float32) arm64 *)
42-
Misc.fatal_error "arm64: got float32 memory chunk"
4341

4442
let is_logical_immediate_int n = Arch.is_logical_immediate (Nativeint.of_int n)
4543

0 commit comments

Comments
 (0)