Skip to content

Commit 48fb7fc

Browse files
committed
vec128 regs
1 parent 9587b12 commit 48fb7fc

File tree

2 files changed

+30
-45
lines changed

2 files changed

+30
-45
lines changed

backend/arm64/proc.ml

Lines changed: 29 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -65,44 +65,38 @@ let float32_reg_name =
6565
"s16"; "s17"; "s18"; "s19"; "s20"; "s21"; "s22"; "s23";
6666
"s24"; "s25"; "s26"; "s27"; "s28"; "s29"; "s30"; "s31" |]
6767

68+
(* Assembler names of the 32 128-bit vector registers, "q0" .. "q31".
   Entry [i] holds the name of register number [i] of its class. *)
let vec128_reg_name =
  Array.init 32 (fun i -> "q" ^ string_of_int i)
73+
6874
let num_register_classes = 2
6975

7076
(* [register_class r] is the index of the register class that [r] belongs
   to, decided by its machine type: integer-like values (Val, Int, Addr)
   live in class 0; scalar floats and 128-bit vectors (Float, Float32,
   Vec128, Valx2) all share class 1. *)
let register_class r =
  match (r.typ : Cmm.machtype_component) with
  | Val | Int | Addr -> 0
  | Float | Float32 | Vec128 | Valx2 -> 1
8082

81-
let num_stack_slot_classes = 2
83+
let num_stack_slot_classes = 3
8284

8385
(* [stack_slot_class typ] is the index of the stack slot class used to
   spill a value of machine type [typ]:
   - 0: integer-like values (Val, Int, Addr);
   - 1: scalar floats (Float, Float32);
   - 2: 128-bit vectors (Vec128, Valx2).
   Classes are numbered from 0, so with three stack slot classes the
   vector class must be 2; returning 3 would name a class that does not
   exist. *)
let stack_slot_class typ =
  match (typ : Cmm.machtype_component) with
  | Val | Int | Addr -> 0
  | Float | Float32 -> 1
  | Vec128 | Valx2 -> 2
9391

9492
(* [types_are_compatible left right] compares the machine types of two
   registers. Int, Val and Addr are compatible with one another; each of
   Float, Float32, Vec128 and Valx2 is compatible only with itself. Any
   other pairing is incompatible. *)
let types_are_compatible left right =
  match left.typ, right.typ with
  | (Int | Val | Addr), (Int | Val | Addr)
  | Float, Float
  | Float32, Float32
  | Vec128, Vec128
  | Valx2, Valx2 -> true
  | (Int | Val | Addr | Float | Float32 | Vec128 | Valx2), _ -> false
106100

107101
let stack_class_tag c =
108102
match c with
@@ -122,14 +116,10 @@ let register_name ty r =
122116
int_reg_name.(r - first_available_register.(0))
123117
| Float ->
124118
float_reg_name.(r - first_available_register.(1))
125-
| Vec128 ->
126-
(* CR mslater: (SIMD) arm64 *)
127-
fatal_error "arm64: got vec128 register"
128119
| Float32 ->
129120
float32_reg_name.(r - first_available_register.(1))
130-
| Valx2 ->
131-
(* CR mslater: (SIMD) arm64 *)
132-
fatal_error "arm64: got valx2 register"
121+
| Vec128 | Valx2 ->
122+
vec128_reg_name.(r - first_available_register.(1))
133123

134124
(* CR gyorsh for xclerc: [rotate_registers] used in [coloring] on Mach,
135125
but not in IRC on CFG. Are we dropping an optimization here? *)
@@ -153,8 +143,10 @@ let hard_float_reg_gen kind =
153143

154144
let hard_float_reg = hard_float_reg_gen Float
155145
let hard_float32_reg = hard_float_reg_gen Float32
146+
let hard_vec128_reg = hard_float_reg_gen Vec128
147+
156148
(* Every hard register known to this backend, as one array: the integer
   registers first, then the Float, Float32 and Vec128 register arrays. *)
let all_phys_regs =
  Array.concat
    [ hard_int_reg;
      hard_float_reg;
      hard_float32_reg;
      hard_vec128_reg ]
158150

159151
let precolored_regs =
160152
let phys_regs = Reg.set_of_array all_phys_regs in
@@ -164,13 +156,8 @@ let phys_reg ty n =
164156
match (ty : Cmm.machtype_component) with
165157
| Int | Addr | Val -> hard_int_reg.(n)
166158
| Float -> hard_float_reg.(n - 100)
167-
| Vec128 ->
168-
(* CR mslater: (SIMD) arm64 *)
169-
fatal_error "arm64: got vec128 register"
170-
| Valx2 ->
171-
(* CR mslater: (SIMD) arm64 *)
172-
fatal_error "arm64: got valx2 register"
173159
| Float32 -> hard_float32_reg.(n - 100)
160+
| Vec128 | Valx2 -> hard_vec128_reg.(n - 100)
174161

175162
let gc_regs_offset _ =
176163
fatal_error "arm64: gc_reg_offset unreachable"
@@ -207,6 +194,8 @@ let loc_float_gen kind size last_float make_stack float ofs =
207194
let loc_float = loc_float_gen Float Arch.size_float
208195
(* float32 slots still take up a full word *)
209196
let loc_float32 = loc_float_gen Float32 Arch.size_float
197+
let loc_vec128 = loc_float_gen Vec128 Arch.size_vec128
198+
210199
let loc_int32 last_int make_stack int ofs =
211200
if !int <= last_int then begin
212201
let l = phys_reg Int !int in
@@ -230,14 +219,13 @@ let calling_conventions
230219
| Float ->
231220
loc.(i) <- loc_float last_float make_stack float ofs
232221
| Vec128 ->
233-
(* CR mslater: (SIMD) arm64 *)
234-
fatal_error "arm64: got vec128 register"
222+
loc.(i) <- loc_vec128 last_float make_stack float ofs
235223
| Float32 ->
236224
loc.(i) <- loc_float32 last_float make_stack float ofs
237225
| Valx2 ->
238-
(* CR mslater: (SIMD) arm64 *)
239-
fatal_error "arm64: got valx2 register"
226+
Misc.fatal_error "Unexpected machtype_component Valx2"
240227
done;
228+
(* CR mslater: (SIMD) will need to be 32/64 if vec256/512 are used. *)
241229
(loc, Misc.align (max 0 !ofs) 16) (* keep stack 16-aligned *)
242230

243231
let incoming ofs =
@@ -300,8 +288,7 @@ let external_calling_conventions
300288
| XFloat ->
301289
loc.(i) <- [| loc_float last_float make_stack float ofs |]
302290
| XVec128 ->
303-
(* CR mslater: (SIMD) arm64 *)
304-
fatal_error "arm64: got vec128 register"
291+
loc.(i) <- [| loc_vec128 last_float make_stack float ofs |]
305292
| XFloat32 ->
306293
loc.(i) <- [| loc_float32 last_float make_stack float ofs |]
307294
end)
@@ -359,11 +346,12 @@ let destroyed_at_c_noalloc_call =
359346
Array.map (phys_reg Int) int_regs_destroyed_at_c_noalloc_call;
360347
Array.map (phys_reg Float) float_regs_destroyed_at_c_noalloc_call;
361348
Array.map (phys_reg Float32) float_regs_destroyed_at_c_noalloc_call;
349+
Array.map (phys_reg Vec128) float_regs_destroyed_at_c_noalloc_call;
362350
]
363351

364352
(* [destroy_neon_reg n] lists the Float, Float32 and Vec128 hard registers
   with number [100 + n]. All three are reported together because CSE needs
   to know that every version of a neon register is destroyed. *)
let destroy_neon_reg n =
  let reg_num = 100 + n in
  [| phys_reg Float reg_num;
     phys_reg Float32 reg_num;
     phys_reg Vec128 reg_num |]
367355

368356
let destroy_neon_reg7 = destroy_neon_reg 7
369357

@@ -524,9 +512,8 @@ let assemble_file infile outfile =
524512
let init () = ()
525513

526514
let operation_supported : Cmm.operation -> bool = function
515+
| Cprefetch _ | Catomic _ -> false
527516
| Cpopcnt
528-
| Cprefetch _ | Catomic _
529-
(* CR mslater: (float32) arm64 *)
530517
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
531518
| Csubf Float32 | Cmulf Float32 | Cdivf Float32
532519
| Cpackf32
@@ -536,7 +523,6 @@ let operation_supported : Cmm.operation -> bool = function
536523
| Cstatic_cast (Float_of_float32 | Float32_of_float |
537524
Int_of_float Float32 | Float_of_int Float32 |
538525
V128_of_scalar _ | Scalar_of_v128 _)
539-
-> false (* Not implemented *)
540526
| Cclz _ | Cctz _ | Cbswap _
541527
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
542528
| Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi

backend/arm64/selection_utils.ml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ let is_offset chunk n =
3636
n land 3 = 0 && n lsr 2 < 0x1000
3737
| Word_int | Word_val | Double -> n land 7 = 0 && n lsr 3 < 0x1000
3838
| Onetwentyeight_aligned | Onetwentyeight_unaligned ->
39-
(* CR mslater: (SIMD) arm64 *)
40-
Misc.fatal_error "arm64: got 128 bit memory chunk"
39+
n land 15 = 0 && n lsr 4 < 0x1000
4140

4241
let is_logical_immediate_int n = Arch.is_logical_immediate (Nativeint.of_int n)
4342

0 commit comments

Comments
 (0)