@@ -65,44 +65,38 @@ let float32_reg_name =
65
65
" s16" ; " s17" ; " s18" ; " s19" ; " s20" ; " s21" ; " s22" ; " s23" ;
66
66
" s24" ; " s25" ; " s26" ; " s27" ; " s28" ; " s29" ; " s30" ; " s31" |]
67
67
68
+ let vec128_reg_name =
69
+ [| " q0" ; " q1" ; " q2" ; " q3" ; " q4" ; " q5" ; " q6" ; " q7" ;
70
+ " q8" ; " q9" ; " q10" ; " q11" ; " q12" ; " q13" ; " q14" ; " q15" ;
71
+ " q16" ; " q17" ; " q18" ; " q19" ; " q20" ; " q21" ; " q22" ; " q23" ;
72
+ " q24" ; " q25" ; " q26" ; " q27" ; " q28" ; " q29" ; " q30" ; " q31" |]
73
+
68
74
let num_register_classes = 2
69
75
70
76
(* Register class of [r], selected by the machine type stored in [r.typ].
   Integer-like components (values, ints, addresses) are class 0; scalar
   floats and the 128-bit vector components all map to class 1. Every
   constructor is listed so that a new [Cmm.machtype_component] case is
   reported by the exhaustiveness check. *)
let register_class r =
  match (r.typ : Cmm.machtype_component) with
  | Val | Int | Addr -> 0
  | Float | Float32 | Vec128 | Valx2 -> 1
80
82
81
- let num_stack_slot_classes = 2
83
+ let num_stack_slot_classes = 3
82
84
83
85
(* Stack slot class for a value of machine type [typ].

   The result must be a valid class index, i.e. lie in
   [0 .. num_stack_slot_classes - 1]. With three classes the vector
   components therefore use index 2; returning 3 would be out of range for
   anything sized by [num_stack_slot_classes].

   - class 0: integer-like components (values, ints, addresses);
   - class 1: [Float] and [Float32] (a float32 slot still takes up a full
     word, so it shares the float class);
   - class 2: [Vec128] and [Valx2]. *)
let stack_slot_class typ =
  match (typ : Cmm.machtype_component) with
  | Val | Int | Addr -> 0
  | Float | Float32 -> 1
  | Vec128 | Valx2 -> 2
93
91
94
92
(* [types_are_compatible left right] tells whether the machine types of the
   two registers may be treated as interchangeable. Integer-like components
   ([Int], [Val], [Addr]) are compatible with one another; every other
   component is compatible only with itself. In particular [Vec128] and
   [Valx2] are not compatible with each other. The final arm enumerates
   the left-hand constructors instead of using a bare wildcard, so adding a
   constructor triggers a non-exhaustive match warning here. *)
let types_are_compatible left right =
  match left.typ, right.typ with
  | (Int | Val | Addr), (Int | Val | Addr)
  | Float, Float
  | Float32, Float32
  | Vec128, Vec128
  | Valx2, Valx2 -> true
  | (Int | Val | Addr | Float | Float32 | Vec128 | Valx2), _ -> false
106
100
107
101
let stack_class_tag c =
108
102
match c with
@@ -122,14 +116,10 @@ let register_name ty r =
122
116
int_reg_name.(r - first_available_register.(0 ))
123
117
| Float ->
124
118
float_reg_name.(r - first_available_register.(1 ))
125
- | Vec128 ->
126
- (* CR mslater: (SIMD) arm64 *)
127
- fatal_error " arm64: got vec128 register"
128
119
| Float32 ->
129
120
float32_reg_name.(r - first_available_register.(1 ))
130
- | Valx2 ->
131
- (* CR mslater: (SIMD) arm64 *)
132
- fatal_error " arm64: got valx2 register"
121
+ | Vec128 | Valx2 ->
122
+ vec128_reg_name.(r - first_available_register.(1 ))
133
123
134
124
(* CR gyorsh for xclerc: [rotate_registers] used in [coloring] on Mach,
135
125
but not in IRC on CFG. Are we dropping an optimization here? *)
@@ -153,8 +143,10 @@ let hard_float_reg_gen kind =
153
143
154
144
let hard_float_reg = hard_float_reg_gen Float
155
145
let hard_float32_reg = hard_float_reg_gen Float32
146
+ let hard_vec128_reg = hard_float_reg_gen Vec128
147
+
156
148
(* All hard registers in a single array: the integer registers first,
   followed by the [Float], [Float32] and [Vec128] register arrays, in
   that order. *)
let all_phys_regs =
  [ hard_int_reg; hard_float_reg; hard_float32_reg; hard_vec128_reg ]
  |> Array.concat
158
150
159
151
let precolored_regs =
160
152
let phys_regs = Reg. set_of_array all_phys_regs in
@@ -164,13 +156,8 @@ let phys_reg ty n =
164
156
match (ty : Cmm.machtype_component ) with
165
157
| Int | Addr | Val -> hard_int_reg.(n)
166
158
| Float -> hard_float_reg.(n - 100 )
167
- | Vec128 ->
168
- (* CR mslater: (SIMD) arm64 *)
169
- fatal_error " arm64: got vec128 register"
170
- | Valx2 ->
171
- (* CR mslater: (SIMD) arm64 *)
172
- fatal_error " arm64: got valx2 register"
173
159
| Float32 -> hard_float32_reg.(n - 100 )
160
+ | Vec128 | Valx2 -> hard_vec128_reg.(n - 100 )
174
161
175
162
let gc_regs_offset _ =
176
163
fatal_error " arm64: gc_reg_offset unreachable"
@@ -207,6 +194,8 @@ let loc_float_gen kind size last_float make_stack float ofs =
207
194
let loc_float = loc_float_gen Float Arch. size_float
208
195
(* float32 slots still take up a full word *)
209
196
let loc_float32 = loc_float_gen Float32 Arch. size_float
197
+ let loc_vec128 = loc_float_gen Vec128 Arch. size_vec128
198
+
210
199
let loc_int32 last_int make_stack int ofs =
211
200
if ! int < = last_int then begin
212
201
let l = phys_reg Int ! int in
@@ -230,14 +219,13 @@ let calling_conventions
230
219
| Float ->
231
220
loc.(i) < - loc_float last_float make_stack float ofs
232
221
| Vec128 ->
233
- (* CR mslater: (SIMD) arm64 *)
234
- fatal_error " arm64: got vec128 register"
222
+ loc.(i) < - loc_vec128 last_float make_stack float ofs
235
223
| Float32 ->
236
224
loc.(i) < - loc_float32 last_float make_stack float ofs
237
225
| Valx2 ->
238
- (* CR mslater: (SIMD) arm64 *)
239
- fatal_error " arm64: got valx2 register"
226
+ Misc. fatal_error " Unexpected machtype_component Valx2"
240
227
done ;
228
+ (* CR mslater: (SIMD) will need to be 32/64 if vec256/512 are used. *)
241
229
(loc, Misc. align (max 0 ! ofs) 16 ) (* keep stack 16-aligned *)
242
230
243
231
let incoming ofs =
@@ -300,8 +288,7 @@ let external_calling_conventions
300
288
| XFloat ->
301
289
loc.(i) < - [| loc_float last_float make_stack float ofs |]
302
290
| XVec128 ->
303
- (* CR mslater: (SIMD) arm64 *)
304
- fatal_error " arm64: got vec128 register"
291
+ loc.(i) < - [| loc_vec128 last_float make_stack float ofs |]
305
292
| XFloat32 ->
306
293
loc.(i) < - [| loc_float32 last_float make_stack float ofs |]
307
294
end )
@@ -359,11 +346,12 @@ let destroyed_at_c_noalloc_call =
359
346
Array. map (phys_reg Int ) int_regs_destroyed_at_c_noalloc_call;
360
347
Array. map (phys_reg Float ) float_regs_destroyed_at_c_noalloc_call;
361
348
Array. map (phys_reg Float32 ) float_regs_destroyed_at_c_noalloc_call;
349
+ Array. map (phys_reg Vec128 ) float_regs_destroyed_at_c_noalloc_call;
362
350
]
363
351
364
352
(* CSE needs to know that all versions of neon are destroyed. *)
365
353
(* [destroy_neon_reg n] lists the [Float], [Float32] and [Vec128] hard
   registers obtained from [phys_reg] at index [100 + n], so that all three
   typed views of that register are reported as destroyed together. *)
let destroy_neon_reg n =
  let reg_of ty = phys_reg ty (100 + n) in
  [| reg_of Float; reg_of Float32; reg_of Vec128 |]
367
355
368
356
let destroy_neon_reg7 = destroy_neon_reg 7
369
357
@@ -524,9 +512,8 @@ let assemble_file infile outfile =
524
512
let init () = ()
525
513
526
514
let operation_supported : Cmm.operation -> bool = function
515
+ | Cprefetch _ | Catomic _ -> false
527
516
| Cpopcnt
528
- | Cprefetch _ | Catomic _
529
- (* CR mslater: (float32) arm64 *)
530
517
| Cnegf Float32 | Cabsf Float32 | Caddf Float32
531
518
| Csubf Float32 | Cmulf Float32 | Cdivf Float32
532
519
| Cpackf32
@@ -536,7 +523,6 @@ let operation_supported : Cmm.operation -> bool = function
536
523
| Cstatic_cast (Float_of_float32 | Float32_of_float |
537
524
Int_of_float Float32 | Float_of_int Float32 |
538
525
V128_of_scalar _ | Scalar_of_v128 _)
539
- -> false (* Not implemented *)
540
526
| Cclz _ | Cctz _ | Cbswap _
541
527
| Capply _ | Cextcall _ | Cload _ | Calloc _ | Cstore _
542
528
| Caddi | Csubi | Cmuli | Cmulhi _ | Cdivi | Cmodi
0 commit comments