@@ -170,15 +170,16 @@ let phys_reg ty n =
170
170
171
171
(* Named handles for individual integer registers, each obtained from
   [phys_reg Int] with a fixed register index.
   NOTE(review): the index-to-name mapping (0 = rax, 4 = rdx, 5 = rcx, ...)
   presumably follows the integer register name table defined elsewhere in
   this file -- confirm before adding or renumbering entries. *)
let rax = phys_reg Int 0
172
172
let rdx = phys_reg Int 4
173
+ let rcx = phys_reg Int 5
173
174
let r10 = phys_reg Int 10
174
175
let r11 = phys_reg Int 11
175
176
let rbp = phys_reg Int 12
176
177
177
178
(* [destroy_xmm n] lists the register entries for xmm[n] that an instruction
   clobbering that register must report as destroyed.

   CSE needs to know that all versions of the register are destroyed, so the
   result holds one entry per register class: the [Float] view always, plus
   the [Vec128] view when the SIMD language extension is enabled. Both views
   are looked up with [phys_reg] at index [100 + n]. *)
let destroy_xmm n =
  let index = 100 + n in
  match Language_extension.is_enabled SIMD with
  | true -> [| phys_reg Float index; phys_reg Vec128 index |]
  | false -> [| phys_reg Float index |]
182
183
183
184
(* Registers reported as destroyed by a PLT stub: [r10] and [r11] when
   [X86_proc.use_plt] is set, and nothing otherwise. *)
let destroyed_by_plt_stub =
  if X86_proc.use_plt then [| r10; r11 |] else [||]
@@ -399,6 +400,21 @@ let destroyed_at_pushtrap =
399
400
(* [has_pushtrap traps] is [true] iff at least one action in [traps] is a
   [Cmm.Push]. *)
let has_pushtrap traps =
  let is_push = function
    | Cmm.Push _ -> true
    | Pop _ -> false
  in
  List.exists is_push traps
401
402
403
(* [destroyed_by_simd_op op] is the array of registers reported as destroyed
   by the SIMD operation [op], selected by [Simd_proc.register_behavior op].

   Every behavior is listed explicitly (no wildcard) so that adding a new
   constructor forces this function to be revisited. *)
let destroyed_by_simd_op op =
  match Simd_proc.register_behavior op with
  (* NOTE(review): presumably these write their result to the fixed register
     xmm0 -- confirm against [Simd_proc]. All views of xmm0 are reported. *)
  | R_RM_rax_rdx_to_xmm0 | R_RM_to_xmm0 -> destroy_xmm 0
  (* NOTE(review): presumably these write their result to the fixed register
     rcx -- confirm against [Simd_proc]. *)
  | R_RM_rax_rdx_to_rcx | R_RM_to_rcx -> [| rcx |]
  (* Remaining behaviors report no destroyed registers. *)
  | R_to_fst | R_to_R | R_to_RM | RM_to_R
  | R_R_to_fst | R_RM_to_fst | R_RM_to_R | R_RM_xmm0_to_fst -> [||]
417
+
402
418
(* note: keep this function in sync with `destroyed_at_{basic,terminator}` below. *)
403
419
let destroyed_at_oper = function
404
420
Iop (Icall_ind | Icall_imm _ ) ->
@@ -410,7 +426,7 @@ let destroyed_at_oper = function
410
426
| Iop (Iintop (Idiv | Imod )) | Iop (Iintop_imm ((Idiv | Imod ), _))
411
427
-> [| rax; rdx |]
412
428
| Iop (Istore (Single , _, _))
413
- -> destroy_xmm15 ()
429
+ -> destroy_xmm 15
414
430
| Iop (Ialloc _ | Ipoll _ ) -> destroyed_at_alloc_or_poll
415
431
| Iop (Iintop (Imulh _ | Icomp _) | Iintop_imm ((Icomp _), _))
416
432
-> [| rax |]
@@ -420,9 +436,10 @@ let destroyed_at_oper = function
420
436
| Ireturn traps when has_pushtrap traps -> assert false
421
437
| Iop (Ispecific (Irdtsc | Irdpmc )) -> [| rax; rdx |]
422
438
| Iop (Ispecific (Ilfence | Isfence | Imfence )) -> [||]
439
+ | Iop (Ispecific(Isimd op )) -> destroyed_by_simd_op op
423
440
| Iop (Ispecific (Isextend32 | Izextend32 | Ilea _
424
441
| Istore_int (_, _, _) | Ioffset_loc (_, _)
425
- | Ipause | Iprefetch _ | Isimd _
442
+ | Ipause | Iprefetch _
426
443
| Ifloatarithmem (_, _) | Ifloatsqrtf _ | Ibswap _))
427
444
| Iop (Iintop (Iadd | Isub | Imul | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
428
445
| Ipopcnt | Iclz _ | Ictz _ ))
@@ -465,14 +482,15 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
465
482
| Op (Intop (Idiv | Imod )) | Op (Intop_imm ((Idiv | Imod ), _ )) ->
466
483
[| rax; rdx |]
467
484
| Op (Store(Single, _ , _ )) ->
468
- destroy_xmm15 ()
485
+ destroy_xmm 15
469
486
| Op (Intop (Imulh _ | Icomp _ ) | Intop_imm ((Icomp _ ), _ )) ->
470
487
[| rax |]
471
488
| Op (Specific (Irdtsc | Irdpmc )) ->
472
489
[| rax; rdx |]
473
490
| Op Poll -> destroyed_at_alloc_or_poll
474
491
| Op (Alloc _ ) ->
475
492
destroyed_at_alloc_or_poll
493
+ | Op (Specific (Isimd op )) -> destroyed_by_simd_op op
476
494
| Op (Move | Spill | Reload
477
495
| Const_int _ | Const_float _ | Const_symbol _ | Const_vec128 _
478
496
| Stackoffset _
@@ -497,7 +515,7 @@ let destroyed_at_basic (basic : Cfg_intf.S.basic) =
497
515
| Begin_region
498
516
| End_region
499
517
| Specific (Ilea _ | Istore_int _ | Ioffset_loc _
500
- | Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _ | Isimd _
518
+ | Ifloatarithmem _ | Ifloatsqrtf _ | Ibswap _
501
519
| Isextend32 | Izextend32 | Ipause
502
520
| Iprefetch _ | Ilfence | Isfence | Imfence )
503
521
| Name_for_debugger _ | Dls_get )
@@ -596,6 +614,20 @@ let max_register_pressure =
596
614
consumes ~int: 1 ~float: 0
597
615
| Istore (Single , _ , _ ) | Icompf _ ->
598
616
consumes ~int: 0 ~float: 1
617
+ | Ispecific (Isimd op ) ->
618
+ (match Simd_proc. register_behavior op with
619
+ | R_RM_rax_rdx_to_xmm0
620
+ | R_RM_to_xmm0 -> consumes ~int: 0 ~float: 1
621
+ | R_RM_rax_rdx_to_rcx
622
+ | R_RM_to_rcx -> consumes ~int: 1 ~float: 0
623
+ | R_to_fst
624
+ | R_to_R
625
+ | R_to_RM
626
+ | RM_to_R
627
+ | R_R_to_fst
628
+ | R_RM_to_fst
629
+ | R_RM_to_R
630
+ | R_RM_xmm0_to_fst -> consumes ~int: 0 ~float: 0 )
599
631
| Iintop (Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr | Iasr
600
632
| Ipopcnt | Iclz _| Ictz _)
601
633
| Iintop_imm ((Iadd | Isub | Imul | Imulh _ | Iand | Ior | Ixor | Ilsl | Ilsr
@@ -613,7 +645,7 @@ let max_register_pressure =
613
645
| Istackoffset _ | Iload _
614
646
| Ispecific (Ilea _ | Isextend32 | Izextend32 | Iprefetch _ | Ipause
615
647
| Irdtsc | Irdpmc | Istore_int (_, _, _)
616
- | Ilfence | Isfence | Imfence | Isimd _
648
+ | Ilfence | Isfence | Imfence
617
649
| Ioffset_loc (_, _) | Ifloatarithmem (_, _) | Ifloatsqrtf _
618
650
| Ibswap _)
619
651
| Iname_for_debugger _ | Iprobe _ | Iprobe_is_enabled _ | Iopaque
0 commit comments