Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b97d414

Browse files
authored Mar 24, 2025
Arm64: emit casts with float32 and vec128 (#3710)
1 parent 7b73102 commit b97d414

File tree

1 file changed

+141
-56
lines changed

1 file changed

+141
-56
lines changed
 

‎backend/arm64/emit.mlp

Lines changed: 141 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -785,20 +785,14 @@ module BR = Branch_relaxation.Make (struct
785785
| Lop (Reinterpret_cast (Value_of_int | Int_of_value |
786786
Float_of_int64 | Int64_of_float)) -> 1
787787
| Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 |
788-
Float32_of_int32 | Int32_of_float32)) ->
789-
(* CR mslater: (float32) arm64 *)
790-
Misc.fatal_error "float32 is not supported on this architecture"
791-
| Lop (Reinterpret_cast V128_of_v128) ->
792-
(* CR mslater: (SIMD) arm64 *)
793-
Misc.fatal_error "SIMD is not supported on this architecture"
788+
Float32_of_int32 | Int32_of_float32)) -> 1
789+
| Lop (Reinterpret_cast V128_of_v128) -> 1
794790
| Lop (Static_cast (Float_of_int Float64 | Int_of_float Float64)) -> 1
795791
| Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 |
796-
Float_of_float32 | Float32_of_float)) ->
797-
(* CR mslater: (float32) arm64 *)
798-
Misc.fatal_error "float32 is not supported on this architecture"
799-
| Lop (Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) ->
800-
(* CR mslater: (SIMD) arm64 *)
801-
Misc.fatal_error "SIMD is not supported on this architecture"
792+
Float_of_float32 | Float32_of_float)) -> 1
793+
| Lop (Static_cast (Scalar_of_v128 (Int8x16 | Int16x8))) -> 2
794+
| Lop (Static_cast (Scalar_of_v128 (Int32x4 | Int64x2 | Float32x4 | Float64x2))) -> 1
795+
| Lop (Static_cast (V128_of_scalar _ )) -> 1
802796
| Lop (Floatop (Float64, (Iaddf | Isubf | Imulf | Idivf))) -> 1
803797
| Lop (Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> 1
804798
| Lop (Specific Inegmulf) -> 1
@@ -986,21 +980,137 @@ let emit_load_literal dst lbl =
986980
` ldr {emit_reg dst}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n`
987981
end
988982

989-
990-
let move src dst =
991-
if src.loc <> dst.loc then begin
992-
match (src, dst) with
993-
| {loc = Reg _; typ = Float}, {loc = Reg _} ->
994-
` fmov {emit_reg dst}, {emit_reg src}\n`
995-
| {loc = Reg _}, {loc = Reg _} ->
996-
` mov {emit_reg dst}, {emit_reg src}\n`
997-
| {loc = Reg _}, {loc = Stack _} ->
998-
` str {emit_reg src}, {emit_stack dst}\n`
999-
| {loc = Stack _}, {loc = Reg _} ->
1000-
` ldr {emit_reg dst}, {emit_stack src}\n`
1001-
| _ ->
1002-
assert false
1003-
end[@@warning "-4"]
983+
(* [move src dst] emits the instruction that copies [src] into [dst].
   Nothing is emitted when [Reg.same_loc src dst] holds. Otherwise the
   instruction is chosen from the type and location of both operands, and
   any combination that matches no supported arm aborts through
   [Misc.fatal_errorf]. *)
let move (src : Reg.t) (dst : Reg.t) =
984+
let distinct = not (Reg.same_loc src dst) in
985+
if distinct then
986+
match src.typ, src.loc, dst.typ, dst.loc with
987+
(* Register to register, both Float or both Float32: FMOV. *)
| Float, Reg _, Float, Reg _
988+
| Float32, Reg _, Float32, Reg _
989+
->
990+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
991+
(* Register to register, Vec128 or Valx2 on both sides: MOV with both
   operands rendered through [DSL.emit_reg_v2d]. *)
| (Vec128|Valx2), Reg _, (Vec128|Valx2), Reg _ ->
992+
DSL.ins I.MOV [| DSL.emit_reg_v2d dst; DSL.emit_reg_v2d src |]
993+
(* Register to register, Int, Val or Addr on both sides: plain MOV. *)
| (Int | Val | Addr), Reg _, (Int | Val | Addr), Reg _ ->
994+
DSL.ins I.MOV [| DSL.emit_reg dst; DSL.emit_reg src |]
995+
(* Register to stack slot: a store. The operand types are not inspected
   in this arm. *)
| _, Reg _, _, Stack _ ->
996+
` str {emit_reg src}, {emit_stack dst}\n`
997+
(* Stack slot to register: a load. The operand types are not inspected
   in this arm. *)
| _, Stack _, _, Reg _ ->
998+
` ldr {emit_reg dst}, {emit_stack src}\n`
999+
(* Stack slot to stack slot is rejected.
   NOTE(review): the message below says between registers although this
   arm is the stack-to-stack case; confirm the wording is intended. *)
| _, Stack _, _, Stack _ ->
1000+
Misc.fatal_errorf
1001+
"Illegal move between registers (%a to %a)\n"
1002+
Printreg.reg src Printreg.reg dst
1003+
(* Either operand has an Unknown location: rejected. *)
| _, Unknown, _, (Reg _ | Stack _ | Unknown)
1004+
| _, (Reg _ | Stack _), _, Unknown ->
1005+
Misc.fatal_errorf
1006+
"Illegal move with an unknown register location (%a to %a)\n"
1007+
Printreg.reg src Printreg.reg dst
1008+
(* Catch-all for a register source. Because the earlier arms already
   handle register-to-stack and Unknown destinations, this is reached for
   register-to-register moves whose type pair matched none of the
   FMOV/MOV arms above. *)
| (Float | Float32 | Vec128 | Int | Val | Addr | Valx2), (Reg _), _, _ ->
1009+
Misc.fatal_errorf
1010+
"Illegal move between registers of differing types (%a to %a)\n"
1011+
Printreg.reg src Printreg.reg dst
1012+
1013+
(* [emit_reinterpret_cast cast i] emits the code for a reinterpret cast
   whose source is [i.arg.(0)] and whose destination is [i.res.(0)].
   [DSL.check_reg] is called with the register type each case expects
   (presumably a sanity check on the operand type; its definition is not
   visible here, confirm). *)
let emit_reinterpret_cast (cast : Cmm.reinterpret_cast) i =
1014+
let src = i.arg.(0) in
1015+
let dst = i.res.(0) in
1016+
let distinct = not (Reg.same_loc src dst) in
1017+
match cast with
1018+
(* Float <-> int64: always a single FMOV. *)
| Int64_of_float ->
1019+
DSL.check_reg Float src;
1020+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
1021+
| Float_of_int64 ->
1022+
DSL.check_reg Float dst;
1023+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
1024+
(* Float32 <-> int32: a single FMOV, with the integer operand rendered
   through [DSL.emit_reg_w]. *)
| Float32_of_int32 ->
1025+
DSL.check_reg Float32 dst;
1026+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_w src |]
1027+
| Int32_of_float32 ->
1028+
DSL.check_reg Float32 src;
1029+
DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg src |]
1030+
(* Float <-> Float32 reinterpretation: nothing is emitted (and no
   register check runs) when source and destination share a location.
   NOTE(review): these two arms use I.MOV with both operands rendered
   through [DSL.emit_reg_d], while the other scalar float arms use
   I.FMOV; confirm MOV is the intended mnemonic for these operands. *)
| Float32_of_float ->
1031+
if distinct then (
1032+
DSL.check_reg Float src;
1033+
DSL.check_reg Float32 dst;
1034+
DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |])
1035+
| Float_of_float32 ->
1036+
if distinct then (
1037+
DSL.check_reg Float32 src;
1038+
DSL.check_reg Float dst;
1039+
DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |])
1040+
(* Vec128 to Vec128: emitted only when the locations differ.
   NOTE(review): [move] copies Vec128 registers with I.MOV and
   [DSL.emit_reg_v2d] operands, whereas this arm uses I.FMOV with
   [DSL.emit_reg]; confirm the two are meant to differ. *)
| V128_of_v128 ->
1041+
if distinct then (
1042+
DSL.check_reg Vec128 src;
1043+
DSL.check_reg Vec128 dst;
1044+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |])
1045+
(* Value <-> int reinterpretation is delegated to [move]. *)
| Int_of_value | Value_of_int -> move src dst
1046+
1047+
(* [emit_static_cast cast i] emits the code for a static cast whose source
   is [i.arg.(0)] and whose destination is [i.res.(0)]. Scalar conversions
   emit one conversion instruction. Casts between a scalar and a 128-bit
   vector emit FMOV on the appropriately sized register view, followed by
   a zero-extension for the 8-bit and 16-bit lane extractions. *)
let emit_static_cast (cast : Cmm.static_cast) i =
1048+
let dst = i.res.(0) in
1049+
let src = i.arg.(0) in
1050+
let distinct = not (Reg.same_loc src dst) in
1051+
match cast with
1052+
(* Float to int, for both float widths: FCVTZS. *)
| Int_of_float Float64 ->
1053+
DSL.check_reg Float src;
1054+
DSL.ins I.FCVTZS[| DSL.emit_reg dst; DSL.emit_reg src |]
1055+
| Int_of_float Float32 ->
1056+
DSL.check_reg Float32 src;
1057+
DSL.ins I.FCVTZS[| DSL.emit_reg dst; DSL.emit_reg src |]
1058+
(* Int to float, for both float widths: SCVTF. *)
| Float_of_int Float64 ->
1059+
DSL.check_reg Float dst;
1060+
DSL.ins I.SCVTF [| DSL.emit_reg dst; DSL.emit_reg src |];
1061+
| Float_of_int Float32 ->
1062+
DSL.check_reg Float32 dst;
1063+
DSL.ins I.SCVTF [| DSL.emit_reg dst; DSL.emit_reg src |];
1064+
(* Conversion between the two float widths: FCVT. *)
| Float_of_float32 ->
1065+
DSL.check_reg Float dst;
1066+
DSL.check_reg Float32 src;
1067+
DSL.ins I.FCVT [| DSL.emit_reg dst; DSL.emit_reg src |];
1068+
| Float32_of_float ->
1069+
DSL.check_reg Float32 dst;
1070+
DSL.check_reg Float src;
1071+
DSL.ins I.FCVT [| DSL.emit_reg dst; DSL.emit_reg src |];
1072+
(* Vector to scalar. The source is checked to be Vec128 in every
   sub-case. *)
| Scalar_of_v128 v ->
1073+
DSL.check_reg Vec128 src;
1074+
begin match v with
1075+
(* 8-bit and 16-bit lanes take two instructions: FMOV into the w view of
   the destination, then UXTB or UXTH. The branch-relaxation size table
   in this file counts these two cases as 2 instructions.
   NOTE(review): the extension writes [DSL.emit_reg dst] and reads
   [DSL.emit_reg_w dst]; confirm that operand pairing is accepted by the
   assembler for UXTB and UXTH. *)
| Int8x16 ->
1076+
DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |];
1077+
DSL.ins I.UXTB [| DSL.emit_reg dst; DSL.emit_reg_w dst; |];
1078+
| Int16x8 ->
1079+
DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |];
1080+
DSL.ins I.UXTH [| DSL.emit_reg dst; DSL.emit_reg_w dst; |];
1081+
(* 32-bit and 64-bit integer lanes: a single FMOV. *)
| Int32x4 ->
1082+
DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |]
1083+
| Int64x2 ->
1084+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_d src |]
1085+
(* Float lanes: nothing is emitted (and the destination check is skipped)
   when source and destination share a location. *)
| Float32x4 ->
1086+
if distinct then (
1087+
DSL.check_reg Float32 dst;
1088+
DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_s src |])
1089+
| Float64x2 ->
1090+
if distinct then (
1091+
DSL.check_reg Float dst;
1092+
DSL.ins I.FMOV [| DSL.emit_reg dst ; DSL.emit_reg_d src |])
1093+
end
1094+
(* Scalar to vector. The destination is checked to be Vec128 in every
   sub-case. *)
| V128_of_scalar v ->
1095+
DSL.check_reg Vec128 dst;
1096+
begin match v with
1097+
(* The 8-bit, 16-bit and 32-bit integer cases all emit the same FMOV from
   the w view of the source into the s view of the destination. *)
| Int8x16 ->
1098+
DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |];
1099+
| Int16x8 ->
1100+
DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |];
1101+
| Int32x4 ->
1102+
DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |]
1103+
| Int64x2 ->
1104+
DSL.ins I.FMOV [| DSL.emit_reg_d dst; DSL.emit_reg src |]
1105+
(* Float scalars: nothing is emitted (and the source check is skipped)
   when source and destination share a location. *)
| Float32x4 ->
1106+
if distinct then (
1107+
DSL.check_reg Float32 src;
1108+
DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg src |])
1109+
| Float64x2 ->
1110+
if distinct then (
1111+
DSL.check_reg Float src;
1112+
DSL.ins I.FMOV [| DSL.emit_reg_d dst ; DSL.emit_reg src |])
1113+
end
10041114

10051115
(* Output the assembly code for an instruction *)
10061116

@@ -1020,22 +1130,10 @@ let emit_instr i =
10201130
| Lop(Intop_atomic _) ->
10211131
(* Never generated; builtins are not yet translated to atomics *)
10221132
assert false
1023-
| Lop (Reinterpret_cast (Int64_of_float | Float_of_int64)) ->
1024-
` fmov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1025-
| Lop(Static_cast (Int_of_float Float64)) ->
1026-
` fcvtzs {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1027-
| Lop(Static_cast (Float_of_int Float64)) ->
1028-
` scvtf {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1029-
| Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 |
1030-
Int32_of_float32 | Float32_of_int32))
1031-
| Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 |
1032-
Float_of_float32 | Float32_of_float)) ->
1033-
(* CR mslater: (float32) arm64 *)
1034-
Misc.fatal_error "float32 not supported on this architecture"
1035-
| Lop(Reinterpret_cast V128_of_v128)
1036-
| Lop(Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) ->
1037-
(* CR mslater: (SIMD) arm64 *)
1038-
Misc.fatal_error "SIMD is not supported on this architecture"
1133+
| Lop (Reinterpret_cast cast) ->
1134+
emit_reinterpret_cast cast i
1135+
| Lop (Static_cast cast) ->
1136+
emit_static_cast cast i
10391137
| Lop(Move | Spill | Reload) ->
10401138
move i.arg.(0) i.res.(0)
10411139
| Lop(Specific Imove32) ->
@@ -1294,19 +1392,6 @@ let emit_instr i =
12941392
` fmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
12951393
| Lop(Specific(Inegmulsubf)) ->
12961394
` fnmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
1297-
| Lop(Reinterpret_cast (Int_of_value | Value_of_int)) ->
1298-
let src = i.arg.(0) and dst = i.res.(0) in
1299-
if src.loc <> dst.loc then begin
1300-
match (src, dst) with
1301-
| {loc = Reg _}, {loc = Reg _} ->
1302-
` mov {emit_reg dst}, {emit_reg src}\n`
1303-
| {loc = Reg _}, {loc = Stack _} ->
1304-
` str {emit_reg src}, {emit_stack dst}\n`
1305-
| {loc = Stack _}, {loc = Reg _} ->
1306-
` ldr {emit_reg dst}, {emit_stack src}\n`
1307-
| _ ->
1308-
assert false
1309-
end[@warning "-4"]
13101395
| Lop(Opaque) ->
13111396
assert (i.arg.(0).loc = i.res.(0).loc)
13121397
| Lop(Specific(Ishiftarith(op, shift))) ->

0 commit comments

Comments
 (0)
Please sign in to comment.