@@ -785,20 +785,14 @@ module BR = Branch_relaxation.Make (struct
785
785
| Lop (Reinterpret_cast (Value_of_int | Int_of_value |
786
786
Float_of_int64 | Int64_of_float)) -> 1
787
787
| Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 |
788
- Float32_of_int32 | Int32_of_float32)) ->
789
- (* CR mslater: (float32) arm64 *)
790
- Misc.fatal_error "float32 is not supported on this architecture"
791
- | Lop (Reinterpret_cast V128_of_v128) ->
792
- (* CR mslater: (SIMD) arm64 *)
793
- Misc.fatal_error "SIMD is not supported on this architecture"
788
+ Float32_of_int32 | Int32_of_float32)) -> 1
789
+ | Lop (Reinterpret_cast V128_of_v128) -> 1
794
790
| Lop (Static_cast (Float_of_int Float64 | Int_of_float Float64)) -> 1
795
791
| Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 |
796
- Float_of_float32 | Float32_of_float)) ->
797
- (* CR mslater: (float32) arm64 *)
798
- Misc.fatal_error "float32 is not supported on this architecture"
799
- | Lop (Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) ->
800
- (* CR mslater: (SIMD) arm64 *)
801
- Misc.fatal_error "SIMD is not supported on this architecture"
792
+ Float_of_float32 | Float32_of_float)) -> 1
793
+ | Lop (Static_cast (Scalar_of_v128 (Int8x16 | Int16x8))) -> 2
794
+ | Lop (Static_cast (Scalar_of_v128 (Int32x4 | Int64x2 | Float32x4 | Float64x2))) -> 1
795
+ | Lop (Static_cast (V128_of_scalar _ )) -> 1
802
796
| Lop (Floatop (Float64, (Iaddf | Isubf | Imulf | Idivf))) -> 1
803
797
| Lop (Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> 1
804
798
| Lop (Specific Inegmulf) -> 1
@@ -986,21 +980,137 @@ let emit_load_literal dst lbl =
986
980
` ldr {emit_reg dst}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n`
987
981
end
988
982
989
-
990
- let move src dst =
991
- if src.loc <> dst.loc then begin
992
- match (src, dst) with
993
- | {loc = Reg _; typ = Float}, {loc = Reg _} ->
994
- ` fmov {emit_reg dst}, {emit_reg src}\n`
995
- | {loc = Reg _}, {loc = Reg _} ->
996
- ` mov {emit_reg dst}, {emit_reg src}\n`
997
- | {loc = Reg _}, {loc = Stack _} ->
998
- ` str {emit_reg src}, {emit_stack dst}\n`
999
- | {loc = Stack _}, {loc = Reg _} ->
1000
- ` ldr {emit_reg dst}, {emit_stack src}\n`
1001
- | _ ->
1002
- assert false
1003
- end[@@warning "-4"]
983
(* Emit a move of [src] into [dst].  Nothing is emitted when
   [Reg.same_loc] reports that both already share a location.
   Register-to-register moves are only accepted between compatible
   register types; register<->stack moves use str/ldr; every other
   combination (stack to stack, or any [Unknown] location) is a fatal
   error. *)
let move (src : Reg.t) (dst : Reg.t) =
  if not (Reg.same_loc src dst) then
    match src.loc, dst.loc with
    | Reg _, Reg _ ->
      (* Both sides live in registers: the instruction is selected by the
         pair of register types. *)
      (match src.typ, dst.typ with
       | Float, Float
       | Float32, Float32 ->
         DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
       | (Vec128 | Valx2), (Vec128 | Valx2) ->
         DSL.ins I.MOV [| DSL.emit_reg_v2d dst; DSL.emit_reg_v2d src |]
       | (Int | Val | Addr), (Int | Val | Addr) ->
         DSL.ins I.MOV [| DSL.emit_reg dst; DSL.emit_reg src |]
       | (Float | Float32 | Vec128 | Int | Val | Addr | Valx2), _ ->
         Misc.fatal_errorf
           "Illegal move between registers of differing types (%a to %a)\n"
           Printreg.reg src Printreg.reg dst)
    | Reg _, Stack _ ->
      ` str {emit_reg src}, {emit_stack dst}\n`
    | Stack _, Reg _ ->
      ` ldr {emit_reg dst}, {emit_stack src}\n`
    | Stack _, Stack _ ->
      Misc.fatal_errorf
        "Illegal move between registers (%a to %a)\n"
        Printreg.reg src Printreg.reg dst
    | Unknown, (Reg _ | Stack _ | Unknown)
    | (Reg _ | Stack _), Unknown ->
      Misc.fatal_errorf
        "Illegal move with an unknown register location (%a to %a)\n"
        Printreg.reg src Printreg.reg dst
1012
+
1013
(* Emit the code for a [Reinterpret_cast cast] operation.  The operand is
   read from [i.arg.(0)] and the result is written to [i.res.(0)].

   For each cast, the floating-point or vector side(s) are first passed to
   [DSL.check_reg] with the register type expected there.  Casts between
   integer and float registers always emit an FMOV; casts that stay within
   the float/vector registers emit nothing when [Reg.same_loc] says source
   and destination already coincide.  [Int_of_value] / [Value_of_int] are
   plain moves and are delegated to [move]. *)
let emit_reinterpret_cast (cast : Cmm.reinterpret_cast) i =
  let src = i.arg.(0) in
  let dst = i.res.(0) in
  let distinct = not (Reg.same_loc src dst) in
  match cast with
  | Int64_of_float ->
    DSL.check_reg Float src;
    DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
  | Float_of_int64 ->
    DSL.check_reg Float dst;
    DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]
  | Float32_of_int32 ->
    DSL.check_reg Float32 dst;
    (* The integer side is printed through its W view. *)
    DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_w src |]
  | Int32_of_float32 ->
    DSL.check_reg Float32 src;
    DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg src |]
  | Float32_of_float ->
    if distinct then (
      DSL.check_reg Float src;
      DSL.check_reg Float32 dst;
      (* Both operands are printed through their D view. *)
      DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |])
  | Float_of_float32 ->
    if distinct then (
      DSL.check_reg Float32 src;
      DSL.check_reg Float dst;
      DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |])
  | V128_of_v128 ->
    if distinct then (
      DSL.check_reg Vec128 src;
      DSL.check_reg Vec128 dst;
      (* FMOV (register) only has scalar H/S/D forms, so it cannot copy a
         full 128-bit register.  Use the same vector MOV on the 2D views
         that [move] emits for Vec128 register-to-register copies. *)
      DSL.ins I.MOV [| DSL.emit_reg_v2d dst; DSL.emit_reg_v2d src |])
  | Int_of_value | Value_of_int -> move src dst
1046
+
1047
(* Emit the code for a [Static_cast cast] operation, converting the
   operand in [i.arg.(0)] into the result register [i.res.(0)].

   Scalar int/float conversions use FCVTZS / SCVTF / FCVT.  Moving a scalar
   into or out of a vector register uses FMOV on the appropriate register
   views, followed by UXTB / UXTH for the 8- and 16-bit integer lanes.  For
   the float lanes nothing is emitted when [Reg.same_loc] says source and
   destination already coincide.  Floating-point and vector registers are
   passed to [DSL.check_reg] with their expected type before the
   instruction is emitted. *)
let emit_static_cast (cast : Cmm.static_cast) i =
  let dst = i.res.(0) in
  let src = i.arg.(0) in
  let same_place = Reg.same_loc src dst in
  (* [op dst, src] with both operands printed by [DSL.emit_reg]. *)
  let convert op = DSL.ins op [| DSL.emit_reg dst; DSL.emit_reg src |] in
  (* FMOV from the S view of [src] into the W view of [dst]. *)
  let s_view_to_w () =
    DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |]
  in
  (* FMOV from the W view of [src] into the S view of [dst]. *)
  let w_view_to_s () =
    DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |]
  in
  match cast with
  | Int_of_float Float64 ->
    DSL.check_reg Float src;
    convert I.FCVTZS
  | Int_of_float Float32 ->
    DSL.check_reg Float32 src;
    convert I.FCVTZS
  | Float_of_int Float64 ->
    DSL.check_reg Float dst;
    convert I.SCVTF
  | Float_of_int Float32 ->
    DSL.check_reg Float32 dst;
    convert I.SCVTF
  | Float_of_float32 ->
    DSL.check_reg Float dst;
    DSL.check_reg Float32 src;
    convert I.FCVT
  | Float32_of_float ->
    DSL.check_reg Float32 dst;
    DSL.check_reg Float src;
    convert I.FCVT
  | Scalar_of_v128 lanes ->
    DSL.check_reg Vec128 src;
    (match lanes with
     | Int8x16 ->
       s_view_to_w ();
       (* Narrow the extracted word down to its low byte. *)
       DSL.ins I.UXTB [| DSL.emit_reg dst; DSL.emit_reg_w dst |]
     | Int16x8 ->
       s_view_to_w ();
       (* Narrow the extracted word down to its low halfword. *)
       DSL.ins I.UXTH [| DSL.emit_reg dst; DSL.emit_reg_w dst |]
     | Int32x4 ->
       s_view_to_w ()
     | Int64x2 ->
       DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_d src |]
     | Float32x4 ->
       if not same_place then (
         DSL.check_reg Float32 dst;
         DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_s src |])
     | Float64x2 ->
       if not same_place then (
         DSL.check_reg Float dst;
         DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_d src |]))
  | V128_of_scalar lanes ->
    DSL.check_reg Vec128 dst;
    (match lanes with
     | Int8x16 | Int16x8 | Int32x4 ->
       (* All three narrow integer lane widths use the same W -> S move. *)
       w_view_to_s ()
     | Int64x2 ->
       DSL.ins I.FMOV [| DSL.emit_reg_d dst; DSL.emit_reg src |]
     | Float32x4 ->
       if not same_place then (
         DSL.check_reg Float32 src;
         DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg src |])
     | Float64x2 ->
       if not same_place then (
         DSL.check_reg Float src;
         DSL.ins I.FMOV [| DSL.emit_reg_d dst; DSL.emit_reg src |]))
1004
1114
1005
1115
(* Output the assembly code for an instruction *)
1006
1116
@@ -1020,22 +1130,10 @@ let emit_instr i =
1020
1130
| Lop(Intop_atomic _) ->
1021
1131
(* Never generated; builtins are not yet translated to atomics *)
1022
1132
assert false
1023
- | Lop (Reinterpret_cast (Int64_of_float | Float_of_int64)) ->
1024
- ` fmov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1025
- | Lop(Static_cast (Int_of_float Float64)) ->
1026
- ` fcvtzs {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1027
- | Lop(Static_cast (Float_of_int Float64)) ->
1028
- ` scvtf {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n`
1029
- | Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 |
1030
- Int32_of_float32 | Float32_of_int32))
1031
- | Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 |
1032
- Float_of_float32 | Float32_of_float)) ->
1033
- (* CR mslater: (float32) arm64 *)
1034
- Misc.fatal_error "float32 not supported on this architecture"
1035
- | Lop(Reinterpret_cast V128_of_v128)
1036
- | Lop(Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) ->
1037
- (* CR mslater: (SIMD) arm64 *)
1038
- Misc.fatal_error "SIMD is not supported on this architecture"
1133
+ | Lop (Reinterpret_cast cast) ->
1134
+ emit_reinterpret_cast cast i
1135
+ | Lop (Static_cast cast) ->
1136
+ emit_static_cast cast i
1039
1137
| Lop(Move | Spill | Reload) ->
1040
1138
move i.arg.(0) i.res.(0)
1041
1139
| Lop(Specific Imove32) ->
@@ -1294,19 +1392,6 @@ let emit_instr i =
1294
1392
` fmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
1295
1393
| Lop(Specific(Inegmulsubf)) ->
1296
1394
` fnmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n`
1297
- | Lop(Reinterpret_cast (Int_of_value | Value_of_int)) ->
1298
- let src = i.arg.(0) and dst = i.res.(0) in
1299
- if src.loc <> dst.loc then begin
1300
- match (src, dst) with
1301
- | {loc = Reg _}, {loc = Reg _} ->
1302
- ` mov {emit_reg dst}, {emit_reg src}\n`
1303
- | {loc = Reg _}, {loc = Stack _} ->
1304
- ` str {emit_reg src}, {emit_stack dst}\n`
1305
- | {loc = Stack _}, {loc = Reg _} ->
1306
- ` ldr {emit_reg dst}, {emit_stack src}\n`
1307
- | _ ->
1308
- assert false
1309
- end[@warning "-4"]
1310
1395
| Lop(Opaque) ->
1311
1396
assert (i.arg.(0).loc = i.res.(0).loc)
1312
1397
| Lop(Specific(Ishiftarith(op, shift))) ->
0 commit comments