diff --git a/backend/arm64/emit.mlp b/backend/arm64/emit.mlp index c909aa8dbaa..1c672a02cb3 100644 --- a/backend/arm64/emit.mlp +++ b/backend/arm64/emit.mlp @@ -785,20 +785,14 @@ module BR = Branch_relaxation.Make (struct | Lop (Reinterpret_cast (Value_of_int | Int_of_value | Float_of_int64 | Int64_of_float)) -> 1 | Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 | - Float32_of_int32 | Int32_of_float32)) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" - | Lop (Reinterpret_cast V128_of_v128) -> - (* CR mslater: (SIMD) arm64 *) - Misc.fatal_error "SIMD is not supported on this architecture" + Float32_of_int32 | Int32_of_float32)) -> 1 + | Lop (Reinterpret_cast V128_of_v128) -> 1 | Lop (Static_cast (Float_of_int Float64 | Int_of_float Float64)) -> 1 | Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 | - Float_of_float32 | Float32_of_float)) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 is not supported on this architecture" - | Lop (Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) -> - (* CR mslater: (SIMD) arm64 *) - Misc.fatal_error "SIMD is not supported on this architecture" + Float_of_float32 | Float32_of_float)) -> 1 + | Lop (Static_cast (Scalar_of_v128 (Int8x16 | Int16x8))) -> 2 + | Lop (Static_cast (Scalar_of_v128 (Int32x4 | Int64x2 | Float32x4 | Float64x2))) -> 1 + | Lop (Static_cast (V128_of_scalar _ )) -> 1 | Lop (Floatop (Float64, (Iaddf | Isubf | Imulf | Idivf))) -> 1 | Lop (Floatop (Float32, (Iaddf | Isubf | Imulf | Idivf))) -> 1 | Lop (Specific Inegmulf) -> 1 @@ -986,21 +980,137 @@ let emit_load_literal dst lbl = ` ldr {emit_reg dst}, [{emit_reg reg_tmp1}, #:lo12:{emit_label lbl}]\n` end - -let move src dst = - if src.loc <> dst.loc then begin - match (src, dst) with - | {loc = Reg _; typ = Float}, {loc = Reg _} -> - ` fmov {emit_reg dst}, {emit_reg src}\n` - | {loc = Reg _}, {loc = Reg _} -> - ` mov {emit_reg dst}, {emit_reg src}\n` - | {loc = Reg _}, {loc = Stack _} -> - ` str {emit_reg src}, {emit_stack dst}\n` - | {loc = Stack _}, {loc = Reg _} -> - ` ldr {emit_reg dst}, {emit_stack src}\n` - | _ -> - assert false - end[@@warning "-4"] +let move (src : Reg.t) (dst : Reg.t) = + let distinct = not (Reg.same_loc src dst) in + if distinct then + match src.typ, src.loc, dst.typ, dst.loc with + | Float, Reg _, Float, Reg _ + | Float32, Reg _, Float32, Reg _ + -> + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |] + | (Vec128|Valx2), Reg _, (Vec128|Valx2), Reg _ -> + DSL.ins I.MOV [| DSL.emit_reg_v2d dst; DSL.emit_reg_v2d src |] + | (Int | Val | Addr), Reg _, (Int | Val | Addr), Reg _ -> + DSL.ins I.MOV [| DSL.emit_reg dst; DSL.emit_reg src |] + | _, Reg _, _, Stack _ -> + ` str {emit_reg src}, {emit_stack dst}\n` + | _, Stack _, _, Reg _ -> + ` ldr {emit_reg dst}, {emit_stack src}\n` + | _, Stack _, _, Stack _ -> + Misc.fatal_errorf + "Illegal move between registers (%a to %a)\n" + Printreg.reg src Printreg.reg dst + | _, Unknown, _, (Reg _ | Stack _ | Unknown) + | _, (Reg _ | Stack _), _, Unknown -> + Misc.fatal_errorf + "Illegal move with an unknown register location (%a to %a)\n" + Printreg.reg src Printreg.reg dst + | (Float | Float32 | Vec128 | Int | Val | Addr | Valx2), (Reg _), _, _ -> + Misc.fatal_errorf + "Illegal move between registers of differing types (%a to %a)\n" + Printreg.reg src Printreg.reg dst + +let emit_reinterpret_cast (cast : Cmm.reinterpret_cast) i = + let src = i.arg.(0) in + let dst = i.res.(0) in + let distinct = not (Reg.same_loc src dst) in + match cast with + | Int64_of_float -> + DSL.check_reg Float src; + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |] + | Float_of_int64 -> + DSL.check_reg Float dst; + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |] + | Float32_of_int32 -> + DSL.check_reg Float32 dst; + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_w src |] + | Int32_of_float32 -> + DSL.check_reg Float32 src; + DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg src |] + | Float32_of_float -> + if distinct then ( + DSL.check_reg Float src; + DSL.check_reg Float32 dst; + DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |]) + | Float_of_float32 -> + if distinct then ( + DSL.check_reg Float32 src; + DSL.check_reg Float dst; + DSL.ins I.MOV [| DSL.emit_reg_d dst; DSL.emit_reg_d src |]) + | V128_of_v128 -> + if distinct then ( + DSL.check_reg Vec128 src; + DSL.check_reg Vec128 dst; + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg src |]) + | Int_of_value | Value_of_int -> move src dst + +let emit_static_cast (cast : Cmm.static_cast) i = + let dst = i.res.(0) in + let src = i.arg.(0) in + let distinct = not (Reg.same_loc src dst) in + match cast with + | Int_of_float Float64 -> + DSL.check_reg Float src; + DSL.ins I.FCVTZS[| DSL.emit_reg dst; DSL.emit_reg src |] + | Int_of_float Float32 -> + DSL.check_reg Float32 src; + DSL.ins I.FCVTZS[| DSL.emit_reg dst; DSL.emit_reg src |] + | Float_of_int Float64 -> + DSL.check_reg Float dst; + DSL.ins I.SCVTF [| DSL.emit_reg dst; DSL.emit_reg src |]; + | Float_of_int Float32 -> + DSL.check_reg Float32 dst; + DSL.ins I.SCVTF [| DSL.emit_reg dst; DSL.emit_reg src |]; + | Float_of_float32 -> + DSL.check_reg Float dst; + DSL.check_reg Float32 src; + DSL.ins I.FCVT [| DSL.emit_reg dst; DSL.emit_reg src |]; + | Float32_of_float -> + DSL.check_reg Float32 dst; + DSL.check_reg Float src; + DSL.ins I.FCVT [| DSL.emit_reg dst; DSL.emit_reg src |]; + | Scalar_of_v128 v -> + DSL.check_reg Vec128 src; + begin match v with + | Int8x16 -> + DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |]; + DSL.ins I.UXTB [| DSL.emit_reg dst; DSL.emit_reg_w dst; |]; + | Int16x8 -> + DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |]; + DSL.ins I.UXTH [| DSL.emit_reg dst; DSL.emit_reg_w dst; |]; + | Int32x4 -> + DSL.ins I.FMOV [| DSL.emit_reg_w dst; DSL.emit_reg_s src |] + | Int64x2 -> + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_d src |] + | Float32x4 -> + if distinct then ( + DSL.check_reg Float32 dst; + DSL.ins I.FMOV [| DSL.emit_reg dst; DSL.emit_reg_s src |]) + | Float64x2 -> + if distinct then ( + DSL.check_reg Float dst; + DSL.ins I.FMOV [| DSL.emit_reg dst ; DSL.emit_reg_d src |]) + end + | V128_of_scalar v -> + DSL.check_reg Vec128 dst; + begin match v with + | Int8x16 -> + DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |]; + | Int16x8 -> + DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |]; + | Int32x4 -> + DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg_w src |] + | Int64x2 -> + DSL.ins I.FMOV [| DSL.emit_reg_d dst; DSL.emit_reg src |] + | Float32x4 -> + if distinct then ( + DSL.check_reg Float32 src; + DSL.ins I.FMOV [| DSL.emit_reg_s dst; DSL.emit_reg src |]) + | Float64x2 -> + if distinct then ( + DSL.check_reg Float src; + DSL.ins I.FMOV [| DSL.emit_reg_d dst ; DSL.emit_reg src |]) + end (* Output the assembly code for an instruction *) @@ -1020,22 +1130,10 @@ let emit_instr i = | Lop(Intop_atomic _) -> (* Never generated; builtins are not yet translated to atomics *) assert false - | Lop (Reinterpret_cast (Int64_of_float | Float_of_int64)) -> - ` fmov {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop(Static_cast (Int_of_float Float64)) -> - ` fcvtzs {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop(Static_cast (Float_of_int Float64)) -> - ` scvtf {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}\n` - | Lop (Reinterpret_cast (Float32_of_float | Float_of_float32 | - Int32_of_float32 | Float32_of_int32)) - | Lop (Static_cast (Float_of_int Float32 | Int_of_float Float32 | - Float_of_float32 | Float32_of_float)) -> - (* CR mslater: (float32) arm64 *) - Misc.fatal_error "float32 not supported on this architecture" - | Lop(Reinterpret_cast V128_of_v128) - | Lop(Static_cast (V128_of_scalar _ | Scalar_of_v128 _)) -> - (* CR mslater: (SIMD) arm64 *) - Misc.fatal_error "SIMD is not supported on this architecture" + | Lop (Reinterpret_cast cast) -> + emit_reinterpret_cast cast i + | Lop (Static_cast cast) -> + emit_static_cast cast i | Lop(Move | Spill | Reload) -> move i.arg.(0) i.res.(0) | Lop(Specific Imove32) -> @@ -1294,19 +1392,6 @@ let emit_instr i = ` fmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n` | Lop(Specific(Inegmulsubf)) -> ` fnmsub {emit_reg i.res.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}, {emit_reg i.arg.(0)}\n` - | Lop(Reinterpret_cast (Int_of_value | Value_of_int)) -> - let src = i.arg.(0) and dst = i.res.(0) in - if src.loc <> dst.loc then begin - match (src, dst) with - | {loc = Reg _}, {loc = Reg _} -> - ` mov {emit_reg dst}, {emit_reg src}\n` - | {loc = Reg _}, {loc = Stack _} -> - ` str {emit_reg src}, {emit_stack dst}\n` - | {loc = Stack _}, {loc = Reg _} -> - ` ldr {emit_reg dst}, {emit_stack src}\n` - | _ -> - assert false - end[@warning "-4"] | Lop(Opaque) -> assert (i.arg.(0).loc = i.res.(0).loc) | Lop(Specific(Ishiftarith(op, shift))) ->