Skip to content

Commit

Permalink
RISC-V: Support MASK_LEN_{LOAD_LANES,STORE_LANES}
Browse files Browse the repository at this point in the history
This patch allows us to auto-vectorize the following case:

  void __attribute__ ((noinline, noclone))                                     \
  NAME##_8 (OUTTYPE *__restrict dest, INTYPE *__restrict src,                  \
	    MASKTYPE *__restrict cond, intptr_t n)                             \
  {                                                                            \
    for (intptr_t i = 0; i < n; ++i)                                           \
      if (cond[i])                                                             \
	dest[i] = (src[i * 8] + src[i * 8 + 1] + src[i * 8 + 2]                \
		   + src[i * 8 + 3] + src[i * 8 + 4] + src[i * 8 + 5]          \
		   + src[i * 8 + 6] + src[i * 8 + 7]);                         \
  }

  TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, int32_t)                               \

  TEST2 (NAME##_i32, OUTTYPE, int32_t)                                         \

  TEST1 (NAME##_i32, int32_t)                                                  \

TEST (test)

ASM:

test_i32_i32_f32_8:
	ble	a3,zero,.L5
.L3:
	vsetvli	a4,a3,e8,mf4,ta,ma
	vle32.v	v0,0(a2)
	vsetvli	a5,zero,e32,m1,ta,ma
	vmsne.vi	v0,v0,0
	vsetvli	zero,a4,e32,m1,ta,ma
	vlseg8e32.v	v8,(a1),v0.t
	vsetvli	a5,zero,e32,m1,ta,ma
	slli	a6,a4,2
	vadd.vv	v1,v9,v8
	slli	a7,a4,5
	vadd.vv	v1,v1,v10
	sub	a3,a3,a4
	vadd.vv	v1,v1,v11
	vadd.vv	v1,v1,v12
	vadd.vv	v1,v1,v13
	vadd.vv	v1,v1,v14
	vadd.vv	v1,v1,v15
	vsetvli	zero,a4,e32,m1,ta,ma
	vse32.v	v1,0(a0),v0.t
	add	a2,a2,a6
	add	a1,a1,a7
	add	a0,a0,a6
	bne	a3,zero,.L3
.L5:
	ret

gcc/ChangeLog:

	* config/riscv/autovec.md (vec_mask_len_load_lanes<mode><vsingle>):
	New pattern.
	(vec_mask_len_store_lanes<mode><vsingle>): Ditto.
	* config/riscv/riscv-protos.h (expand_lanes_load_store): New function.
	* config/riscv/riscv-v.cc (get_mask_mode): Add tuple mask mode.
	(expand_lanes_load_store): New function.
	* config/riscv/vector-iterators.md: New iterator.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-2.c:
	Adapt test.
	* gcc.target/riscv/rvv/autovec/partial/slp-1.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-16.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-17.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-18.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-19.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-2.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-3.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-4.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-5.c: Ditto.
	* gcc.target/riscv/rvv/autovec/partial/slp-6.c: Ditto.
	* gcc.target/riscv/rvv/rvv.exp: Add lanes tests.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-1.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-2.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-3.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-4.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-5.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-6.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load-7.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-1.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-2.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-3.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-4.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-5.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-6.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_load_run-7.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-1.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-2.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-3.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-4.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-5.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-6.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store-7.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-1.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-2.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-3.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-4.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-5.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-6.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/mask_struct_store_run-7.c:
	New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-1.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-10.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-11.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-12.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-13.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-14.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-15.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-16.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-17.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-18.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-2.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-3.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-4.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-5.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-6.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-7.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-8.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect-9.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-10.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-11.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-12.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-13.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-14.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-15.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-16.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-17.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-18.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-4.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-5.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-6.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-7.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-8.c: New test.
	* gcc.target/riscv/rvv/autovec/struct/struct_vect_run-9.c: New test.
  • Loading branch information
zhongjuzhe authored and Incarnation-p-lee committed Aug 16, 2023
1 parent d5acdd6 commit fe57888
Show file tree
Hide file tree
Showing 80 changed files with 2,841 additions and 21 deletions.
30 changes: 30 additions & 0 deletions gcc/config/riscv/autovec.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,36 @@
DONE;
})

;; =========================================================================
;; == Array Load/Store
;; =========================================================================

;; Masked, length-controlled load-lanes expander for the modes of iterator VT.
;;   Operand 0: destination register, mode VT.
;;   Operand 1: source memory, mode VT.
;;   Operand 2: mask, in the mode selected by <VM>.
;;   Operand 3: length (autovec_length_operand).
;;   Operand 4: must be constant zero (const_0_operand); presumably the bias
;;              operand of the length-based optab -- TODO confirm.
;; Available when TARGET_VECTOR.  The pattern emits no RTL template of its
;; own: everything is delegated to riscv_vector::expand_lanes_load_store
;; with is_load = true.
(define_expand "vec_mask_len_load_lanes<mode><vsingle>"
[(match_operand:VT 0 "register_operand")
(match_operand:VT 1 "memory_operand")
(match_operand:<VM> 2 "vector_mask_operand")
(match_operand 3 "autovec_length_operand")
(match_operand 4 "const_0_operand")]
"TARGET_VECTOR"
{
riscv_vector::expand_lanes_load_store (operands, true);
DONE;
}
)

;; Masked, length-controlled store-lanes expander for the modes of iterator VT.
;;   Operand 0: destination memory, mode VT.
;;   Operand 1: source register, mode VT.
;;   Operand 2: mask, in the mode selected by <VM>.
;;   Operand 3: length (autovec_length_operand).
;;   Operand 4: must be constant zero (const_0_operand); presumably the bias
;;              operand of the length-based optab -- TODO confirm.
;; Available when TARGET_VECTOR.  The pattern emits no RTL template of its
;; own: everything is delegated to riscv_vector::expand_lanes_load_store
;; with is_load = false.
(define_expand "vec_mask_len_store_lanes<mode><vsingle>"
[(match_operand:VT 0 "memory_operand")
(match_operand:VT 1 "register_operand")
(match_operand:<VM> 2 "vector_mask_operand")
(match_operand 3 "autovec_length_operand")
(match_operand 4 "const_0_operand")]
"TARGET_VECTOR"
{
riscv_vector::expand_lanes_load_store (operands, false);
DONE;
}
)

;; =========================================================================
;; == Vector creation
;; =========================================================================
Expand Down
1 change: 1 addition & 0 deletions gcc/config/riscv/riscv-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ void expand_load_store (rtx *, bool);
void expand_gather_scatter (rtx *, bool);
void expand_cond_len_ternop (unsigned, rtx *);
void prepare_ternary_operands (rtx *, bool = false);
void expand_lanes_load_store (rtx *, bool);

/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
Expand Down
52 changes: 51 additions & 1 deletion gcc/config/riscv/riscv-v.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1900,7 +1900,13 @@ get_avl_type_rtx (enum avl_type type)
machine_mode
get_mask_mode (machine_mode mode)
{
  /* Return the BImode vector mode used as the mask for MODE.  The mask
     has one unit per unit of MODE, except for tuple modes (below).  */
  poly_int64 nunits = GET_MODE_NUNITS (mode);
  if (riscv_v_ext_tuple_mode_p (mode))
    {
      /* For a tuple mode the unit count is divided by get_nf (mode), so
	 the mask is sized for a single field of the tuple rather than for
	 the whole tuple (presumably one mask bit governs the same element
	 index in every field -- confirm against the segment patterns).
	 This adjustment must run before the mask mode is looked up; an
	 early return on the unadjusted unit count would skip it.  */
      unsigned int nf = get_nf (mode);
      nunits = exact_div (nunits, nf);
    }
  return get_vector_mode (BImode, nunits).require ();
}

/* Return the appropriate M1 mode for MODE. */
Expand Down Expand Up @@ -3716,4 +3722,48 @@ prepare_ternary_operands (rtx *ops, bool split_p)
}
}

/* Expand VEC_MASK_LEN_{LOAD_LANES,STORE_LANES}.

   OPS are the expander operands.  For a load (IS_LOAD true) OPS[0] is the
   destination register and OPS[1] the source memory; for a store OPS[0]
   is the destination memory and OPS[1] the source register.  OPS[2] is
   the mask and OPS[3] the length.  OPS[4] is not read here.  */
void
expand_lanes_load_store (rtx *ops, bool is_load)
{
  machine_mode mode = GET_MODE (ops[0]);
  rtx mem = is_load ? ops[1] : ops[0];
  rtx reg = is_load ? ops[0] : ops[1];
  rtx addr = XEXP (mem, 0);
  rtx mask = ops[2];
  rtx len = ops[3];

  /* A constant length equal to the number of units of MODE is treated as
     a VLMAX access.
     NOTE(review): MODE is the tuple mode here, and get_mask_mode divides
     a tuple mode's unit count by NF; confirm that LEN is expressed in
     the same units as GET_MODE_NUNITS (mode), otherwise this test may
     never succeed.  */
  poly_int64 const_len;
  bool vlmax_p = (poly_int_rtx_p (len, &const_len)
		  && known_eq (const_len, GET_MODE_NUNITS (mode)));

  if (vlmax_p && is_load)
    {
      rtx load_ops[] = {reg, mask, RVV_VUNDEF (mode), addr};
      emit_vlmax_masked_insn (code_for_pred_unit_strided_load (mode),
			      RVV_UNOP_M, load_ops);
      return;
    }

  if (vlmax_p)
    {
      /* VLMAX store: the store pattern takes an explicit length operand,
	 so obtain one in a fresh Pmode register via emit_vlmax_vsetvl.  */
      rtx vlmax = gen_reg_rtx (Pmode);
      emit_vlmax_vsetvl (mode, vlmax);
      emit_insn (gen_pred_unit_strided_store (mode, mask, addr, reg, vlmax,
					      get_avl_type_rtx (VLMAX)));
      return;
    }

  /* Non-VLMAX access: LEN is used directly only when it satisfies
     constraint K; anything else is forced into a Pmode register.  */
  if (!satisfies_constraint_K (len))
    len = force_reg (Pmode, len);

  if (is_load)
    {
      rtx load_ops[] = {reg, mask, RVV_VUNDEF (mode), addr};
      emit_nonvlmax_masked_insn (code_for_pred_unit_strided_load (mode),
				 RVV_UNOP_M, load_ops, len);
    }
  else
    emit_insn (gen_pred_unit_strided_store (mode, mask, addr, reg, len,
					    get_avl_type_rtx (NONVLMAX)));
}

} // namespace riscv_vector
95 changes: 95 additions & 0 deletions gcc/config/riscv/vector-iterators.md
Original file line number Diff line number Diff line change
Expand Up @@ -1417,6 +1417,101 @@
(V1DF "df") (V2DF "df") (V4DF "df") (V8DF "df") (V16DF "df") (V32DF "df") (V64DF "df") (V128DF "df") (V256DF "df") (V512DF "df")
])

;; Lower-case name of the single (non-tuple) vector mode that corresponds to
;; each tuple mode: RVVM<L>x<N><E> maps to rvvm<L><e>, and likewise for the
;; fractional MF<k> variants.  It supplies the second mode suffix of the
;; vec_mask_len_{load,store}_lanes<mode><vsingle> pattern names, so every
;; entry must name the mode with the same LMUL and element type as the tuple.
(define_mode_attr vsingle [
(RVVM1x8QI "rvvm1qi") (RVVMF2x8QI "rvvmf2qi") (RVVMF4x8QI "rvvmf4qi") (RVVMF8x8QI "rvvmf8qi")
(RVVM1x7QI "rvvm1qi") (RVVMF2x7QI "rvvmf2qi") (RVVMF4x7QI "rvvmf4qi") (RVVMF8x7QI "rvvmf8qi")
(RVVM1x6QI "rvvm1qi") (RVVMF2x6QI "rvvmf2qi") (RVVMF4x6QI "rvvmf4qi") (RVVMF8x6QI "rvvmf8qi")
(RVVM1x5QI "rvvm1qi") (RVVMF2x5QI "rvvmf2qi") (RVVMF4x5QI "rvvmf4qi") (RVVMF8x5QI "rvvmf8qi")
(RVVM2x4QI "rvvm2qi") (RVVM1x4QI "rvvm1qi") (RVVMF2x4QI "rvvmf2qi") (RVVMF4x4QI "rvvmf4qi") (RVVMF8x4QI "rvvmf8qi")
(RVVM2x3QI "rvvm2qi") (RVVM1x3QI "rvvm1qi") (RVVMF2x3QI "rvvmf2qi") (RVVMF4x3QI "rvvmf4qi") (RVVMF8x3QI "rvvmf8qi")
(RVVM4x2QI "rvvm4qi") (RVVM2x2QI "rvvm2qi") (RVVM1x2QI "rvvm1qi") (RVVMF2x2QI "rvvmf2qi") (RVVMF4x2QI "rvvmf4qi") (RVVMF8x2QI "rvvmf8qi")

(RVVM1x8HI "rvvm1hi") (RVVMF2x8HI "rvvmf2hi") (RVVMF4x8HI "rvvmf4hi")
(RVVM1x7HI "rvvm1hi") (RVVMF2x7HI "rvvmf2hi") (RVVMF4x7HI "rvvmf4hi")
(RVVM1x6HI "rvvm1hi") (RVVMF2x6HI "rvvmf2hi") (RVVMF4x6HI "rvvmf4hi")
(RVVM1x5HI "rvvm1hi") (RVVMF2x5HI "rvvmf2hi") (RVVMF4x5HI "rvvmf4hi")
(RVVM2x4HI "rvvm2hi") (RVVM1x4HI "rvvm1hi") (RVVMF2x4HI "rvvmf2hi") (RVVMF4x4HI "rvvmf4hi")
(RVVM2x3HI "rvvm2hi") (RVVM1x3HI "rvvm1hi") (RVVMF2x3HI "rvvmf2hi") (RVVMF4x3HI "rvvmf4hi")
(RVVM4x2HI "rvvm4hi") (RVVM2x2HI "rvvm2hi") (RVVM1x2HI "rvvm1hi") (RVVMF2x2HI "rvvmf2hi") (RVVMF4x2HI "rvvmf4hi")

(RVVM1x8HF "rvvm1hf")
(RVVMF2x8HF "rvvmf2hf")
(RVVMF4x8HF "rvvmf4hf")
(RVVM1x7HF "rvvm1hf")
(RVVMF2x7HF "rvvmf2hf")
(RVVMF4x7HF "rvvmf4hf")
(RVVM1x6HF "rvvm1hf")
(RVVMF2x6HF "rvvmf2hf")
(RVVMF4x6HF "rvvmf4hf")
(RVVM1x5HF "rvvm1hf")
(RVVMF2x5HF "rvvmf2hf")
(RVVMF4x5HF "rvvmf4hf")
(RVVM2x4HF "rvvm2hf")
(RVVM1x4HF "rvvm1hf")
(RVVMF2x4HF "rvvmf2hf")
(RVVMF4x4HF "rvvmf4hf")
(RVVM2x3HF "rvvm2hf")
(RVVM1x3HF "rvvm1hf")
(RVVMF2x3HF "rvvmf2hf")
(RVVMF4x3HF "rvvmf4hf")
(RVVM4x2HF "rvvm4hf")
(RVVM2x2HF "rvvm2hf")
(RVVM1x2HF "rvvm1hf")
(RVVMF2x2HF "rvvmf2hf")
(RVVMF4x2HF "rvvmf4hf")

(RVVM1x8SI "rvvm1si") (RVVMF2x8SI "rvvmf2si")
(RVVM1x7SI "rvvm1si") (RVVMF2x7SI "rvvmf2si")
(RVVM1x6SI "rvvm1si") (RVVMF2x6SI "rvvmf2si")
(RVVM1x5SI "rvvm1si") (RVVMF2x5SI "rvvmf2si")
(RVVM2x4SI "rvvm2si") (RVVM1x4SI "rvvm1si") (RVVMF2x4SI "rvvmf2si")
(RVVM2x3SI "rvvm2si") (RVVM1x3SI "rvvm1si") (RVVMF2x3SI "rvvmf2si")
(RVVM4x2SI "rvvm4si") (RVVM2x2SI "rvvm2si") (RVVM1x2SI "rvvm1si") (RVVMF2x2SI "rvvmf2si")

(RVVM1x8SF "rvvm1sf")
(RVVMF2x8SF "rvvmf2sf")
(RVVM1x7SF "rvvm1sf")
(RVVMF2x7SF "rvvmf2sf")
(RVVM1x6SF "rvvm1sf")
(RVVMF2x6SF "rvvmf2sf")
(RVVM1x5SF "rvvm1sf")
(RVVMF2x5SF "rvvmf2sf")
(RVVM2x4SF "rvvm2sf")
(RVVM1x4SF "rvvm1sf")
(RVVMF2x4SF "rvvmf2sf")
(RVVM2x3SF "rvvm2sf")
(RVVM1x3SF "rvvm1sf")
(RVVMF2x3SF "rvvmf2sf")
(RVVM4x2SF "rvvm4sf")
(RVVM2x2SF "rvvm2sf")
(RVVM1x2SF "rvvm1sf")
(RVVMF2x2SF "rvvmf2sf")

(RVVM1x8DI "rvvm1di")
(RVVM1x7DI "rvvm1di")
(RVVM1x6DI "rvvm1di")
(RVVM1x5DI "rvvm1di")
(RVVM2x4DI "rvvm2di")
(RVVM1x4DI "rvvm1di")
(RVVM2x3DI "rvvm2di")
(RVVM1x3DI "rvvm1di")
(RVVM4x2DI "rvvm4di")
(RVVM2x2DI "rvvm2di")
(RVVM1x2DI "rvvm1di")

(RVVM1x8DF "rvvm1df")
(RVVM1x7DF "rvvm1df")
(RVVM1x6DF "rvvm1df")
(RVVM1x5DF "rvvm1df")
(RVVM2x4DF "rvvm2df")
(RVVM1x4DF "rvvm1df")
(RVVM2x3DF "rvvm2df")
(RVVM1x3DF "rvvm1df")
(RVVM4x2DF "rvvm4df")
(RVVM2x2DF "rvvm2df")
(RVVM1x2DF "rvvm1df")
])

(define_mode_attr VSUBEL [
(RVVM8HI "QI") (RVVM4HI "QI") (RVVM2HI "QI") (RVVM1HI "QI") (RVVMF2HI "QI") (RVVMF4HI "QI")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@

TEST_ALL (TEST_LOOP)

/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 46 "optimized" } } */
/* { dg-final { scan-tree-dump-times " \.MASK_LEN_GATHER_LOAD" 33 "optimized" } } */
/* { dg-final { scan-tree-dump-not " \.GATHER_LOAD" "optimized" } } */
/* { dg-final { scan-tree-dump-not " \.MASK_GATHER_LOAD" "optimized" } } */
7 changes: 4 additions & 3 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ f (int8_t *restrict a, int8_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* { dg-final { scan-assembler {\tvid\.v} } } */
/* { dg-final { scan-assembler {\tvand} } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvid\.v} { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvand} { xfail *-*-* } } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-16.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ f (uint8_t *restrict a, uint8_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* { dg-final { scan-assembler {\tvid\.v} } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvid\.v} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tvmul} } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-17.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ f (uint8_t *restrict a, uint8_t *restrict b,
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 2 "optimized" } } */
/* { dg-final { scan-assembler {\tvid\.v} } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 2 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvid\.v} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tvmul} } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-18.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ f (float *restrict a, float *restrict b,
}
}

/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
/* { dg-final { scan-assembler {\tvid\.v} } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvid\.v} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tvmul} } } */
4 changes: 2 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-19.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ f (float *restrict a, float *restrict b,
}
}

/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" } } */
/* { dg-final { scan-assembler {\tvid\.v} } } */
/* { dg-final { scan-tree-dump "\.VEC_PERM" "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-assembler {\tvid\.v} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-not {\tvmul} } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-2.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-optimized-details" } */

#include <stdint-gcc.h>

Expand All @@ -19,4 +19,5 @@ f (int16_t *restrict a, int16_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
3 changes: 2 additions & 1 deletion gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-3.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ f (int8_t *restrict a, int8_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-4.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-optimized-details" } */

#include <stdint-gcc.h>

Expand All @@ -19,4 +19,5 @@ f (int16_t *restrict a, int16_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
3 changes: 2 additions & 1 deletion gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-5.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ f (int8_t *restrict a, int8_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */
5 changes: 3 additions & 2 deletions gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/slp-6.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param riscv-autovec-preference=scalable -fdump-tree-optimized-details -fno-vect-cost-model" } */

#include <stdint-gcc.h>

Expand All @@ -19,5 +19,6 @@ f (uint8_t *restrict a, uint8_t *restrict b, int n)
}
}

/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" } } */
/* FIXME: Since we don't have VECT cost model yet, LOAD_LANES/STORE_LANES are chosen instead of SLP. */
/* { dg-final { scan-tree-dump-times "\.VEC_PERM" 1 "optimized" { xfail *-*-* } } } */

Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* { dg-do compile } */
/* { dg-additional-options "-std=c99 -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */

#include <stdint-gcc.h>

/* Define NAME_2: for every i in [0, n) with cond[i] nonzero, store
   src[2 * i] + src[2 * i + 1] into dest[i].  SRC is therefore read as
   groups of two adjacent elements, guarded by COND.  The function is
   marked noinline and noclone so it is emitted as written.  */
#define TEST_LOOP(NAME, OUTTYPE, INTYPE, MASKTYPE) \
void __attribute__ ((noinline, noclone)) \
NAME##_2 (OUTTYPE *__restrict dest, INTYPE *__restrict src, \
MASKTYPE *__restrict cond, intptr_t n) \
{ \
for (intptr_t i = 0; i < n; ++i) \
if (cond[i]) \
dest[i] = src[i * 2] + src[i * 2 + 1]; \
}

/* Instantiate TEST_LOOP once per mask (condition) element type: int8_t,
   uint16_t, float and double.  The suffix appended to NAME identifies the
   mask type.  */
#define TEST2(NAME, OUTTYPE, INTYPE) \
TEST_LOOP (NAME##_i8, OUTTYPE, INTYPE, int8_t) \
TEST_LOOP (NAME##_i16, OUTTYPE, INTYPE, uint16_t) \
TEST_LOOP (NAME##_f32, OUTTYPE, INTYPE, float) \
TEST_LOOP (NAME##_f64, OUTTYPE, INTYPE, double)

/* Instantiate TEST2 once per integer input element type: int8_t, uint16_t,
   int32_t and uint64_t.  The suffix appended to NAME identifies the input
   type.  Each TEST1 use yields 4 x 4 = 16 functions.  */
#define TEST1(NAME, OUTTYPE) \
TEST2 (NAME##_i8, OUTTYPE, int8_t) \
TEST2 (NAME##_i16, OUTTYPE, uint16_t) \
TEST2 (NAME##_i32, OUTTYPE, int32_t) \
TEST2 (NAME##_i64, OUTTYPE, uint64_t)

/* Top-level generator: every integer output type combined with every
   integer input type (via TEST1), plus three same-type floating-point
   combinations (_Float16, float, double) via TEST2.
   Total: 4 * 16 + 3 * 4 = 76 functions.  Grouped by input element width
   that is 16 (8-bit), 20 (16-bit), 20 (32-bit) and 20 (64-bit), which
   matches the per-width counts expected by the assembler scans that
   follow.  */
#define TEST(NAME) \
TEST1 (NAME##_i8, int8_t) \
TEST1 (NAME##_i16, uint16_t) \
TEST1 (NAME##_i32, int32_t) \
TEST1 (NAME##_i64, uint64_t) \
TEST2 (NAME##_f16_f16, _Float16, _Float16) \
TEST2 (NAME##_f32_f32, float, float) \
TEST2 (NAME##_f64_f64, double, double)

/* Emit all the test functions, with names prefixed by "test".  */
TEST (test)

/* { dg-final { scan-assembler-times {vlseg2e8\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 16 } } */
/* { dg-final { scan-assembler-times {vlseg2e16\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
/* { dg-final { scan-assembler-times {vlseg2e32\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
/* { dg-final { scan-assembler-times {vlseg2e64\.v\s+v[0-9]+,\s*\([a-x0-9]+\),\s*v0.t} 20 } } */
Loading

0 comments on commit fe57888

Please sign in to comment.