/* { dg-do assemble { target aarch64_asm_sve-b16b16_ok } } */
/* { dg-do compile { target { ! aarch64_asm_sve-b16b16_ok } } } */
/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */

#include "test_sve_acle.h"

/* Enable the sve-b16b16 extension for everything below.  When
   STREAMING_COMPATIBLE is defined, sme2 is enabled as well.  */
#pragma GCC target "+sve-b16b16"
#ifdef STREAMING_COMPATIBLE
#pragma GCC target "+sme2"
#endif

/* svmul_bf16_m with the first multiplicand already in z0, the register the
   result is assigned to.  The pattern expects a lone predicated BFMUL and
   no register moves.  The macro receives both the explicitly-typed call
   and the overloaded svmul_m spelling of it.  */
/*
** mul_bf16_m_tied1:
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_m_tied1, svbfloat16_t,
		z0 = svmul_bf16_m (p0, z0, z1),
		z0 = svmul_m (p0, z0, z1))

/* svmul_bf16_m with the second multiplicand in z0.  The pattern expects z0
   to be copied to a scratch register first, z1 to be brought into z0 with
   MOVPRFX, and the scratch copy (\1) to be used as the last BFMUL operand;
   i.e. the operands are not swapped in the _m form.  */
/*
** mul_bf16_m_tied2:
**	mov	(z[0-9]+)\.d, z0\.d
**	movprfx	z0, z1
**	bfmul	z0\.h, p0/m, z0\.h, \1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_m_tied2, svbfloat16_t,
		z0 = svmul_bf16_m (p0, z1, z0),
		z0 = svmul_m (p0, z1, z0))

/* svmul_bf16_m where neither multiplicand is in z0.  The pattern expects
   the first multiplicand (z1) to be moved into z0 with an unpredicated
   MOVPRFX, followed by the predicated BFMUL by z2.  */
/*
** mul_bf16_m_untied:
**	movprfx	z0, z1
**	bfmul	z0\.h, p0/m, z0\.h, z2\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_m_untied, svbfloat16_t,
		z0 = svmul_bf16_m (p0, z1, z2),
		z0 = svmul_m (p0, z1, z2))

/* svmul_n_bf16_m with a scalar __bf16 multiplier.  d4 is supplied by
   TEST_UNIFORM_ZD (defined in test_sve_acle.h, not visible here); the
   pattern expects the scalar to be broadcast from h4 into some vector
   register, which then feeds the predicated BFMUL on z0.  */
/*
** mul_h4_bf16_m_tied1:
**	mov	(z[0-9]+\.h), h4
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_m_tied1, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_m (p0, z0, d4),
		 z0 = svmul_m (p0, z0, d4))

/* As mul_h4_bf16_m_tied1, but the vector multiplicand is in z1 rather than
   z0.  The pattern expects the scalar broadcast first, then MOVPRFX of z1
   into z0, then the predicated BFMUL by the broadcast register.  */
/*
** mul_h4_bf16_m_untied:
**	mov	(z[0-9]+\.h), h4
**	movprfx	z0, z1
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_m_untied, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_m (p0, z1, d4),
		 z0 = svmul_m (p0, z1, d4))

/* svmul_n_bf16_m by the constant 1.  The pattern expects the constant in a
   vector register via an .h-element FMOV.  The immediate is 1.875, not 1:
   the bf16 encoding of 1.0 is 0x3f80, which reads as 1.875 when taken as
   an IEEE half-precision value.  */
/*
** mul_1_bf16_m:
**	fmov	(z[0-9]+\.h), #1\.875(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_1_bf16_m, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z0, 1),
		z0 = svmul_m (p0, z0, 1))

/* svmul_n_bf16_m by the constant 0.5, cast to bfloat16_t.  The expected
   FMOV immediate is 1.75: the bf16 encoding of 0.5 is 0x3f00, which reads
   as 1.75 when taken as an IEEE half-precision value.  */
/*
** mul_0p5_bf16_m:
**	fmov	(z[0-9]+\.h), #1\.75(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_0p5_bf16_m, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z0, (bfloat16_t) (0.5)),
		z0 = svmul_m (p0, z0, (bfloat16_t) (0.5)))

/* svmul_n_bf16_m by the constant -1.  The expected FMOV immediate is
   -1.875: the bf16 encoding of -1.0 is 0xbf80, which reads as -1.875 when
   taken as an IEEE half-precision value.  */
/*
** mul_m1_bf16_m:
**	fmov	(z[0-9]+\.h), #-1\.875(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_m1_bf16_m, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z0, -1),
		z0 = svmul_m (p0, z0, -1))

/* svmul_n_bf16_m by the constant -0.5, cast to bfloat16_t.  The expected
   FMOV immediate is -1.75: the bf16 encoding of -0.5 is 0xbf00, which
   reads as -1.75 when taken as an IEEE half-precision value.  */
/*
** mul_m0p5_bf16_m:
**	fmov	(z[0-9]+\.h), #-1\.75(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_m0p5_bf16_m, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z0, (bfloat16_t) (-0.5)),
		z0 = svmul_m (p0, z0, (bfloat16_t) (-0.5)))

/* svmul_n_bf16_m by the constant -2, with the vector multiplicand in z0.
   Here the expected FMOV immediate is -2.0 itself: the bf16 encoding of
   -2.0 is 0xc000, which is also -2.0 as an IEEE half-precision value.  */
/*
** mul_m2_bf16_m_tied1:
**	fmov	(z[0-9]+\.h), #-2\.0(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_m2_bf16_m_tied1, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z0, -2),
		z0 = svmul_m (p0, z0, -2))

/* As mul_m2_bf16_m_tied1, but the vector multiplicand is in z1.  The
   pattern expects the constant FMOV first, then MOVPRFX of z1 into z0,
   then the predicated BFMUL by the constant register.  */
/*
** mul_m2_bf16_m_untied:
**	fmov	(z[0-9]+\.h), #-2\.0(?:e\+0)?
**	movprfx	z0, z1
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_m2_bf16_m_untied, svbfloat16_t,
		z0 = svmul_n_bf16_m (p0, z1, -2),
		z0 = svmul_m (p0, z1, -2))

/* svmul_bf16_z with the first multiplicand in z0.  The pattern expects a
   zeroing MOVPRFX of z0 onto itself (p0/z) immediately before the
   predicated BFMUL by z1.  */
/*
** mul_bf16_z_tied1:
**	movprfx	z0\.h, p0/z, z0\.h
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_z_tied1, svbfloat16_t,
		z0 = svmul_bf16_z (p0, z0, z1),
		z0 = svmul_z (p0, z0, z1))

/* svmul_bf16_z with the second multiplicand in z0.  The expected sequence
   is identical to mul_bf16_z_tied1: the pattern keeps z0 as the
   destructive operand and multiplies by z1, so no scratch copy is
   expected (contrast with mul_bf16_m_tied2).  */
/*
** mul_bf16_z_tied2:
**	movprfx	z0\.h, p0/z, z0\.h
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_z_tied2, svbfloat16_t,
		z0 = svmul_bf16_z (p0, z1, z0),
		z0 = svmul_z (p0, z1, z0))

/* svmul_bf16_z where neither multiplicand is in z0.  The pattern accepts
   either operand order: a zeroing MOVPRFX from z1 followed by BFMUL by z2,
   or a zeroing MOVPRFX from z2 followed by BFMUL by z1.  */
/*
** mul_bf16_z_untied:
** (
**	movprfx	z0\.h, p0/z, z1\.h
**	bfmul	z0\.h, p0/m, z0\.h, z2\.h
** |
**	movprfx	z0\.h, p0/z, z2\.h
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
** )
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_z_untied, svbfloat16_t,
		z0 = svmul_bf16_z (p0, z1, z2),
		z0 = svmul_z (p0, z1, z2))

/* svmul_n_bf16_z with a scalar __bf16 multiplier (d4, supplied by
   TEST_UNIFORM_ZD) and the vector multiplicand in z0.  The pattern expects
   the scalar broadcast from h4, a zeroing MOVPRFX of z0 onto itself, then
   the predicated BFMUL by the broadcast register.  */
/*
** mul_h4_bf16_z_tied1:
**	mov	(z[0-9]+\.h), h4
**	movprfx	z0\.h, p0/z, z0\.h
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_z_tied1, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_z (p0, z0, d4),
		 z0 = svmul_z (p0, z0, d4))

/* As mul_h4_bf16_z_tied1, but the vector multiplicand is in z1.  After the
   scalar broadcast the pattern accepts either operand order: zeroing
   MOVPRFX from z1 then BFMUL by the broadcast register, or zeroing MOVPRFX
   from the broadcast register then BFMUL by z1.  */
/*
** mul_h4_bf16_z_untied:
**	mov	(z[0-9]+\.h), h4
** (
**	movprfx	z0\.h, p0/z, z1\.h
**	bfmul	z0\.h, p0/m, z0\.h, \1
** |
**	movprfx	z0\.h, p0/z, \1
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
** )
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_z_untied, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_z (p0, z1, d4),
		 z0 = svmul_z (p0, z1, d4))

/* svmul_n_bf16_z by the constant 1.  The pattern expects the constant via
   an .h-element FMOV (immediate 1.875, i.e. the bf16 bit pattern of 1.0,
   0x3f80, read as IEEE half precision), a zeroing MOVPRFX of z0 onto
   itself, then the predicated BFMUL.  */
/*
** mul_1_bf16_z:
**	fmov	(z[0-9]+\.h), #1\.875(?:e\+0)?
**	movprfx	z0\.h, p0/z, z0\.h
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_1_bf16_z, svbfloat16_t,
		z0 = svmul_n_bf16_z (p0, z0, 1),
		z0 = svmul_z (p0, z0, 1))

/* svmul_bf16_x with the first multiplicand in z0 and a non-constant
   predicate p0.  The pattern expects a lone predicated BFMUL of z0 by z1
   with no prefix instruction.  */
/*
** mul_bf16_x_tied1:
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_x_tied1, svbfloat16_t,
		z0 = svmul_bf16_x (p0, z0, z1),
		z0 = svmul_x (p0, z0, z1))

/* svmul_bf16_x with the second multiplicand in z0.  The expected
   instruction is the same as for mul_bf16_x_tied1: z0 stays the
   destructive operand and z1 is the other source, so no move is
   expected.  */
/*
** mul_bf16_x_tied2:
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_x_tied2, svbfloat16_t,
		z0 = svmul_bf16_x (p0, z1, z0),
		z0 = svmul_x (p0, z1, z0))

/* svmul_bf16_x where neither multiplicand is in z0.  The pattern accepts
   either operand order: unpredicated MOVPRFX from z1 then BFMUL by z2, or
   unpredicated MOVPRFX from z2 then BFMUL by z1.  */
/*
** mul_bf16_x_untied:
** (
**	movprfx	z0, z1
**	bfmul	z0\.h, p0/m, z0\.h, z2\.h
** |
**	movprfx	z0, z2
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
** )
**	ret
*/
TEST_UNIFORM_Z (mul_bf16_x_untied, svbfloat16_t,
		z0 = svmul_bf16_x (p0, z1, z2),
		z0 = svmul_x (p0, z1, z2))

/* svmul_n_bf16_x with a scalar __bf16 multiplier (d4, supplied by
   TEST_UNIFORM_ZD) and the vector multiplicand in z0.  The pattern expects
   the scalar broadcast from h4 into some vector register, then the
   predicated BFMUL of z0 by that register.  */
/*
** mul_h4_bf16_x_tied1:
**	mov	(z[0-9]+\.h), h4
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_x_tied1, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_x (p0, z0, d4),
		 z0 = svmul_x (p0, z0, d4))

/* As mul_h4_bf16_x_tied1, but the vector multiplicand is in z1.  The
   pattern expects the scalar to be broadcast directly into z0 and then
   multiplied by z1, so no MOVPRFX or scratch register is expected.  */
/*
** mul_h4_bf16_x_untied:
**	mov	z0\.h, h4
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_ZD (mul_h4_bf16_x_untied, svbfloat16_t, __bf16,
		 z0 = svmul_n_bf16_x (p0, z1, d4),
		 z0 = svmul_x (p0, z1, d4))

/* svmul_n_bf16_x by the constant 1 with the vector multiplicand in z0.
   The pattern expects the constant via an .h-element FMOV into some
   register (immediate 1.875, i.e. the bf16 bit pattern of 1.0, 0x3f80,
   read as IEEE half precision), then the predicated BFMUL.  */
/*
** mul_1_bf16_x_tied1:
**	fmov	(z[0-9]+\.h), #1\.875(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, \1
**	ret
*/
TEST_UNIFORM_Z (mul_1_bf16_x_tied1, svbfloat16_t,
		z0 = svmul_n_bf16_x (p0, z0, 1),
		z0 = svmul_x (p0, z0, 1))

/* As mul_1_bf16_x_tied1, but the vector multiplicand is in z1.  The
   pattern expects the constant to be materialised directly in z0 and then
   multiplied by z1, so no MOVPRFX or scratch register is expected.  */
/*
** mul_1_bf16_x_untied:
**	fmov	z0\.h, #1\.875(?:e\+0)?
**	bfmul	z0\.h, p0/m, z0\.h, z1\.h
**	ret
*/
TEST_UNIFORM_Z (mul_1_bf16_x_untied, svbfloat16_t,
		z0 = svmul_n_bf16_x (p0, z1, 1),
		z0 = svmul_x (p0, z1, 1))

/* svmul_bf16_x governed by svptrue_b16 () instead of p0, first multiplicand
   in z0.  The pattern expects a single BFMUL with no predicate operand,
   and accepts the two sources (z0, z1) in either order.  */
/*
** ptrue_mul_bf16_x_tied1:
**	bfmul	z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h)
**	ret
*/
TEST_UNIFORM_Z (ptrue_mul_bf16_x_tied1, svbfloat16_t,
		z0 = svmul_bf16_x (svptrue_b16 (), z0, z1),
		z0 = svmul_x (svptrue_b16 (), z0, z1))

/* As ptrue_mul_bf16_x_tied1, but with the second multiplicand in z0.  The
   expected pattern is the same: one BFMUL with no predicate operand,
   sources z0 and z1 in either order.  */
/*
** ptrue_mul_bf16_x_tied2:
**	bfmul	z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h)
**	ret
*/
TEST_UNIFORM_Z (ptrue_mul_bf16_x_tied2, svbfloat16_t,
		z0 = svmul_bf16_x (svptrue_b16 (), z1, z0),
		z0 = svmul_x (svptrue_b16 (), z1, z0))

/* svmul_bf16_x governed by svptrue_b16 () where neither multiplicand is in
   z0.  The pattern expects one BFMUL with no predicate operand writing z0
   from z1 and z2 (either order), with no MOVPRFX.  */
/*
** ptrue_mul_bf16_x_untied:
**	bfmul	z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h)
**	ret
*/
TEST_UNIFORM_Z (ptrue_mul_bf16_x_untied, svbfloat16_t,
		z0 = svmul_bf16_x (svptrue_b16 (), z1, z2),
		z0 = svmul_x (svptrue_b16 (), z1, z2))

/* svmul_n_bf16_x by the constant 1, governed by svptrue_b16 (), vector
   multiplicand in z0.  The pattern expects the constant via an .h-element
   FMOV (immediate 1.875, the bf16 bit pattern of 1.0 read as IEEE half
   precision) and then one BFMUL with no predicate operand, sources in
   either order.  */
/*
** ptrue_mul_1_bf16_x_tied1:
**	fmov	(z[0-9]+\.h), #1\.875(?:e\+0)?
**	bfmul	z0\.h, (z0\.h, \1|\1, z0\.h)
**	ret
*/
TEST_UNIFORM_Z (ptrue_mul_1_bf16_x_tied1, svbfloat16_t,
		z0 = svmul_n_bf16_x (svptrue_b16 (), z0, 1),
		z0 = svmul_x (svptrue_b16 (), z0, 1))

/* As ptrue_mul_1_bf16_x_tied1, but the vector multiplicand is in z1.  The
   pattern expects the constant FMOV into some register and then one BFMUL
   with no predicate operand writing z0 from z1 and that register (either
   order), with no MOVPRFX.  */
/*
** ptrue_mul_1_bf16_x_untied:
**	fmov	(z[0-9]+\.h), #1\.875(?:e\+0)?
**	bfmul	z0\.h, (z1\.h, \1|\1, z1\.h)
**	ret
*/
TEST_UNIFORM_Z (ptrue_mul_1_bf16_x_untied, svbfloat16_t,
		z0 = svmul_n_bf16_x (svptrue_b16 (), z1, 1),
		z0 = svmul_x (svptrue_b16 (), z1, 1))
