[Mesa-dev] [PATCH 36/38] ac/nir: generate better code for nir_op_f2f16_rtz

Rhys Perry pendingchaos02 at gmail.com
Fri Dec 7 17:22:29 UTC 2018


Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 src/amd/common/ac_nir_to_llvm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index b4418af50a..92b773981b 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -889,7 +889,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 			src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
 		LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
 		result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
-		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+		// bitcast + trunc generates better code than an extractelement when SLP vectorization is used
+		result = LLVMBuildBitCast(ctx->ac.builder, result, ctx->ac.i32, "");
+		result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i16, "");
 		break;
 	case nir_op_f2f16_rtne:
 	case nir_op_f2f16:
-- 
2.19.2



More information about the mesa-dev mailing list