[Beignet] [PATCH] GBE: Optimize byte gather read using untyped read.

Zhigang Gong zhigang.gong at linux.intel.com
Tue Apr 22 22:43:22 PDT 2014


LGTM, will push later, thanks.

On Wed, Apr 23, 2014 at 10:56:50AM +0800, Ruiling Song wrote:
> Untyped read seems better than byte gather read.
> The performance of some OpenCV tests doubled after the patch.
> 
> Signed-off-by: Ruiling Song <ruiling.song at intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   25 ++++++++++++++-----------
>  1 file changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index bcbf115..8c7ac09 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -2594,19 +2594,22 @@ namespace gbe
>        } else {
>          GBE_ASSERT(insn.getValueNum() == 1);
>          const GenRegister value = sel.selReg(insn.getValue(0));
> -        // We need a temporary register if we read bytes or words
> -        Register dst = Register(value.value.reg);
> -        if (elemSize == GEN_BYTE_SCATTER_WORD ||
> -            elemSize == GEN_BYTE_SCATTER_BYTE) {
> -          dst = sel.reg(FAMILY_DWORD);
> -          sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
> -        }
> -
> -        // Repack bytes or words using a converting mov instruction
> +        GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
> +
> +        Register tmpReg = sel.reg(FAMILY_DWORD);
> +        GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
> +        GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
> +        // Get dword aligned addr
> +        sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
> +        sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti);
> +        // Get the remaining offset from aligned addr
> +        sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
> +        sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
> +        sel.SHR(tmpData, tmpData, tmpAddr);
>          if (elemSize == GEN_BYTE_SCATTER_WORD)
> -          sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
> +          sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg));
>          else if (elemSize == GEN_BYTE_SCATTER_BYTE)
> -          sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
> +          sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(tmpReg));
>        }
>      }
>  
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list