[Mesa-dev] [PATCH 2/2] llvmpipe: Improve color buffer loads/stores alignment.

Fri Nov 30 11:56:29 PST 2012

Am 30.11.2012 19:51, schrieb Jose Fonseca:
> 
> 
> ----- Original Message -----
>> Am 30.11.2012 18:43, schrieb jfonseca at vmware.com:
>>> From: José Fonseca <jfonseca at vmware.com>
>>>
>>> Tell LLVM the exact alignment we can guarantee, based on the fs
>>> block
>>> dimensions, pixel format, and the alignment of the resource base
>>> pointer
>>> and stride.
>>> ---
>>>  src/gallium/drivers/llvmpipe/lp_state_fs.c |   31
>>>  +++++++++++++++++++---------
>>>  1 file changed, 21 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c
>>> b/src/gallium/drivers/llvmpipe/lp_state_fs.c
>>> index 2d2e346..6819d33 100644
>>> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
>>> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
>>> @@ -839,7 +839,8 @@ load_unswizzled_block(struct gallivm_state
>>> *gallivm,
>>>                        unsigned block_height,
>>>                        LLVMValueRef* dst,
>>>                        struct lp_type dst_type,
>>> -                      unsigned dst_count)
>>> +                      unsigned dst_count,
>>> +                      unsigned dst_alignment)
>>>  {
>>>     LLVMBuilderRef builder = gallivm->builder;
>>>     unsigned row_size = dst_count / block_height;
>>> @@ -866,9 +867,7 @@ load_unswizzled_block(struct gallivm_state
>>> *gallivm,
>>>  
>>>        dst[i] = LLVMBuildLoad(builder, dst_ptr, "");
>>>  
>>> -      if ((dst_type.length % 3) == 0) {
>>> -         lp_set_load_alignment(dst[i], dst_type.width / 8);
>>> -      }
>>> +      lp_set_load_alignment(dst[i], dst_alignment);
>>>     }
>>>  }
>>>  
>>> @@ -884,7 +883,8 @@ store_unswizzled_block(struct gallivm_state
>>> *gallivm,
>>>                         unsigned block_height,
>>>                         LLVMValueRef* src,
>>>                         struct lp_type src_type,
>>> -                       unsigned src_count)
>>> +                       unsigned src_count,
>>> +                       unsigned src_alignment)
>>>  {
>>>     LLVMBuilderRef builder = gallivm->builder;
>>>     unsigned row_size = src_count / block_height;
>>> @@ -893,6 +893,8 @@ store_unswizzled_block(struct gallivm_state
>>> *gallivm,
>>>     /* Ensure src exactly fits into block */
>>>     assert((block_width * block_height) % src_count == 0);
>>>  
>>> +
>>> +
>> extra whitespace.
>>
>>>     for (i = 0; i < src_count; ++i) {
>>>        unsigned x = i % row_size;
>>>        unsigned y = i / row_size;
>>> @@ -911,9 +913,7 @@ store_unswizzled_block(struct gallivm_state
>>> *gallivm,
>>>  
>>>        src_ptr = LLVMBuildStore(builder, src[i], src_ptr);
>>>  
>>> -      if ((src_type.length % 3) == 0) {
>>> -         lp_set_store_alignment(src_ptr, src_type.width / 8);
>>> -      }
>>> +      lp_set_store_alignment(src_ptr, src_alignment);
>>>     }
>>>  }
>>>  
>>> @@ -1333,6 +1333,8 @@ generate_unswizzled_blend(struct
>>> gallivm_state *gallivm,
>>>  
>>>     const struct util_format_description* out_format_desc =
>>>     util_format_description(out_format);
>>>  
>>> +   unsigned dst_alignment;
>>> +
>>>     bool pad_inline = is_arithmetic_format(out_format_desc);
>>>     bool has_alpha = false;
>>>  
>>> @@ -1340,6 +1342,13 @@ generate_unswizzled_blend(struct
>>> gallivm_state *gallivm,
>>>     mask_type = lp_int32_vec4_type();
>>>     mask_type.length = fs_type.length;
>>>  
>>> +   /* Compute the alignment of the destination pointer in bytes */
>>> +   dst_alignment = (block_width * out_format_desc->block.bits +
>>> 7)/(out_format_desc->block.width * 8);
>>> +   /* Force power-of-two alignment by extracting only the
>>> least-significant-bit */
>>> +   dst_alignment = 1 << (ffs(dst_alignment) - 1);
>>> +   /* Resource base and stride pointers are aligned to 16 bytes,
>>> so that's the maximum alignment we can guarantee */
>>> +   dst_alignment = MIN2(dst_alignment, 16);
>> I wonder if this is all necessary. block_width is a constant and pot,
>> and the formats are all pot-sized too, 
> 
> Formats are not necessarily pot-sized: R8G8B8, R16G16B16, etc.
> 
> For example, for R8G8B8, 4*3 = 12 bytes. So if we need to round this down to 4 bytes.
<slaps head>
Oops, yes. I somehow thought we only supported those which are power of two.

> 
>> so trying to align this to pot
>> won't really do much.
>> So unless this somehow should handle non-pot block_width (and I don't
>> think this makes sense) this should probably be simplified.
>>
>>> +
>>>     /* Do not bother executing code when mask is empty.. */
>>>     if (do_branch) {
>>>        check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm,
>>>        mask_type));
>>> @@ -1616,7 +1625,8 @@ generate_unswizzled_blend(struct
>>> gallivm_state *gallivm,
>>>  
>>>     dst_type.length *= 16 / dst_count;
>>>  
>>> -   load_unswizzled_block(gallivm, color_ptr, stride, block_width,
>>> block_height, dst, dst_type, dst_count);
>>> +   load_unswizzled_block(gallivm, color_ptr, stride, block_width,
>>> block_height,
>>> +                         dst, dst_type, dst_count, dst_alignment);
>>>  
>>>  
>>>     /*
>>> @@ -1681,7 +1691,8 @@ generate_unswizzled_blend(struct
>>> gallivm_state *gallivm,
>>>     /*
>>>      * Store blend result to memory
>>>      */
>>> -   store_unswizzled_block(gallivm, color_ptr, stride, block_width,
>>> block_height, dst, dst_type, dst_count);
>>> +   store_unswizzled_block(gallivm, color_ptr, stride, block_width,
>>> block_height,
>>> +                          dst, dst_type, dst_count,
>>> dst_alignment);
>>>  
>>>     if (do_branch) {
>>>        lp_build_mask_end(&mask_ctx);
>>>
>>
>> Otherwise, this (and rest of series) looks good to me.
>>
> 
> Thanks for the review.
> 
> Jose
>