[Intel-gfx] [PATCH] mesa/intel: support tiled textures on pre-965

Sun Aug 9 09:15:29 CEST 2009

On Tue, 2009-07-14 at 13:55 -0700, Jesse Barnes wrote:
> This patch adds support for tiled textures on pre-965 chips.  It uses
> the new libdrm tiled allocation function and the new fence register
> relocation type to specify buffer properties and rendering requirements.
> 
> This one currently causes some rendering errors, but is otherwise
> stable.  Feedback appreciated.  I also noticed one other place we might
> use tiled objects, intel_bufferobj_alloc_buffer(), but didn't make the
> conversion here.
> 
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> 
> diff --git a/src/mesa/drivers/dri/i915/i915_metaops.c b/src/mesa/drivers/dri/i915/i915_metaops.c
> index 90a78c6..dae292f 100644
> --- a/src/mesa/drivers/dri/i915/i915_metaops.c
> +++ b/src/mesa/drivers/dri/i915/i915_metaops.c
> @@ -334,6 +334,7 @@ meta_tex_rect_source(struct intel_context *intel,
>     GLuint *state = i915->meta.Tex[0];
>     GLuint textureFormat;
>     GLuint cpp;
> +   uint32_t tiling, swizzle;
>  
>     /* A full implementation of this would do the upload through
>      * glTexImage2d, and get all the conversion operations at that
> @@ -398,10 +399,16 @@ meta_tex_rect_source(struct intel_context *intel,
>  /*    intel_region_reference(&i915->meta.tex_region[0], region); */
>     i915->meta.tex_buffer[0] = buffer;
>     i915->meta.tex_offset[0] = offset;
> +   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle);
>  
>     state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
>                               ((pitch - 1) << MS3_WIDTH_SHIFT) |
> -                             textureFormat | MS3_USE_FENCE_REGS);
> +                             textureFormat);
> +   if (tiling != I915_TILING_NONE) {
> +      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
> +      if (tiling == I915_TILING_Y)
> +	      state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
> +   }
>  
>     state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
>                               MS4_CUBE_FACE_ENA_MASK |
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> index 0f87fc4..73843c7 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
> @@ -284,6 +284,32 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
>     return GL_TRUE;
>  }
>  
> +GLboolean
> +intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
> +				    dri_bo *buffer,
> +				    uint32_t read_domains,
> +				    uint32_t write_domain,
> +				    uint32_t delta)
> +{
> +   int ret;
> +
> +   if (batch->ptr - batch->map > batch->buf->size)
> +    _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
> +		  batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
> +   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
> +				       buffer, delta, read_domains,
> +				       write_domain);
> +
> +   /*
> +    * Using the old buffer offset, write in what the right data would be, in case
> +    * the buffer doesn't move and we can short-circuit the relocation processing
> +    * in the kernel
> +    */
> +   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
> +
> +   return GL_TRUE;
> +}
> +
>  void
>  intel_batchbuffer_data(struct intel_batchbuffer *batch,
>                         const void *data, GLuint bytes,
> diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> index 51579df..baf5c05 100644
> --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
> @@ -95,6 +95,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
>  				       uint32_t read_domains,
>  				       uint32_t write_domain,
>  				       uint32_t offset);
> +GLboolean intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
> +					      dri_bo *buffer,
> +					      uint32_t read_domains,
> +					      uint32_t write_domain,
> +					      uint32_t offset);
>  
>  /* Inline functions - might actually be better off with these
>   * non-inlined.  Certainly better off switching all command packets to
> @@ -162,6 +167,13 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
>  				read_domains, write_domain, delta);	\
>  } while (0)
>  
> +#define OUT_RELOC_FENCE(buf, read_domains, write_domain, delta) do {	\
> +   assert((delta) >= 0);						\
> +   intel_batchbuffer_emit_fenced_reloc(intel->batch, buf,		\
> +				       read_domains, write_domain,	\
> +				       delta);				\
> +} while (0)
> +
>  #define ADVANCE_BATCH() do {						\
>     unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
>     assert(intel->batch->emit.start_ptr != NULL);			\
> diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
> index 2e95bd1..410a89d 100644
> --- a/src/mesa/drivers/dri/intel/intel_blit.c
> +++ b/src/mesa/drivers/dri/intel/intel_blit.c
> @@ -156,14 +156,20 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
>  	 OUT_BATCH((box.y1 << 16) | box.x1);
>  	 OUT_BATCH((box.y2 << 16) | box.x2);
>  
> -	 OUT_RELOC(dst->buffer,
> -		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -		   0);
> +	 if (dst->tiling != I915_TILING_NONE)
> +		 OUT_RELOC_FENCE(dst->buffer,
> +				 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +				 0);
> +	 else
> +		 OUT_RELOC(dst->buffer,
> +			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
> +
>  	 OUT_BATCH((src_y << 16) | src_x);
>  	 OUT_BATCH(src_pitch);
> -	 OUT_RELOC(src->buffer,
> -		   I915_GEM_DOMAIN_RENDER, 0,
> -		   0);
> +	 if (src->tiling != I915_TILING_NONE)
> +		 OUT_RELOC_FENCE(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
> +	 else
> +		 OUT_RELOC(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
>  	 ADVANCE_BATCH();
>        }
>  
> @@ -321,14 +327,23 @@ intelEmitCopyBlit(struct intel_context *intel,
>     OUT_BATCH(BR13 | (uint16_t)dst_pitch);
>     OUT_BATCH((dst_y << 16) | dst_x);
>     OUT_BATCH((dst_y2 << 16) | dst_x2);
> -   OUT_RELOC(dst_buffer,
> -	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -	     dst_offset);
> +   if (dst_tiling != I915_TILING_NONE)
> +	   OUT_RELOC_FENCE(dst_buffer,
> +			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +			   dst_offset);
> +   else
> +	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +		     dst_offset);
> +
>     OUT_BATCH((src_y << 16) | src_x);
>     OUT_BATCH((uint16_t)src_pitch);
> -   OUT_RELOC(src_buffer,
> -	     I915_GEM_DOMAIN_RENDER, 0,
> -	     src_offset);
> +   if (src_tiling != I915_TILING_NONE)
> +	   OUT_RELOC_FENCE(src_buffer,
> +			   I915_GEM_DOMAIN_RENDER, 0,
> +			   src_offset);
> +   else
> +	   OUT_RELOC(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset);
> +
>     ADVANCE_BATCH();
>  
>     intel_batchbuffer_emit_mi_flush(intel->batch);
> @@ -532,9 +547,16 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
>                 OUT_BATCH(BR13);
>                 OUT_BATCH((b.y1 << 16) | b.x1);
>                 OUT_BATCH((b.y2 << 16) | b.x2);
> -               OUT_RELOC(write_buffer,
> -			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -                         irb->region->draw_offset);
> + 	       if (irb->region->tiling != I915_TILING_NONE)
> +		       OUT_RELOC_FENCE(write_buffer,
> +				       I915_GEM_DOMAIN_RENDER,
> +				       I915_GEM_DOMAIN_RENDER,
> +				       irb->region->draw_offset);
> +	       else
> +		       OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER,
> +				 I915_GEM_DOMAIN_RENDER,
> +				 irb->region->draw_offset);
> +
>                 OUT_BATCH(clearVal);
>                 ADVANCE_BATCH();
>                 clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
> @@ -612,9 +634,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
>     OUT_BATCH(br13);
>     OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
>     OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
> -   OUT_RELOC(dst_buffer,
> -	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> -	     dst_offset);
> +   if (dst_tiling != I915_TILING_NONE)
> +	   OUT_RELOC_FENCE(dst_buffer,
> +			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +			   dst_offset);
> +   else
> +	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
> +		     dst_offset);
> +
>     OUT_BATCH(0); /* bg */
>     OUT_BATCH(fg_color); /* fg */
>     OUT_BATCH(0); /* pattern base addr */
> diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
> index 6668935..3b65d6a 100644
> --- a/src/mesa/drivers/dri/intel/intel_fbo.c
> +++ b/src/mesa/drivers/dri/intel/intel_fbo.c
> @@ -105,6 +105,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
>  {
>     struct intel_context *intel = intel_context(ctx);
>     struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> +   uint32_t tiling_mode = I915_TILING_NONE;
>     GLboolean softwareBuffer = GL_FALSE;
>     int cpp;
>  
> @@ -217,9 +218,11 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
>        DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
>  	  height, pitch);
>  
> -      irb->region = intel_region_alloc(intel, I915_TILING_NONE,
> -				       cpp, width, height, pitch,
> -				       GL_TRUE);
> +      if (intel->use_texture_tiling)
> +	 tiling_mode = I915_TILING_X;
> +
> +      irb->region = intel_region_alloc(intel, tiling_mode, cpp, width, height,
> +				       pitch, GL_TRUE);
>        if (!irb->region)
>           return GL_FALSE;       /* out of memory? */

What if the renderbuffer has a format unsuited to X tiling (e.g. depth)?
We handle that case in the texture code, and it should be handled here as
well.  The same applies to the hunks below that select I915_TILING_X, too.

We should probably split these changes out into their own patch and get
them in for 965 anyway.

> diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> index c985da5..8abb7c2 100644
> --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
> @@ -161,12 +161,17 @@ intel_miptree_create_for_region(struct intel_context *intel,
>  				GLuint compress_byte)
>  {
>     struct intel_mipmap_tree *mt;
> +   uint32_t tiling = I915_TILING_NONE;
> +
> +   if (intel->use_texture_tiling && compress_byte == 0 &&
> +       intel->intelScreen->kernel_exec_fencing)
> +      tiling = I915_TILING_X;
>  
>     mt = intel_miptree_create_internal(intel, target, internal_format,
>  				      first_level, last_level,
>  				      region->width, region->height, 1,
>  				      region->cpp, compress_byte,
> -				      I915_TILING_NONE);
> +				      tiling);
>     if (!mt)
>        return mt;
>  #if 0
> diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
> index 7525cd9..0444f8e 100644
> --- a/src/mesa/drivers/dri/intel/intel_regions.c
> +++ b/src/mesa/drivers/dri/intel/intel_regions.c
> @@ -180,22 +180,26 @@ intel_region_alloc(struct intel_context *intel,
>  {
>     dri_bo *buffer;
>     struct intel_region *region;
> +   unsigned long flags = 0, stride;
> +   int ret;
>  
> -   if (expect_accelerated_upload) {
> -      buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
> -					     pitch * cpp * height, 64);
> -   } else {
> -      buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
> -				  pitch * cpp * height, 64);
> -   }
> +   if (expect_accelerated_upload)
> +	   flags |= BO_ALLOC_FOR_RENDER;
> +
> +   buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height,
> +				     cpp, &tiling, &stride, flags);
> +   if (!buffer)
> +      return NULL;
>  
> -   region = intel_region_alloc_internal(intel, cpp, width, height,
> -					pitch, buffer);
> +   pitch = stride / cpp;
> +   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
> +					buffer);
>  
> -   if (tiling != I915_TILING_NONE) {
> -      assert(((pitch * cpp) & 127) == 0);
> -      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
> -      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
> +   ret = drm_intel_bo_get_tiling(buffer, &region->tiling,
> +				 &region->bit_6_swizzle);
> +   if (ret != 0) {
> +      intel_region_release(&region);
> +      return NULL;
>     }
>  
>     return region;
> @@ -213,8 +217,8 @@ intel_region_alloc_for_handle(struct intel_context *intel,
>  
>     buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
>  
> -   region = intel_region_alloc_internal(intel, cpp,
> -					width, height, pitch, buffer);
> +   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
> +					buffer);
>     if (region == NULL)
>        return region;
>  
> @@ -433,6 +437,9 @@ void
>  intel_region_release_pbo(struct intel_context *intel,
>                           struct intel_region *region)
>  {
> +   uint32_t tiling = I915_TILING_X;
> +   unsigned long stride;
> +
>     _DBG("%s %p\n", __FUNCTION__, region);
>     assert(region->buffer == region->pbo->buffer);
>     region->pbo->region = NULL;
> @@ -440,9 +447,11 @@ intel_region_release_pbo(struct intel_context *intel,
>     dri_bo_unreference(region->buffer);
>     region->buffer = NULL;
>  
> -   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
> -				 region->pitch * region->cpp * region->height,
> -				 64);
> +   region->buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
> +					     region->width, region->height,
> +					     region->cpp, &tiling, &stride, 0);
> +   dri_bo_get_tiling(region->buffer, &region->tiling, &region->bit_6_swizzle);
> +   region->pitch = stride / region->cpp;
>  }
>  
>  /* Break the COW tie to the pbo.  Both the pbo and the region end up
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
-- 
Eric Anholt
eric at anholt.net                         eric.anholt at intel.com

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: This is a digitally signed message part
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20090809/49fcf42d/attachment.sig>