[Intel-gfx] [PATCH] mesa/intel: support tiled textures on pre-965

Jesse Barnes jbarnes at virtuousgeek.org
Tue Jul 14 22:55:03 CEST 2009


This patch adds support for tiled textures on pre-965 chips.  It uses
the new libdrm tiled allocation function and the new fence register
relocation type to specify buffer properties and rendering requirements.

This one currently causes some rendering errors, but is otherwise
stable.  Feedback appreciated.  I also noticed one other place we might
use tiled objects, intel_bufferobj_alloc_buffer(), but didn't make the
conversion here.

Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>

diff --git a/src/mesa/drivers/dri/i915/i915_metaops.c b/src/mesa/drivers/dri/i915/i915_metaops.c
index 90a78c6..dae292f 100644
--- a/src/mesa/drivers/dri/i915/i915_metaops.c
+++ b/src/mesa/drivers/dri/i915/i915_metaops.c
@@ -334,6 +334,7 @@ meta_tex_rect_source(struct intel_context *intel,
    GLuint *state = i915->meta.Tex[0];
    GLuint textureFormat;
    GLuint cpp;
+   uint32_t tiling, swizzle;
 
    /* A full implementation of this would do the upload through
     * glTexImage2d, and get all the conversion operations at that
@@ -398,10 +399,16 @@ meta_tex_rect_source(struct intel_context *intel,
 /*    intel_region_reference(&i915->meta.tex_region[0], region); */
    i915->meta.tex_buffer[0] = buffer;
    i915->meta.tex_offset[0] = offset;
+   drm_intel_bo_get_tiling(buffer, &tiling, &swizzle);
 
    state[I915_TEXREG_MS3] = (((height - 1) << MS3_HEIGHT_SHIFT) |
                              ((pitch - 1) << MS3_WIDTH_SHIFT) |
-                             textureFormat | MS3_USE_FENCE_REGS);
+                             textureFormat);
+   if (tiling != I915_TILING_NONE) {
+      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
+      if (tiling == I915_TILING_Y)
+	      state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
+   }
 
    state[I915_TEXREG_MS4] = (((((pitch * cpp) / 4) - 1) << MS4_PITCH_SHIFT) |
                              MS4_CUBE_FACE_ENA_MASK |
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
index 0f87fc4..73843c7 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -284,6 +284,32 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
    return GL_TRUE;
 }
 
+/* Emit a fence-register relocation for buffer at the current batch position,
+ * then the presumed address dword.  Returns GL_TRUE on success, GL_FALSE if
+ * libdrm reports a failure recording the relocation. */
+GLboolean
+intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+				    dri_bo *buffer,
+				    uint32_t read_domains, uint32_t write_domain,
+				    uint32_t delta)
+{
+   int ret;
+
+   if (batch->ptr - batch->map > batch->buf->size)
+      _mesa_printf("bad relocation ptr %p map %p offset %ld size %lu\n",
+		   batch->ptr, batch->map, (long) (batch->ptr - batch->map),
+		   (unsigned long) batch->buf->size);
+   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+				       buffer, delta, read_domains, write_domain);
+
+   /* Write the presumed address (old offset + delta) so the kernel can skip
+    * relocation processing if the buffer doesn't move.  Emitted even when the
+    * relocation failed, so the batch still gets the dword the caller expects. */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+
+   return ret == 0 ? GL_TRUE : GL_FALSE;
+}
+
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
                        const void *data, GLuint bytes,
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
index 51579df..baf5c05 100644
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -95,6 +95,11 @@ GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 				       uint32_t read_domains,
 				       uint32_t write_domain,
 				       uint32_t offset);
+GLboolean intel_batchbuffer_emit_fenced_reloc(struct intel_batchbuffer *batch,
+					      dri_bo *buffer,
+					      uint32_t read_domains,
+					      uint32_t write_domain,
+					      uint32_t offset);
 
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
@@ -162,6 +167,13 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
 				read_domains, write_domain, delta);	\
 } while (0)
 
+/* Like OUT_RELOC, but emits the relocation via the fenced-reloc helper. */
+#define OUT_RELOC_FENCE(buf, read_domains, write_domain, delta) do {	\
+   assert((delta) >= 0);						\
+   intel_batchbuffer_emit_fenced_reloc(intel->batch, (buf), (read_domains), \
+				       (write_domain), (delta));	\
+} while (0)
+
 #define ADVANCE_BATCH() do {						\
    unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
    assert(intel->batch->emit.start_ptr != NULL);			\
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
index 2e95bd1..410a89d 100644
--- a/src/mesa/drivers/dri/intel/intel_blit.c
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -156,14 +156,20 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
 	 OUT_BATCH((box.y1 << 16) | box.x1);
 	 OUT_BATCH((box.y2 << 16) | box.x2);
 
-	 OUT_RELOC(dst->buffer,
-		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		   0);
+	 if (dst->tiling != I915_TILING_NONE)
+		 OUT_RELOC_FENCE(dst->buffer,
+				 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+				 0);
+	 else
+		 OUT_RELOC(dst->buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
 	 OUT_BATCH((src_y << 16) | src_x);
 	 OUT_BATCH(src_pitch);
-	 OUT_RELOC(src->buffer,
-		   I915_GEM_DOMAIN_RENDER, 0,
-		   0);
+	 if (src->tiling != I915_TILING_NONE)
+		 OUT_RELOC_FENCE(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
+	 else
+		 OUT_RELOC(src->buffer, I915_GEM_DOMAIN_RENDER, 0, 0);
 	 ADVANCE_BATCH();
       }
 
@@ -321,14 +327,23 @@ intelEmitCopyBlit(struct intel_context *intel,
    OUT_BATCH(BR13 | (uint16_t)dst_pitch);
    OUT_BATCH((dst_y << 16) | dst_x);
    OUT_BATCH((dst_y2 << 16) | dst_x2);
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   if (dst_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(dst_buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+			   dst_offset);
+   else
+	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		     dst_offset);
+
    OUT_BATCH((src_y << 16) | src_x);
    OUT_BATCH((uint16_t)src_pitch);
-   OUT_RELOC(src_buffer,
-	     I915_GEM_DOMAIN_RENDER, 0,
-	     src_offset);
+   if (src_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(src_buffer,
+			   I915_GEM_DOMAIN_RENDER, 0,
+			   src_offset);
+   else
+	   OUT_RELOC(src_buffer, I915_GEM_DOMAIN_RENDER, 0, src_offset);
+
    ADVANCE_BATCH();
 
    intel_batchbuffer_emit_mi_flush(intel->batch);
@@ -532,9 +547,16 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
                OUT_BATCH(BR13);
                OUT_BATCH((b.y1 << 16) | b.x1);
                OUT_BATCH((b.y2 << 16) | b.x2);
-               OUT_RELOC(write_buffer,
-			 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                         irb->region->draw_offset);
+ 	       if (irb->region->tiling != I915_TILING_NONE)
+		       OUT_RELOC_FENCE(write_buffer,
+				       I915_GEM_DOMAIN_RENDER,
+				       I915_GEM_DOMAIN_RENDER,
+				       irb->region->draw_offset);
+	       else
+		       OUT_RELOC(write_buffer, I915_GEM_DOMAIN_RENDER,
+				 I915_GEM_DOMAIN_RENDER,
+				 irb->region->draw_offset);
+
                OUT_BATCH(clearVal);
                ADVANCE_BATCH();
                clearMask &= ~bufBit;    /* turn off bit, for faster loop exit */
@@ -612,9 +634,14 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
    OUT_BATCH(br13);
    OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
    OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
-   OUT_RELOC(dst_buffer,
-	     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-	     dst_offset);
+   if (dst_tiling != I915_TILING_NONE)
+	   OUT_RELOC_FENCE(dst_buffer,
+			   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+			   dst_offset);
+   else
+	   OUT_RELOC(dst_buffer, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		     dst_offset);
+
    OUT_BATCH(0); /* bg */
    OUT_BATCH(fg_color); /* fg */
    OUT_BATCH(0); /* pattern base addr */
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 6668935..3b65d6a 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -105,6 +105,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
 {
    struct intel_context *intel = intel_context(ctx);
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   uint32_t tiling_mode = I915_TILING_NONE;
    GLboolean softwareBuffer = GL_FALSE;
    int cpp;
 
@@ -217,9 +218,11 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width,
 	  height, pitch);
 
-      irb->region = intel_region_alloc(intel, I915_TILING_NONE,
-				       cpp, width, height, pitch,
-				       GL_TRUE);
+      if (intel->use_texture_tiling)
+	 tiling_mode = I915_TILING_X;
+
+      irb->region = intel_region_alloc(intel, tiling_mode, cpp, width, height,
+				       pitch, GL_TRUE);
       if (!irb->region)
          return GL_FALSE;       /* out of memory? */
 
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index c985da5..8abb7c2 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -161,12 +161,17 @@ intel_miptree_create_for_region(struct intel_context *intel,
 				GLuint compress_byte)
 {
    struct intel_mipmap_tree *mt;
+   uint32_t tiling = I915_TILING_NONE;
+
+   if (intel->use_texture_tiling && compress_byte == 0 &&
+       intel->intelScreen->kernel_exec_fencing)
+      tiling = I915_TILING_X;
 
    mt = intel_miptree_create_internal(intel, target, internal_format,
 				      first_level, last_level,
 				      region->width, region->height, 1,
 				      region->cpp, compress_byte,
-				      I915_TILING_NONE);
+				      tiling);
    if (!mt)
       return mt;
 #if 0
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 7525cd9..0444f8e 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -180,22 +180,26 @@ intel_region_alloc(struct intel_context *intel,
 {
    dri_bo *buffer;
    struct intel_region *region;
+   unsigned long flags = 0, stride;
+   int ret;
 
-   if (expect_accelerated_upload) {
-      buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region",
-					     pitch * cpp * height, 64);
-   } else {
-      buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
-				  pitch * cpp * height, 64);
-   }
+   if (expect_accelerated_upload)
+	   flags |= BO_ALLOC_FOR_RENDER;
+
+   buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region", width, height,
+				     cpp, &tiling, &stride, flags);
+   if (!buffer)
+      return NULL;
 
-   region = intel_region_alloc_internal(intel, cpp, width, height,
-					pitch, buffer);
+   pitch = stride / cpp;
+   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+					buffer);
 
-   if (tiling != I915_TILING_NONE) {
-      assert(((pitch * cpp) & 127) == 0);
-      drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp);
-      drm_intel_bo_get_tiling(buffer, &region->tiling, &region->bit_6_swizzle);
+   ret = drm_intel_bo_get_tiling(buffer, &region->tiling,
+				 &region->bit_6_swizzle);
+   if (ret != 0) {
+      intel_region_release(&region);
+      return NULL;
    }
 
    return region;
@@ -213,8 +217,8 @@ intel_region_alloc_for_handle(struct intel_context *intel,
 
    buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
 
-   region = intel_region_alloc_internal(intel, cpp,
-					width, height, pitch, buffer);
+   region = intel_region_alloc_internal(intel, cpp, width, height, pitch,
+					buffer);
    if (region == NULL)
       return region;
 
@@ -433,6 +437,9 @@ void
 intel_region_release_pbo(struct intel_context *intel,
                          struct intel_region *region)
 {
+   uint32_t tiling = I915_TILING_X;
+   unsigned long stride;
+
    _DBG("%s %p\n", __FUNCTION__, region);
    assert(region->buffer == region->pbo->buffer);
    region->pbo->region = NULL;
@@ -440,9 +447,11 @@ intel_region_release_pbo(struct intel_context *intel,
    dri_bo_unreference(region->buffer);
    region->buffer = NULL;
 
-   region->buffer = dri_bo_alloc(intel->bufmgr, "region",
-				 region->pitch * region->cpp * region->height,
-				 64);
+   region->buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+					     region->width, region->height,
+					     region->cpp, &tiling, &stride, 0);
+   dri_bo_get_tiling(region->buffer, &region->tiling, &region->bit_6_swizzle);
+   region->pitch = stride / region->cpp;
 }
 
 /* Break the COW tie to the pbo.  Both the pbo and the region end up



More information about the Intel-gfx mailing list