[Mesa-dev] [PATCH 2/2] i965/hsw: Set MOCS for surfaces

Chad Versace chad.versace at linux.intel.com
Thu May 9 16:48:31 PDT 2013


The driver was setting MOCS (Memory Object Control State) to 0 for all
objects. This patch sets it as follows:
    renderbuffer, depthbuffer => LLC uncacheable, L3 cacheable
    texture, stencil, hiz => LLC cacheable, L3 cacheable

The goal here is to avoid blowing out the LLC with too-large buffers.

Performance gains:
    Haswell Harris Beach GT3
    Android 4.2.2
    kernel based on 3.8-4fc7c97

    GLBenchmark 2.5.1 Egypt HD C24Z16 Offscreen DXT1
	+32.0309% +/- 0.775397%,  n = 5, 95% confidence

    GLBenchmark 2.7 T-Rex HD C24Z16 Offscreen Fixed timestep ETC1
	+20.2435% +/- 0.821163%,  n = 5, 95% confidence

Tested-by: Matt Turner <mattst88 at gmail.com>
Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---

This patch lives on my 'mocs' branch.


 src/mesa/drivers/dri/i965/brw_context.c           | 31 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_context.h           | 13 ++++++++++
 src/mesa/drivers/dri/i965/gen7_blorp.cpp          | 14 +++++++---
 src/mesa/drivers/dri/i965/gen7_misc_state.c       | 16 ++++++++++--
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |  7 +++--
 5 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 4650553..edcf59d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -418,3 +418,34 @@ brwCreateContext(int api,
    return true;
 }
 
+/**
+ * Pick the MOCS (Memory Object Control State) for a buffer usage; 0 unless Haswell.
+ */
+uint32_t
+brw_get_mocs(struct brw_context *brw,
+             struct intel_region *region,
+             enum brw_mocs_usage usage)
+{
+   struct intel_context *intel = &brw->intel;
+   uint32_t mocs = 0; /* returned unchanged when the hardware is not Haswell */
+
+   if (intel->is_haswell) {
+      /* This heuristic is dumb: it considers the buffer's usage, but not its
+       * size ('region' is never read). A smarter heuristic may perform better.
+       */
+      switch (usage) {
+      case BRW_MOCS_USAGE_RB_SURFACE:
+      case BRW_MOCS_USAGE_DEPTH:
+         /* These surfaces are usually so large they blow out the LLC. */
+         mocs = HSW_MOCS_LCC_UNCACHEABLE | HSW_MOCS_L3_CACHEABLE;
+         break;
+      case BRW_MOCS_USAGE_TEX_SURFACE:
+      case BRW_MOCS_USAGE_HIZ:
+      case BRW_MOCS_USAGE_STENCIL:
+         mocs = HSW_MOCS_LCC_WB_TO_ALL | HSW_MOCS_L3_CACHEABLE;
+         break;
+      }
+   }
+
+   return mocs; /* callers shift/pack this into the relevant state dword */
+}
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c682501..172d36d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1371,6 +1371,19 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
                             uint32_t width, uint32_t height,
                             uint32_t tile_x, uint32_t tile_y);
 
+enum brw_mocs_usage { /* how a buffer is used; selects its MOCS in brw_get_mocs() */
+   BRW_MOCS_USAGE_RB_SURFACE,  /* surface state of a render target */
+   BRW_MOCS_USAGE_TEX_SURFACE, /* surface state of a texture */
+   BRW_MOCS_USAGE_DEPTH,       /* depth buffer */
+   BRW_MOCS_USAGE_STENCIL,     /* stencil buffer */
+   BRW_MOCS_USAGE_HIZ,         /* HiZ (hierarchical depth) buffer */
+};
+
+uint32_t
+brw_get_mocs(struct brw_context *brw,
+             struct intel_region *region,
+             enum brw_mocs_usage usage);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 1c23866..48e98dc 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -151,6 +151,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
     */
    struct intel_region *region = surface->mt->region;
    uint32_t tile_x, tile_y;
+   uint32_t mocs = brw_get_mocs(brw, region, is_render_target
+                                                ? BRW_MOCS_USAGE_RB_SURFACE
+                                                : BRW_MOCS_USAGE_TEX_SURFACE);
 
    uint32_t tiling = surface->map_stencil_as_y_tiled
       ? I915_TILING_Y : region->tiling;
@@ -183,7 +186,8 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
    assert(tile_x % 4 == 0);
    assert(tile_y % 2 == 0);
    surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
-             SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET);
+             SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
+             SET_FIELD(mocs, GEN7_SURFACE_MOCS);
 
    surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
              SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
@@ -598,6 +602,8 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
          intel_region_get_aligned_offset(params->depth.mt->region,
                                          draw_x & ~tile_mask_x,
                                          draw_y & ~tile_mask_y, false);
+      uint32_t depth_mocs = brw_get_mocs(brw, params->depth.mt->region,
+                                         BRW_MOCS_USAGE_DEPTH);
 
       /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
        * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
@@ -634,7 +640,7 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
                 offset);
       OUT_BATCH((params->depth.width + tile_x - 1) << 4 |
                 (params->depth.height + tile_y - 1) << 18);
-      OUT_BATCH(0);
+      OUT_BATCH(depth_mocs);
       OUT_BATCH(tile_x |
                 tile_y << 16);
       OUT_BATCH(0);
@@ -648,10 +654,12 @@ gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
          intel_region_get_aligned_offset(hiz_region,
                                          draw_x & ~tile_mask_x,
                                          (draw_y & ~tile_mask_y) / 2, false);
+      uint32_t hiz_mocs = brw_get_mocs(brw, hiz_region, BRW_MOCS_USAGE_HIZ);
 
       BEGIN_BATCH(3);
       OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
-      OUT_BATCH(hiz_region->pitch - 1);
+      OUT_BATCH((hiz_mocs << 25) |
+                (hiz_region->pitch - 1));
       OUT_RELOC(hiz_region->bo,
                 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                 hiz_offset);
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 12b752c..7e358ae 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -41,6 +41,11 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
+   uint32_t depth_mocs = 0;
+
+   if (depth_mt) {
+      depth_mocs = brw_get_mocs(brw, depth_mt->region, BRW_MOCS_USAGE_DEPTH);
+   }
 
    intel_emit_depth_stall_flushes(intel);
 
@@ -64,7 +69,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
 
    OUT_BATCH(((width + tile_x - 1) << 4) |
              ((height + tile_y - 1) << 18));
-   OUT_BATCH(0);
+   OUT_BATCH(depth_mocs);
    OUT_BATCH(tile_x | (tile_y << 16));
    OUT_BATCH(0);
    ADVANCE_BATCH();
@@ -77,9 +82,13 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
       ADVANCE_BATCH();
    } else {
       struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+      uint32_t hiz_mocs = brw_get_mocs(brw, hiz_mt->region,
+                                       BRW_MOCS_USAGE_HIZ);
+
       BEGIN_BATCH(3);
       OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
-      OUT_BATCH(hiz_mt->region->pitch - 1);
+      OUT_BATCH((hiz_mocs << 25) |
+                (hiz_mt->region->pitch - 1));
       OUT_RELOC(hiz_mt->region->bo,
                 I915_GEM_DOMAIN_RENDER,
                 I915_GEM_DOMAIN_RENDER,
@@ -95,6 +104,8 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
       ADVANCE_BATCH();
    } else {
       const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
+      uint32_t stencil_mocs = brw_get_mocs(brw, stencil_mt->region,
+                                           BRW_MOCS_USAGE_STENCIL);
 
       BEGIN_BATCH(3);
       OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
@@ -113,6 +124,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
        * indicate that it does.
        */
       OUT_BATCH(enabled |
+                (stencil_mocs << 25) |
 	        (2 * stencil_mt->region->pitch - 1));
       OUT_RELOC(stencil_mt->region->bo,
 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 435f9dc..34d5e68 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -294,6 +294,7 @@ gen7_update_texture_surface(struct gl_context *ctx,
    struct intel_mipmap_tree *mt = intelObj->mt;
    struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
    struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
+   uint32_t mocs = brw_get_mocs(brw, mt->region, BRW_MOCS_USAGE_TEX_SURFACE);
    int width, height, depth;
    uint32_t tile_x, tile_y;
 
@@ -347,6 +348,7 @@ gen7_update_texture_surface(struct gl_context *ctx,
     */
    surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
               (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
+              SET_FIELD(mocs, GEN7_SURFACE_MOCS) |
               /* mip count */
               (intelObj->_MaxLevel - tObj->BaseLevel));
 
@@ -532,7 +534,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
    uint32_t format;
    /* _NEW_BUFFERS */
    gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
-
+   uint32_t mocs = brw_get_mocs(brw, region, BRW_MOCS_USAGE_RB_SURFACE);
    uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                     8 * 4, 32, &brw->wm.surf_offset[unit]);
    memset(surf, 0, 8 * 4);
@@ -569,7 +571,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
    assert(tile_x % 4 == 0);
    assert(tile_y % 2 == 0);
    surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) |
-             SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET);
+             SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) |
+             SET_FIELD(mocs, GEN7_SURFACE_MOCS);
 
    surf[2] = SET_FIELD(rb->Width - 1, GEN7_SURFACE_WIDTH) |
              SET_FIELD(rb->Height - 1, GEN7_SURFACE_HEIGHT);
-- 
1.8.1.4



More information about the mesa-dev mailing list