[Mesa-dev] [PATCH 03/10] i965: Always use the pre-computed offset for the relocation entry

Chris Wilson chris at chris-wilson.co.uk
Fri Jul 21 15:36:45 UTC 2017


We must be careful to only compute the address once based on the
per-context information (rather than accessing the unlocked global
bo->offset64) so that the value in the batch does match the
reloc.presumed_offset we declare to the kernel. Otherwise, highly
unlikely, but we may see GPU hangs in multithreaded users.

The only real complication here is isl_surf_fill_state() which needs to
adjust the reloc.delta to both general a tile offset and to encode state
into the lower 12 bits.

Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 65 +++++++++++-------------
 src/mesa/drivers/dri/i965/genX_state_upload.c    | 13 +++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.c    |  4 +-
 3 files changed, 38 insertions(+), 44 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 45ac106f3f..af7b07ace2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -163,12 +163,12 @@ brw_emit_surface_state(struct brw_context *brw,
          aux_surf = &mt->mcs_buf->surf;
 
          aux_bo = mt->mcs_buf->bo;
-         aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
+         aux_offset = mt->mcs_buf->offset;
       } else {
          aux_surf = &mt->hiz_buf->surf;
 
          aux_bo = mt->hiz_buf->bo;
-         aux_offset = mt->hiz_buf->bo->offset64;
+         aux_offset = 0;
       }
 
       /* We only really need a clear color if we also have an auxiliary
@@ -183,28 +183,29 @@ brw_emit_surface_state(struct brw_context *brw,
                                  surf_offset);
 
    isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
-                       .address = mt->bo->offset64 + offset,
+                       .address = brw_emit_reloc(&brw->batch,
+                                                 *surf_offset + brw->isl_dev.ss.addr_offset,
+                                                 mt->bo, offset, read_domains, write_domains),
                        .aux_surf = aux_surf, .aux_usage = aux_usage,
                        .aux_address = aux_offset,
                        .mocs = mocs, .clear_color = clear_color,
                        .x_offset_sa = tile_x, .y_offset_sa = tile_y);
-
-   brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
-                  mt->bo, offset, read_domains, write_domains);
-
    if (aux_surf) {
       /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
        * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
        * contain other control information.  Since buffer addresses are always
        * on 4k boundaries (and thus have their lower 12 bits zero), we can use
        * an ordinary reloc to do the necessary address translation.
+       *
+       * FIXME: move to the point of assignment.
        */
       assert((aux_offset & 0xfff) == 0);
       uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
-      brw_emit_reloc(&brw->batch,
-                     *surf_offset + brw->isl_dev.ss.aux_addr_offset,
-                     aux_bo, *aux_addr - aux_bo->offset64,
-                     read_domains, write_domains);
+      *aux_addr = brw_emit_reloc(&brw->batch,
+                                 *surf_offset +
+                                 brw->isl_dev.ss.aux_addr_offset,
+                                 aux_bo, *aux_addr, /* XXX read from WC */
+                                 read_domains, write_domains);
    }
 }
 
@@ -683,18 +684,16 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
                                   out_offset);
 
    isl_buffer_fill_state(&brw->isl_dev, dw,
-                         .address = (bo ? bo->offset64 : 0) + buffer_offset,
+                         .address = !bo ? buffer_offset :
+                                    brw_emit_reloc(&brw->batch,
+                                                   *out_offset + brw->isl_dev.ss.addr_offset,
+                                                   bo, buffer_offset,
+                                                   I915_GEM_DOMAIN_SAMPLER,
+                                                   (rw ? I915_GEM_DOMAIN_SAMPLER : 0)),
                          .size = buffer_size,
                          .format = surface_format,
                          .stride = pitch,
                          .mocs = tex_mocs[brw->gen]);
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
-                     bo, buffer_offset,
-                     I915_GEM_DOMAIN_SAMPLER,
-                     (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
-   }
 }
 
 void
@@ -857,17 +856,15 @@ brw_update_sol_surface(struct brw_context *brw,
       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
       surface_format << BRW_SURFACE_FORMAT_SHIFT |
       BRW_SURFACE_RC_READ_WRITE;
-   surf[1] = bo->offset64 + offset_bytes; /* reloc */
+   surf[1] = brw_emit_reloc(&brw->batch,
+                            *out_offset + 4, bo, offset_bytes,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 	      height << BRW_SURFACE_HEIGHT_SHIFT);
    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = 0;
    surf[5] = 0;
-
-   /* Emit relocation to surface contents. */
-   brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 }
 
 /* Creates a new WM constant buffer reflecting the current fragment program's
@@ -975,7 +972,9 @@ brw_emit_null_surface_state(struct brw_context *brw,
 		  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 		  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
    }
-   surf[1] = bo ? bo->offset64 : 0;
+   surf[1] = !bo ? 0 :
+             brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
               (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
@@ -988,11 +987,6 @@ brw_emit_null_surface_state(struct brw_context *brw,
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = multisampling_state;
    surf[5] = 0;
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
 }
 
 /**
@@ -1049,8 +1043,12 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
 
    /* reloc */
    assert(mt->offset % mt->cpp == 0);
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
-	      mt->bo->offset64 + mt->offset);
+   surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
+                            mt->offset +
+                            intel_renderbuffer_get_tile_offsets(irb,
+                                                                &tile_x,
+                                                                &tile_y),
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 
    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 	      (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1092,9 +1090,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
       }
    }
 
-   brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-
    return offset;
 }
 
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ef04603df8..71570018a1 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -5053,14 +5053,13 @@ genX(update_sampler_state)(struct brw_context *brw,
                                  texObj->StencilSampling,
                                  &border_color_offset);
    }
-
-   samp_st.BorderColorPointer = border_color_offset;
-
    if (GEN_GEN < 6) {
-      samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
-      brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
-                     brw->batch.bo, border_color_offset,
-                     I915_GEM_DOMAIN_SAMPLER, 0);
+      samp_st.BorderColorPointer =
+         brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
+                        brw->batch.bo, border_color_offset,
+                        I915_GEM_DOMAIN_SAMPLER, 0);
+   } else {
+      samp_st.BorderColorPointer = border_color_offset;
    }
 
 #if GEN_GEN >= 8
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 140678f88c..9364dab7f2 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -736,7 +736,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
                struct brw_bo *target, uint32_t target_offset,
                uint32_t read_domains, uint32_t write_domain)
 {
-   uint64_t offset64;
+   assert(target);
 
    if (batch->reloc_count == batch->reloc_array_size) {
       batch->reloc_array_size *= 2;
@@ -758,7 +758,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
    batch->reloc_count++;
 
    /* ensure gcc doesn't reload */
-   offset64 = *((volatile uint64_t *)&target->offset64);
+   uint64_t offset64 = *((volatile uint64_t *)&target->offset64);
    reloc->offset = batch_offset;
    reloc->delta = target_offset;
    reloc->target_handle = target->gem_handle;
-- 
2.13.3



More information about the mesa-dev mailing list