[Mesa-dev] [PATCH 2/2] i965: Always use the pre-computed offset for the relocation entry

Chris Wilson chris at chris-wilson.co.uk
Fri Jul 14 08:26:04 UTC 2017


We must be careful to only compute the address once based on the
per-context information (rather than accessing the unlocked global
bo->offset64) so that the value in the batch does match the
reloc.presumed_offset we declare to the kernel. Otherwise, highly
unlikely, but we may see GPU hangs in multithreaded users.

The only real complication here is isl_surf_fill_state() which needs to
adjust the reloc.delta to both general a tile offset and to encode state
into the lower 12 bits. In a few of the gen4/5 emitters, I had to
rearrange the code slightly in order to have the correct state ready
prior to emitting the relocation, these should hopefully continue to
disappear as they are converted to genxml routines.

Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 src/mesa/drivers/dri/i965/brw_clip_state.c       | 13 ++---
 src/mesa/drivers/dri/i965/brw_sf_state.c         | 36 +++++++-------
 src/mesa/drivers/dri/i965/brw_wm_state.c         | 55 +++++++++-------------
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 60 ++++++++++--------------
 src/mesa/drivers/dri/i965/genX_blorp_exec.c      |  8 ++--
 src/mesa/drivers/dri/i965/genX_state_upload.c    | 13 +++--
 src/mesa/drivers/dri/i965/intel_batchbuffer.h    |  6 +--
 7 files changed, 84 insertions(+), 107 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
index 8f22c0ff67..2ff2b4a51a 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_state.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -101,14 +101,11 @@ brw_upload_clip_unit(struct brw_context *brw)
    /* enable guardband clipping if we can */
    clip->clip5.guard_band_enable = 1;
    clip->clip6.clipper_viewport_state_ptr =
-      (brw->batch.bo->offset64 + brw->clip.vp_offset) >> 5;
-
-   /* emit clip viewport relocation */
-   brw_emit_reloc(&brw->batch,
-                  (brw->clip.state_offset +
-                   offsetof(struct brw_clip_unit_state, clip6)),
-                  brw->batch.bo, brw->clip.vp_offset,
-                  I915_GEM_DOMAIN_INSTRUCTION, 0);
+      brw_emit_reloc(&brw->batch,
+                     (brw->clip.state_offset +
+                      offsetof(struct brw_clip_unit_state, clip6)),
+                     brw->batch.bo, brw->clip.vp_offset,
+                     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
 
    /* _NEW_TRANSFORM */
    if (!ctx->Transform.DepthClamp)
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
index c739201f7b..6a9653545f 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_state.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -86,24 +86,35 @@ static void upload_sf_unit( struct brw_context *brw )
 				  brw->urb.nr_sf_entries) - 1;
 
    /* BRW_NEW_SF_VP */
-   sf->sf5.sf_viewport_state_offset = (brw->batch.bo->offset64 +
-				       brw->sf.vp_offset) >> 5; /* reloc */
-
-   sf->sf5.viewport_transform = 1;
-
+   int viewport_transform = 1;
    sf->sf6.scissor = 1;
 
    /* _NEW_POLYGON */
+   int front_winding;
    if (brw->polygon_front_bit)
-      sf->sf5.front_winding = BRW_FRONTWINDING_CW;
+      front_winding = BRW_FRONTWINDING_CW;
    else
-      sf->sf5.front_winding = BRW_FRONTWINDING_CCW;
+      front_winding = BRW_FRONTWINDING_CCW;
 
    /* _NEW_BUFFERS
     * The viewport is inverted for rendering to a FBO, and that inverts
     * polygon front/back orientation.
     */
-   sf->sf5.front_winding ^= render_to_fbo;
+   front_winding ^= render_to_fbo;
+
+   /* Emit SF viewport relocation */
+   sf->sf5.sf_viewport_state_offset =
+      brw_emit_reloc(&brw->batch,
+                     brw->sf.state_offset +
+                     offsetof(struct brw_sf_unit_state, sf5),
+                     brw->batch.bo,
+                     brw->sf.vp_offset |
+                     front_winding |
+                     (viewport_transform << 1),
+                     I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
+
+   sf->sf5.viewport_transform = viewport_transform;
+   sf->sf5.front_winding = front_winding;
 
    /* _NEW_POLYGON */
    switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) {
@@ -168,15 +179,6 @@ static void upload_sf_unit( struct brw_context *brw )
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
     */
 
-   /* Emit SF viewport relocation */
-   brw_emit_reloc(&brw->batch,
-                  brw->sf.state_offset +
-		  offsetof(struct brw_sf_unit_state, sf5),
-                  brw->batch.bo,
-                  brw->sf.vp_offset | sf->sf5.front_winding |
-                  (sf->sf5.viewport_transform << 1),
-                  I915_GEM_DOMAIN_INSTRUCTION, 0);
-
    brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 69bbeb26d4..551d02a961 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -132,10 +132,15 @@ brw_upload_wm_unit(struct brw_context *brw)
       prog_data->base.binding_table.size_bytes / 4;
 
    if (prog_data->base.total_scratch != 0) {
+      int per_thread_scratch_space = ffs(brw->wm.base.per_thread_scratch) - 11;
       wm->thread2.scratch_space_base_pointer =
-	 brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
-      wm->thread2.per_thread_scratch_space =
-	 ffs(brw->wm.base.per_thread_scratch) - 11;
+         brw_emit_reloc(&brw->batch,
+                        brw->wm.base.state_offset +
+                        offsetof(struct brw_wm_unit_state, thread2),
+                        brw->wm.base.scratch_bo,
+                        per_thread_scratch_space,
+                        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER) >> 10;
+      wm->thread2.per_thread_scratch_space = per_thread_scratch_space;
    } else {
       wm->thread2.scratch_space_base_pointer = 0;
       wm->thread2.per_thread_scratch_space = 0;
@@ -151,20 +156,31 @@ brw_upload_wm_unit(struct brw_context *brw)
    /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
    wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;
 
+   int sampler_count;
    if (brw->gen == 5)
-      wm->wm4.sampler_count = 0; /* hardware requirement */
+      sampler_count = 0; /* hardware requirement */
    else {
-      wm->wm4.sampler_count = (brw->wm.base.sampler_count + 1) / 4;
+      sampler_count = (brw->wm.base.sampler_count + 1) / 4;
    }
 
    if (brw->wm.base.sampler_count) {
       /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
-      wm->wm4.sampler_state_pointer = (brw->batch.bo->offset64 +
-				       brw->wm.base.sampler_offset) >> 5;
+      wm->wm4.sampler_state_pointer =
+         brw_emit_reloc(&brw->batch,
+                        brw->wm.base.state_offset +
+                        offsetof(struct brw_wm_unit_state, wm4),
+                        brw->batch.bo,
+                        brw->wm.base.sampler_offset |
+                        brw->stats_wm | /* BRW_NEW_STATS_WM */
+                        (sampler_count << 2),
+                        I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5;
    } else {
       wm->wm4.sampler_state_pointer = 0;
    }
 
+   wm->wm4.stats_enable = brw->stats_wm;
+   wm->wm4.sampler_count = sampler_count;
+
    /* BRW_NEW_FRAGMENT_PROGRAM */
    wm->wm5.program_uses_depth = prog_data->uses_src_depth;
    wm->wm5.program_computes_depth = (fp->info.outputs_written &
@@ -216,31 +232,6 @@ brw_upload_wm_unit(struct brw_context *brw)
    /* _NEW_LINE */
    wm->wm5.line_stipple = ctx->Line.StippleFlag;
 
-   /* BRW_NEW_STATS_WM */
-   if (brw->stats_wm)
-      wm->wm4.stats_enable = 1;
-
-   /* Emit scratch space relocation */
-   if (prog_data->base.total_scratch != 0) {
-      brw_emit_reloc(&brw->batch,
-                     brw->wm.base.state_offset +
-                     offsetof(struct brw_wm_unit_state, thread2),
-                     brw->wm.base.scratch_bo,
-                     wm->thread2.per_thread_scratch_space,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
-
-   /* Emit sampler state relocation */
-   if (brw->wm.base.sampler_count != 0) {
-      brw_emit_reloc(&brw->batch,
-                     brw->wm.base.state_offset +
-                     offsetof(struct brw_wm_unit_state, wm4),
-                     brw->batch.bo,
-                     brw->wm.base.sampler_offset | wm->wm4.stats_enable |
-                     (wm->wm4.sampler_count << 2),
-                     I915_GEM_DOMAIN_INSTRUCTION, 0);
-   }
-
    brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
 
    /* _NEW_POLGYON */
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 8d18179d89..1068469937 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -149,12 +149,12 @@ brw_emit_surface_state(struct brw_context *brw,
 
          assert(mt->mcs_buf->offset == 0);
          aux_bo = mt->mcs_buf->bo;
-         aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
+         aux_offset = mt->mcs_buf->offset;
       } else {
          aux_surf = &mt->hiz_buf->surf;
 
          aux_bo = mt->hiz_buf->bo;
-         aux_offset = mt->hiz_buf->bo->offset64;
+         aux_offset = 0;
       }
 
       /* We only really need a clear color if we also have an auxiliary
@@ -169,28 +169,28 @@ brw_emit_surface_state(struct brw_context *brw,
                                  surf_offset);
 
    isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
-                       .address = mt->bo->offset64 + offset,
+                       .address = brw_emit_reloc(&brw->batch,
+                                                 *surf_offset + brw->isl_dev.ss.addr_offset,
+                                                 mt->bo, offset, read_domains, write_domains),
                        .aux_surf = aux_surf, .aux_usage = aux_usage,
                        .aux_address = aux_offset,
                        .mocs = mocs, .clear_color = clear_color,
                        .x_offset_sa = tile_x, .y_offset_sa = tile_y);
-
-   brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
-                  mt->bo, offset, read_domains, write_domains);
-
    if (aux_surf) {
       /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
        * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
        * contain other control information.  Since buffer addresses are always
        * on 4k boundaries (and thus have their lower 12 bits zero), we can use
        * an ordinary reloc to do the necessary address translation.
+       *
+       * FIXME: move to the point of assignment.
        */
       assert((aux_offset & 0xfff) == 0);
       uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
-      brw_emit_reloc(&brw->batch,
-                     *surf_offset + brw->isl_dev.ss.aux_addr_offset,
-                     aux_bo, *aux_addr - aux_bo->offset64,
-                     read_domains, write_domains);
+      *aux_addr = brw_emit_reloc(&brw->batch,
+                                 *surf_offset + brw->isl_dev.ss.aux_addr_offset,
+                                 aux_bo, *aux_addr, /* XXX read from WC */
+                                 read_domains, write_domains);
    }
 }
 
@@ -667,18 +667,15 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
                                   out_offset);
 
    isl_buffer_fill_state(&brw->isl_dev, dw,
-                         .address = (bo ? bo->offset64 : 0) + buffer_offset,
+                         .address = brw_emit_reloc(&brw->batch,
+                                                   *out_offset + brw->isl_dev.ss.addr_offset,
+                                                   bo, buffer_offset,
+                                                   I915_GEM_DOMAIN_SAMPLER,
+                                                   (rw ? I915_GEM_DOMAIN_SAMPLER : 0)),
                          .size = buffer_size,
                          .format = surface_format,
                          .stride = pitch,
                          .mocs = tex_mocs[brw->gen]);
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
-                     bo, buffer_offset,
-                     I915_GEM_DOMAIN_SAMPLER,
-                     (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
-   }
 }
 
 void
@@ -841,17 +838,15 @@ brw_update_sol_surface(struct brw_context *brw,
       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
       surface_format << BRW_SURFACE_FORMAT_SHIFT |
       BRW_SURFACE_RC_READ_WRITE;
-   surf[1] = bo->offset64 + offset_bytes; /* reloc */
+   surf[1] = brw_emit_reloc(&brw->batch,
+                            *out_offset + 4, bo, offset_bytes,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
 	      height << BRW_SURFACE_HEIGHT_SHIFT);
    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = 0;
    surf[5] = 0;
-
-   /* Emit relocation to surface contents. */
-   brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 }
 
 /* Creates a new WM constant buffer reflecting the current fragment program's
@@ -959,7 +954,8 @@ brw_emit_null_surface_state(struct brw_context *brw,
 		  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
 		  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
    }
-   surf[1] = bo ? bo->offset64 : 0;
+   surf[1] = brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
               (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
@@ -972,11 +968,6 @@ brw_emit_null_surface_state(struct brw_context *brw,
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = multisampling_state;
    surf[5] = 0;
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
 }
 
 /**
@@ -1033,8 +1024,10 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
 
    /* reloc */
    assert(mt->offset % mt->cpp == 0);
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
-	      mt->bo->offset64 + mt->offset);
+   surf[1] = brw_emit_reloc(&brw->batch, offset + 4,
+                            mt->bo,
+                            mt->offset + intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y),
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 
    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
 	      (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1076,9 +1069,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
       }
    }
 
-   brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-
    return offset;
 }
 
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 5718d7429c..2fc3a7d925 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -74,10 +74,10 @@ blorp_surface_reloc(struct blorp_batch *batch,
    struct brw_context *brw = batch->driver_batch;
    struct brw_bo *bo = address.buffer;
 
-   brw_emit_reloc(&brw->batch, ss_offset, bo, address.offset + delta,
-                  address.read_domains, address.write_domain);
-
-   uint64_t reloc_val = bo->offset64 + address.offset + delta;
+   uint64_t reloc_val =
+      brw_emit_reloc(&brw->batch, ss_offset, bo,
+                     address.offset + delta,
+                     address.read_domains, address.write_domain);
 #if GEN_GEN >= 8
    *(uint64_t *)location = reloc_val;
 #else
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 54a547cb79..7c443602af 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -4840,14 +4840,13 @@ genX(update_sampler_state)(struct brw_context *brw,
                                  texObj->StencilSampling,
                                  &border_color_offset);
    }
-
-   samp_st.BorderColorPointer = border_color_offset;
-
    if (GEN_GEN < 6) {
-      samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
-      brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
-                     brw->batch.bo, border_color_offset,
-                     I915_GEM_DOMAIN_SAMPLER, 0);
+      samp_st.BorderColorPointer =
+         brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
+                        brw->batch.bo, border_color_offset,
+                        I915_GEM_DOMAIN_SAMPLER, 0);
+   } else {
+      samp_st.BorderColorPointer = border_color_offset;
    }
 
 #if GEN_GEN >= 8
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 2783ba3c0f..7763f0564e 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -83,10 +83,8 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
       return prog_offset;
    }
 
-   brw_emit_reloc(&brw->batch, state_offset, brw->cache.bo, prog_offset,
-                  I915_GEM_DOMAIN_INSTRUCTION, 0);
-
-   return brw->cache.bo->offset64 + prog_offset;
+   return brw_emit_reloc(&brw->batch, state_offset, brw->cache.bo, prog_offset,
+                         I915_GEM_DOMAIN_INSTRUCTION, 0);
 }
 
 #define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))
-- 
2.13.2



More information about the mesa-dev mailing list