[Mesa-dev] [PATCH 03/10] i965: Always use the pre-computed offset for the relocation entry
Chris Wilson
chris at chris-wilson.co.uk
Fri Jul 21 15:36:45 UTC 2017
We must be careful to only compute the address once based on the
per-context information (rather than accessing the unlocked global
bo->offset64) so that the value in the batch does match the
reloc.presumed_offset we declare to the kernel. Otherwise, highly
unlikely, but we may see GPU hangs in multithreaded users.
The only real complication here is isl_surf_fill_state() which needs to
adjust the reloc.delta to both general a tile offset and to encode state
into the lower 12 bits.
Cc: Kenneth Graunke <kenneth at whitecape.org>
Cc: Matt Turner <mattst88 at gmail.com>
Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 65 +++++++++++-------------
src/mesa/drivers/dri/i965/genX_state_upload.c | 13 +++--
src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 +-
3 files changed, 38 insertions(+), 44 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 45ac106f3f..af7b07ace2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -163,12 +163,12 @@ brw_emit_surface_state(struct brw_context *brw,
aux_surf = &mt->mcs_buf->surf;
aux_bo = mt->mcs_buf->bo;
- aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
+ aux_offset = mt->mcs_buf->offset;
} else {
aux_surf = &mt->hiz_buf->surf;
aux_bo = mt->hiz_buf->bo;
- aux_offset = mt->hiz_buf->bo->offset64;
+ aux_offset = 0;
}
/* We only really need a clear color if we also have an auxiliary
@@ -183,28 +183,29 @@ brw_emit_surface_state(struct brw_context *brw,
surf_offset);
isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
- .address = mt->bo->offset64 + offset,
+ .address = brw_emit_reloc(&brw->batch,
+ *surf_offset + brw->isl_dev.ss.addr_offset,
+ mt->bo, offset, read_domains, write_domains),
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
.mocs = mocs, .clear_color = clear_color,
.x_offset_sa = tile_x, .y_offset_sa = tile_y);
-
- brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
- mt->bo, offset, read_domains, write_domains);
-
if (aux_surf) {
/* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
* upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
* contain other control information. Since buffer addresses are always
* on 4k boundaries (and thus have their lower 12 bits zero), we can use
* an ordinary reloc to do the necessary address translation.
+ *
+ * FIXME: move to the point of assignment.
*/
assert((aux_offset & 0xfff) == 0);
uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
- brw_emit_reloc(&brw->batch,
- *surf_offset + brw->isl_dev.ss.aux_addr_offset,
- aux_bo, *aux_addr - aux_bo->offset64,
- read_domains, write_domains);
+ *aux_addr = brw_emit_reloc(&brw->batch,
+ *surf_offset +
+ brw->isl_dev.ss.aux_addr_offset,
+ aux_bo, *aux_addr, /* XXX read from WC */
+ read_domains, write_domains);
}
}
@@ -683,18 +684,16 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
out_offset);
isl_buffer_fill_state(&brw->isl_dev, dw,
- .address = (bo ? bo->offset64 : 0) + buffer_offset,
+ .address = !bo ? buffer_offset :
+ brw_emit_reloc(&brw->batch,
+ *out_offset + brw->isl_dev.ss.addr_offset,
+ bo, buffer_offset,
+ I915_GEM_DOMAIN_SAMPLER,
+ (rw ? I915_GEM_DOMAIN_SAMPLER : 0)),
.size = buffer_size,
.format = surface_format,
.stride = pitch,
.mocs = tex_mocs[brw->gen]);
-
- if (bo) {
- brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
- bo, buffer_offset,
- I915_GEM_DOMAIN_SAMPLER,
- (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
- }
}
void
@@ -857,17 +856,15 @@ brw_update_sol_surface(struct brw_context *brw,
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
surface_format << BRW_SURFACE_FORMAT_SHIFT |
BRW_SURFACE_RC_READ_WRITE;
- surf[1] = bo->offset64 + offset_bytes; /* reloc */
+ surf[1] = brw_emit_reloc(&brw->batch,
+ *out_offset + 4, bo, offset_bytes,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
height << BRW_SURFACE_HEIGHT_SHIFT);
surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = 0;
surf[5] = 0;
-
- /* Emit relocation to surface contents. */
- brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Creates a new WM constant buffer reflecting the current fragment program's
@@ -975,7 +972,9 @@ brw_emit_null_surface_state(struct brw_context *brw,
1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
}
- surf[1] = bo ? bo->offset64 : 0;
+ surf[1] = !bo ? 0 :
+ brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -988,11 +987,6 @@ brw_emit_null_surface_state(struct brw_context *brw,
pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
surf[4] = multisampling_state;
surf[5] = 0;
-
- if (bo) {
- brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
- }
}
/**
@@ -1049,8 +1043,12 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
/* reloc */
assert(mt->offset % mt->cpp == 0);
- surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
- mt->bo->offset64 + mt->offset);
+ surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
+ mt->offset +
+ intel_renderbuffer_get_tile_offsets(irb,
+ &tile_x,
+ &tile_y),
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
(rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1092,9 +1090,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
}
}
- brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-
return offset;
}
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ef04603df8..71570018a1 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -5053,14 +5053,13 @@ genX(update_sampler_state)(struct brw_context *brw,
texObj->StencilSampling,
&border_color_offset);
}
-
- samp_st.BorderColorPointer = border_color_offset;
-
if (GEN_GEN < 6) {
- samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
- brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
- brw->batch.bo, border_color_offset,
- I915_GEM_DOMAIN_SAMPLER, 0);
+ samp_st.BorderColorPointer =
+ brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
+ brw->batch.bo, border_color_offset,
+ I915_GEM_DOMAIN_SAMPLER, 0);
+ } else {
+ samp_st.BorderColorPointer = border_color_offset;
}
#if GEN_GEN >= 8
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 140678f88c..9364dab7f2 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -736,7 +736,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
struct brw_bo *target, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain)
{
- uint64_t offset64;
+ assert(target);
if (batch->reloc_count == batch->reloc_array_size) {
batch->reloc_array_size *= 2;
@@ -758,7 +758,7 @@ brw_emit_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
batch->reloc_count++;
/* ensure gcc doesn't reload */
- offset64 = *((volatile uint64_t *)&target->offset64);
+ uint64_t offset64 = *((volatile uint64_t *)&target->offset64);
reloc->offset = batch_offset;
reloc->delta = target_offset;
reloc->target_handle = target->gem_handle;
--
2.13.3
More information about the mesa-dev
mailing list