[Mesa-dev] [PATCH 10/16] i965: Add and use brw_bo_map()
Kenneth Graunke
kenneth at whitecape.org
Fri May 26 08:55:47 UTC 2017
On Wednesday, May 24, 2017 1:04:52 PM PDT Matt Turner wrote:
> We can encapsulate the logic for choosing the mapping type. This will
> also help when we add WC mappings.
It might be worth mentioning that this patch changes a few mappings from
CPU to GTT on non-LLC systems. They may even be bug fixes...
I'll call them out below...
> ---
> src/mesa/drivers/dri/i965/brw_bufmgr.c | 30 +++++++++++++++++++++--
> src/mesa/drivers/dri/i965/brw_bufmgr.h | 5 ++--
> src/mesa/drivers/dri/i965/brw_performance_query.c | 6 ++---
> src/mesa/drivers/dri/i965/brw_program.c | 2 +-
> src/mesa/drivers/dri/i965/brw_program_cache.c | 6 ++---
> src/mesa/drivers/dri/i965/brw_queryobj.c | 2 +-
> src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 +-
> src/mesa/drivers/dri/i965/gen6_sol.c | 2 +-
> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 +--
> src/mesa/drivers/dri/i965/intel_buffer_objects.c | 13 ++--------
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ++------------
> src/mesa/drivers/dri/i965/intel_pixel_read.c | 2 +-
> src/mesa/drivers/dri/i965/intel_screen.c | 4 +--
> src/mesa/drivers/dri/i965/intel_tex_image.c | 2 +-
> src/mesa/drivers/dri/i965/intel_tex_subimage.c | 2 +-
> src/mesa/drivers/dri/i965/intel_upload.c | 5 +---
> 16 files changed, 52 insertions(+), 54 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index b79f566..ec9611f 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -658,7 +658,7 @@ set_domain(struct brw_context *brw, const char *action,
> }
> }
>
> -void *
> +static void *
> brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> {
> struct brw_bufmgr *bufmgr = bo->bufmgr;
> @@ -740,7 +740,7 @@ map_gtt(struct brw_bo *bo)
> return bo->map_gtt;
> }
>
> -void *
> +static void *
> brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> {
> struct brw_bufmgr *bufmgr = bo->bufmgr;
> @@ -814,6 +814,32 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo)
> return map;
> }
>
> +static bool
> +can_map_cpu(struct brw_bo *bo, unsigned flags)
> +{
> + if (bo->cache_coherent)
> + return true;
> +
> + if (flags & MAP_PERSISTENT)
> + return false;
> +
> + if (flags & MAP_COHERENT)
> + return false;
> +
> + return !(flags & MAP_WRITE);
> +}
> +
> +void *
> +brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> +{
> + if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
> + return brw_bo_map_gtt(brw, bo, flags);
> + else if (can_map_cpu(bo, flags))
> + return brw_bo_map_cpu(brw, bo, flags);
> + else
> + return brw_bo_map_gtt(brw, bo, flags);
> +}
> +
> int
> brw_bo_unmap(struct brw_bo *bo)
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 831da69..099afcf 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -137,7 +137,7 @@ struct brw_bo {
> *
> * Buffer objects are not necessarily initially mapped into CPU virtual
> * address space or graphics device aperture. They must be mapped
> - * using brw_bo_map_cpu() or brw_bo_map_gtt() to be used by the CPU.
> + * using brw_bo_map() to be used by the CPU.
> */
> struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
> uint64_t size, uint64_t alignment);
> @@ -189,7 +189,7 @@ void brw_bo_unreference(struct brw_bo *bo);
> * This function will block waiting for any existing execution on the
> * buffer to complete, first. The resulting mapping is returned.
> */
> -MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
> +MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
>
> /**
> * Reduces the refcount on the userspace mapping of the buffer
> @@ -263,7 +263,6 @@ struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
> unsigned int handle);
> void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
> MUST_CHECK void *brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo);
> -MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
>
> int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 2ec070b..1c9ddf5 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw,
> if (!read_oa_samples(brw))
> goto error;
>
> - query_buffer = brw_bo_map_cpu(brw, obj->oa.bo, MAP_READ);
> + query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ);
>
> start = last = query_buffer;
> end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
> @@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx,
> MI_RPC_BO_SIZE, 64);
> #ifdef DEBUG
> /* Pre-filling the BO helps debug whether writes landed. */
> - void *map = brw_bo_map_cpu(brw, obj->oa.bo, MAP_WRITE);
> + void *map = brw_bo_map(brw, obj->oa.bo, MAP_WRITE);
> memset(map, 0x80, MI_RPC_BO_SIZE);
> brw_bo_unmap(obj->oa.bo);
> #endif
This will get changed from a CPU mapping to a GTT mapping on non-LLC
systems. I doubt this will realistically have any impact.
> @@ -1214,7 +1214,7 @@ get_pipeline_stats_data(struct brw_context *brw,
> int n_counters = obj->query->n_counters;
> uint8_t *p = data;
>
> - uint64_t *start = brw_bo_map_cpu(brw, obj->pipeline_stats.bo, MAP_READ);
> + uint64_t *start = brw_bo_map(brw, obj->pipeline_stats.bo, MAP_READ);
> uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t));
>
> for (int i = 0; i < n_counters; i++) {
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
> index 7f87e73..bff3475 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -578,7 +578,7 @@ brw_collect_shader_time(struct brw_context *brw)
> * delaying reading the reports, but it doesn't look like it's a big
> * overhead compared to the cost of tracking the time in the first place.
> */
> - void *bo_map = brw_bo_map_cpu(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
> + void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
This will change from a CPU map to a GTT map on non-LLC due to
MAP_WRITE. This may be a bug fix for INTEL_DEBUG=shader_time.
>
> for (int i = 0; i < brw->shader_time.num_entries; i++) {
> uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index 079e2ae..ab03969 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -227,7 +227,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
> if (brw->has_llc) {
> memcpy(llc_map, cache->map, cache->next_offset);
> } else {
> - void *map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> + void *map = brw_bo_map(brw, cache->bo, MAP_READ);
> brw_bo_subdata(new_bo, 0, cache->next_offset, map);
> brw_bo_unmap(cache->bo);
> }
> @@ -268,7 +268,7 @@ brw_lookup_prog(const struct brw_cache *cache,
>
> void *map;
> if (!brw->has_llc)
> - map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> + map = brw_bo_map(brw, cache->bo, MAP_READ);
> else
> map = cache->map;
>
> @@ -550,7 +550,7 @@ brw_print_program_cache(struct brw_context *brw)
> void *map;
>
> if (!brw->has_llc)
> - map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> + map = brw_bo_map(brw, cache->bo, MAP_READ);
> else
> map = cache->map;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
> index 05e23cd..a7b8962 100644
> --- a/src/mesa/drivers/dri/i965/brw_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
> @@ -146,7 +146,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
> }
> }
>
> - results = brw_bo_map_cpu(brw, query->bo, MAP_READ);
> + results = brw_bo_map(brw, query->bo, MAP_READ);
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED_EXT:
> /* The query BO contains the starting and ending timestamps.
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 160182a..d27b0d1 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -212,7 +212,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
> if (query->bo == NULL)
> return;
>
> - uint64_t *results = brw_bo_map_cpu(brw, query->bo, MAP_READ);
> + uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ);
> switch (query->Base.Target) {
> case GL_TIME_ELAPSED:
> /* The query BO contains the starting and ending timestamps.
> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> index 5873afd..b4824b6 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -247,7 +247,7 @@ tally_prims_generated(struct brw_context *brw,
> if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo)))
> perf_debug("Stalling for # of transform feedback primitives written.\n");
>
> - uint64_t *prim_counts = brw_bo_map_cpu(brw, obj->prim_count_bo, MAP_READ);
> + uint64_t *prim_counts = brw_bo_map(brw, obj->prim_count_bo, MAP_READ);
>
> assert(obj->prim_count_buffer_index % (2 * streams) == 0);
> int pairs = obj->prim_count_buffer_index / (2 * streams);
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index de93aeb..62d2fe8 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -100,7 +100,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>
> batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
> if (has_llc) {
> - batch->map = brw_bo_map_cpu(NULL, batch->bo, MAP_READ | MAP_WRITE);
> + batch->map = brw_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
> }
> batch->map_next = batch->map;
>
> @@ -239,7 +239,7 @@ do_batch_dump(struct brw_context *brw)
> if (batch->ring != RENDER_RING)
> return;
>
> - void *map = brw_bo_map_cpu(brw, batch->bo, MAP_READ);
> + void *map = brw_bo_map(brw, batch->bo, MAP_READ);
> if (map == NULL) {
> fprintf(stderr,
> "WARNING: failed to map batchbuffer, "
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> index cf6382d..5813989 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> @@ -395,12 +395,7 @@ brw_map_buffer_range(struct gl_context *ctx,
> length +
> intel_obj->map_extra[index],
> alignment);
> - void *map;
> - if (brw->has_llc) {
> - map = brw_bo_map_cpu(brw, intel_obj->range_map_bo[index], access);
> - } else {
> - map = brw_bo_map_gtt(brw, intel_obj->range_map_bo[index], access);
> - }
> + void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access);
> obj->Mappings[index].Pointer = map + intel_obj->map_extra[index];
> return obj->Mappings[index].Pointer;
> }
> @@ -412,12 +407,8 @@ brw_map_buffer_range(struct gl_context *ctx,
> perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n");
> }
> map = brw_bo_map_unsynchronized(brw, intel_obj->buffer);
> - } else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) ||
> - (access & GL_MAP_PERSISTENT_BIT))) {
> - map = brw_bo_map_gtt(brw, intel_obj->buffer, access);
> - mark_buffer_inactive(intel_obj);
> } else {
> - map = brw_bo_map_cpu(brw, intel_obj->buffer, access);
> + map = brw_bo_map(brw, intel_obj->buffer, access);
> mark_buffer_inactive(intel_obj);
> }
It looks like access = (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT) would have
failed the original !brw->has_llc && ... case, and fall through to the
brw_bo_map_cpu case. So...RW mappings would have been CPU before, and
would now be GTT. This may be a significant bug fix...
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170526/f28e54db/attachment.sig>
More information about the mesa-dev
mailing list