[Mesa-dev] [PATCH 10/16] i965: Add and use brw_bo_map()

Kenneth Graunke kenneth at whitecape.org
Fri May 26 08:55:47 UTC 2017


On Wednesday, May 24, 2017 1:04:52 PM PDT Matt Turner wrote:
> We can encapsulate the logic for choosing the mapping type. This will
> also help when we add WC mappings.

It might be worth mentioning that this patch changes a few mappings from
CPU to GTT on non-LLC systems.  They may even be bug fixes...

I'll call them out below...

> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c            | 30 +++++++++++++++++++++--
>  src/mesa/drivers/dri/i965/brw_bufmgr.h            |  5 ++--
>  src/mesa/drivers/dri/i965/brw_performance_query.c |  6 ++---
>  src/mesa/drivers/dri/i965/brw_program.c           |  2 +-
>  src/mesa/drivers/dri/i965/brw_program_cache.c     |  6 ++---
>  src/mesa/drivers/dri/i965/brw_queryobj.c          |  2 +-
>  src/mesa/drivers/dri/i965/gen6_queryobj.c         |  2 +-
>  src/mesa/drivers/dri/i965/gen6_sol.c              |  2 +-
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c     |  4 +--
>  src/mesa/drivers/dri/i965/intel_buffer_objects.c  | 13 ++--------
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c     | 19 ++------------
>  src/mesa/drivers/dri/i965/intel_pixel_read.c      |  2 +-
>  src/mesa/drivers/dri/i965/intel_screen.c          |  4 +--
>  src/mesa/drivers/dri/i965/intel_tex_image.c       |  2 +-
>  src/mesa/drivers/dri/i965/intel_tex_subimage.c    |  2 +-
>  src/mesa/drivers/dri/i965/intel_upload.c          |  5 +---
>  16 files changed, 52 insertions(+), 54 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index b79f566..ec9611f 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -658,7 +658,7 @@ set_domain(struct brw_context *brw, const char *action,
>     }
>  }
>  
> -void *
> +static void *
>  brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
>  {
>     struct brw_bufmgr *bufmgr = bo->bufmgr;
> @@ -740,7 +740,7 @@ map_gtt(struct brw_bo *bo)
>     return bo->map_gtt;
>  }
>  
> -void *
> +static void *
>  brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
>  {
>     struct brw_bufmgr *bufmgr = bo->bufmgr;
> @@ -814,6 +814,32 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo)
>     return map;
>  }
>  
> +static bool
> +can_map_cpu(struct brw_bo *bo, unsigned flags)
> +{
> +   if (bo->cache_coherent)
> +      return true;
> +
> +   if (flags & MAP_PERSISTENT)
> +      return false;
> +
> +   if (flags & MAP_COHERENT)
> +      return false;
> +
> +   return !(flags & MAP_WRITE);
> +}
> +
> +void *
> +brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
> +{
> +   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
> +      return brw_bo_map_gtt(brw, bo, flags);
> +   else if (can_map_cpu(bo, flags))
> +      return brw_bo_map_cpu(brw, bo, flags);
> +   else
> +      return brw_bo_map_gtt(brw, bo, flags);
> +}
> +
>  int
>  brw_bo_unmap(struct brw_bo *bo)
>  {
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 831da69..099afcf 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> @@ -137,7 +137,7 @@ struct brw_bo {
>   *
>   * Buffer objects are not necessarily initially mapped into CPU virtual
>   * address space or graphics device aperture.  They must be mapped
> - * using brw_bo_map_cpu() or brw_bo_map_gtt() to be used by the CPU.
> + * using brw_bo_map() to be used by the CPU.
>   */
>  struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name,
>                              uint64_t size, uint64_t alignment);
> @@ -189,7 +189,7 @@ void brw_bo_unreference(struct brw_bo *bo);
>   * This function will block waiting for any existing execution on the
>   * buffer to complete, first.  The resulting mapping is returned.
>   */
> -MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
> +MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
>  
>  /**
>   * Reduces the refcount on the userspace mapping of the buffer
> @@ -263,7 +263,6 @@ struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr,
>                                             unsigned int handle);
>  void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
>  MUST_CHECK void *brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo);
> -MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags);
>  
>  int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 2ec070b..1c9ddf5 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw,
>     if (!read_oa_samples(brw))
>        goto error;
>  
> -   query_buffer = brw_bo_map_cpu(brw, obj->oa.bo, MAP_READ);
> +   query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ);
>  
>     start = last = query_buffer;
>     end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
> @@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx,
>                        MI_RPC_BO_SIZE, 64);
>  #ifdef DEBUG
>        /* Pre-filling the BO helps debug whether writes landed. */
> -      void *map = brw_bo_map_cpu(brw, obj->oa.bo, MAP_WRITE);
> +      void *map = brw_bo_map(brw, obj->oa.bo, MAP_WRITE);
>        memset(map, 0x80, MI_RPC_BO_SIZE);
>        brw_bo_unmap(obj->oa.bo);
>  #endif

This will get changed from a CPU mapping to a GTT mapping on non-LLC
systems.  I doubt this will realistically have any impact.

> @@ -1214,7 +1214,7 @@ get_pipeline_stats_data(struct brw_context *brw,
>     int n_counters = obj->query->n_counters;
>     uint8_t *p = data;
>  
> -   uint64_t *start = brw_bo_map_cpu(brw, obj->pipeline_stats.bo, MAP_READ);
> +   uint64_t *start = brw_bo_map(brw, obj->pipeline_stats.bo, MAP_READ);
>     uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t));
>  
>     for (int i = 0; i < n_counters; i++) {
> diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
> index 7f87e73..bff3475 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.c
> +++ b/src/mesa/drivers/dri/i965/brw_program.c
> @@ -578,7 +578,7 @@ brw_collect_shader_time(struct brw_context *brw)
>      * delaying reading the reports, but it doesn't look like it's a big
>      * overhead compared to the cost of tracking the time in the first place.
>      */
> -   void *bo_map = brw_bo_map_cpu(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
> +   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);

This will change from a CPU map to a GTT map on non-LLC due to
MAP_WRITE.  This may be a bug fix for INTEL_DEBUG=shader_time.

>  
>     for (int i = 0; i < brw->shader_time.num_entries; i++) {
>        uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
> diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
> index 079e2ae..ab03969 100644
> --- a/src/mesa/drivers/dri/i965/brw_program_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
> @@ -227,7 +227,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
>        if (brw->has_llc) {
>           memcpy(llc_map, cache->map, cache->next_offset);
>        } else {
> -         void *map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> +         void *map = brw_bo_map(brw, cache->bo, MAP_READ);
>           brw_bo_subdata(new_bo, 0, cache->next_offset, map);
>           brw_bo_unmap(cache->bo);
>        }
> @@ -268,7 +268,7 @@ brw_lookup_prog(const struct brw_cache *cache,
>  
>           void *map;
>           if (!brw->has_llc)
> -            map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> +            map = brw_bo_map(brw, cache->bo, MAP_READ);
>           else
>              map = cache->map;
>  
> @@ -550,7 +550,7 @@ brw_print_program_cache(struct brw_context *brw)
>     void *map;
>  
>     if (!brw->has_llc)
> -      map = brw_bo_map_cpu(brw, cache->bo, MAP_READ);
> +      map = brw_bo_map(brw, cache->bo, MAP_READ);
>     else
>        map = cache->map;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
> index 05e23cd..a7b8962 100644
> --- a/src/mesa/drivers/dri/i965/brw_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
> @@ -146,7 +146,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
>        }
>     }
>  
> -   results = brw_bo_map_cpu(brw, query->bo, MAP_READ);
> +   results = brw_bo_map(brw, query->bo, MAP_READ);
>     switch (query->Base.Target) {
>     case GL_TIME_ELAPSED_EXT:
>        /* The query BO contains the starting and ending timestamps.
> diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> index 160182a..d27b0d1 100644
> --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
> +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
> @@ -212,7 +212,7 @@ gen6_queryobj_get_results(struct gl_context *ctx,
>     if (query->bo == NULL)
>        return;
>  
> -   uint64_t *results = brw_bo_map_cpu(brw, query->bo, MAP_READ);
> +   uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ);
>     switch (query->Base.Target) {
>     case GL_TIME_ELAPSED:
>        /* The query BO contains the starting and ending timestamps.
> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
> index 5873afd..b4824b6 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sol.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -247,7 +247,7 @@ tally_prims_generated(struct brw_context *brw,
>     if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo)))
>        perf_debug("Stalling for # of transform feedback primitives written.\n");
>  
> -   uint64_t *prim_counts = brw_bo_map_cpu(brw, obj->prim_count_bo, MAP_READ);
> +   uint64_t *prim_counts = brw_bo_map(brw, obj->prim_count_bo, MAP_READ);
>  
>     assert(obj->prim_count_buffer_index % (2 * streams) == 0);
>     int pairs = obj->prim_count_buffer_index / (2 * streams);
> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> index de93aeb..62d2fe8 100644
> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
> @@ -100,7 +100,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
>  
>     batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
>     if (has_llc) {
> -      batch->map = brw_bo_map_cpu(NULL, batch->bo, MAP_READ | MAP_WRITE);
> +      batch->map = brw_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
>     }
>     batch->map_next = batch->map;
>  
> @@ -239,7 +239,7 @@ do_batch_dump(struct brw_context *brw)
>     if (batch->ring != RENDER_RING)
>        return;
>  
> -   void *map = brw_bo_map_cpu(brw, batch->bo, MAP_READ);
> +   void *map = brw_bo_map(brw, batch->bo, MAP_READ);
>     if (map == NULL) {
>        fprintf(stderr,
>  	      "WARNING: failed to map batchbuffer, "
> diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> index cf6382d..5813989 100644
> --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
> @@ -395,12 +395,7 @@ brw_map_buffer_range(struct gl_context *ctx,
>                                                            length +
>                                                            intel_obj->map_extra[index],
>                                                            alignment);
> -      void *map;
> -      if (brw->has_llc) {
> -         map = brw_bo_map_cpu(brw, intel_obj->range_map_bo[index], access);
> -      } else {
> -         map = brw_bo_map_gtt(brw, intel_obj->range_map_bo[index], access);
> -      }
> +      void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access);
>        obj->Mappings[index].Pointer = map + intel_obj->map_extra[index];
>        return obj->Mappings[index].Pointer;
>     }
> @@ -412,12 +407,8 @@ brw_map_buffer_range(struct gl_context *ctx,
>           perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n");
>        }
>        map = brw_bo_map_unsynchronized(brw, intel_obj->buffer);
> -   } else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) ||
> -                              (access & GL_MAP_PERSISTENT_BIT))) {
> -      map = brw_bo_map_gtt(brw, intel_obj->buffer, access);
> -      mark_buffer_inactive(intel_obj);
>     } else {
> -      map = brw_bo_map_cpu(brw, intel_obj->buffer, access);
> +      map = brw_bo_map(brw, intel_obj->buffer, access);
>        mark_buffer_inactive(intel_obj);
>     }

It looks like access = (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT) would have
failed the original !brw->has_llc && ... case, and fall through to the
brw_bo_map_cpu case.  So...RW mappings would have been CPU before, and
would now be GTT.  This may be a significant bug fix...
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170526/f28e54db/attachment.sig>


More information about the mesa-dev mailing list