[Mesa-dev] [PATCH 01/14] radeonsi: add a HUD query for getting an average GFX BO list size

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Jun 29 21:15:52 UTC 2017



On 06/29/2017 09:47 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/gallium/drivers/radeon/r600_query.c           | 18 ++++++++++++++++++
>   src/gallium/drivers/radeon/r600_query.h           |  1 +
>   src/gallium/drivers/radeon/radeon_winsys.h        |  1 +
>   src/gallium/winsys/amdgpu/drm/amdgpu_cs.c         |  3 +++
>   src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 ++
>   src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h     |  1 +
>   src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  1 +
>   7 files changed, 27 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
> index 3308ad8..db70878 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -64,20 +64,21 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
>   {
>   	switch (type) {
>   	case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
>   	case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
>   	case R600_QUERY_MAPPED_VRAM: return RADEON_MAPPED_VRAM;
>   	case R600_QUERY_MAPPED_GTT: return RADEON_MAPPED_GTT;
>   	case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
>   	case R600_QUERY_NUM_MAPPED_BUFFERS: return RADEON_NUM_MAPPED_BUFFERS;
>   	case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
>   	case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
> +	case R600_QUERY_GFX_BO_LIST_SIZE: return RADEON_GFX_BO_LIST_COUNTER;
>   	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
>   	case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
>   	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: return RADEON_NUM_VRAM_CPU_PAGE_FAULTS;
>   	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
>   	case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
>   	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
>   	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
>   	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
>   	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
>   	case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
> @@ -166,20 +167,26 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
>   	case R600_QUERY_BUFFER_WAIT_TIME:
>   	case R600_QUERY_NUM_GFX_IBS:
>   	case R600_QUERY_NUM_SDMA_IBS:
>   	case R600_QUERY_NUM_BYTES_MOVED:
>   	case R600_QUERY_NUM_EVICTIONS:
>   	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
>   		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
>   		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
>   		break;
>   	}
> +	case R600_QUERY_GFX_BO_LIST_SIZE:
> +		ws_id = winsys_id_from_type(query->b.type);
> +		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
> +		query->begin_time = rctx->ws->query_value(rctx->ws,
> +							  RADEON_NUM_GFX_IBS);

Reusing begin_time to store the number of graphics IBs is confusing, but
either way, it's debug code. :)

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

> +		break;
>   	case R600_QUERY_CS_THREAD_BUSY:
>   		ws_id = winsys_id_from_type(query->b.type);
>   		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
>   		query->begin_time = os_time_get_nano();
>   		break;
>   	case R600_QUERY_GALLIUM_THREAD_BUSY:
>   		query->begin_result =
>   			rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
>   		query->begin_time = os_time_get_nano();
>   		break;
> @@ -311,20 +318,26 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
>   	case R600_QUERY_NUM_MAPPED_BUFFERS:
>   	case R600_QUERY_NUM_GFX_IBS:
>   	case R600_QUERY_NUM_SDMA_IBS:
>   	case R600_QUERY_NUM_BYTES_MOVED:
>   	case R600_QUERY_NUM_EVICTIONS:
>   	case R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS: {
>   		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
>   		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
>   		break;
>   	}
> +	case R600_QUERY_GFX_BO_LIST_SIZE:
> +		ws_id = winsys_id_from_type(query->b.type);
> +		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
> +		query->end_time = rctx->ws->query_value(rctx->ws,
> +							RADEON_NUM_GFX_IBS);
> +		break;
>   	case R600_QUERY_CS_THREAD_BUSY:
>   		ws_id = winsys_id_from_type(query->b.type);
>   		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
>   		query->end_time = os_time_get_nano();
>   		break;
>   	case R600_QUERY_GALLIUM_THREAD_BUSY:
>   		query->end_result =
>   			rctx->tc ? util_queue_get_thread_time_nano(&rctx->tc->queue, 0) : 0;
>   		query->end_time = os_time_get_nano();
>   		break;
> @@ -397,20 +410,24 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
>   		return true;
>   	case PIPE_QUERY_GPU_FINISHED: {
>   		struct pipe_screen *screen = rctx->b.screen;
>   		struct pipe_context *ctx = rquery->b.flushed ? NULL : &rctx->b;
>   
>   		result->b = screen->fence_finish(screen, ctx, query->fence,
>   						 wait ? PIPE_TIMEOUT_INFINITE : 0);
>   		return result->b;
>   	}
>   
> +	case R600_QUERY_GFX_BO_LIST_SIZE:
> +		result->u64 = (query->end_result - query->begin_result) /
> +			      (query->end_time - query->begin_time);
> +		return true;
>   	case R600_QUERY_CS_THREAD_BUSY:
>   	case R600_QUERY_GALLIUM_THREAD_BUSY:
>   		result->u64 = (query->end_result - query->begin_result) * 100 /
>   			      (query->end_time - query->begin_time);
>   		return true;
>   	case R600_QUERY_GPIN_ASIC_ID:
>   		result->u32 = 0;
>   		return true;
>   	case R600_QUERY_GPIN_NUM_SIMD:
>   		result->u32 = rctx->screen->info.num_good_compute_units;
> @@ -1854,20 +1871,21 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
>   	X("CS-thread-busy",		CS_THREAD_BUSY,		UINT64, AVERAGE),
>   	X("gallium-thread-busy",	GALLIUM_THREAD_BUSY,	UINT64, AVERAGE),
>   	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
>   	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
>   	X("mapped-VRAM",		MAPPED_VRAM,		BYTES, AVERAGE),
>   	X("mapped-GTT",			MAPPED_GTT,		BYTES, AVERAGE),
>   	X("buffer-wait-time",		BUFFER_WAIT_TIME,	MICROSECONDS, CUMULATIVE),
>   	X("num-mapped-buffers",		NUM_MAPPED_BUFFERS,	UINT64, AVERAGE),
>   	X("num-GFX-IBs",		NUM_GFX_IBS,		UINT64, AVERAGE),
>   	X("num-SDMA-IBs",		NUM_SDMA_IBS,		UINT64, AVERAGE),
> +	X("GFX-BO-list-size",		GFX_BO_LIST_SIZE,	UINT64, AVERAGE),
>   	X("num-bytes-moved",		NUM_BYTES_MOVED,	BYTES, CUMULATIVE),
>   	X("num-evictions",		NUM_EVICTIONS,		UINT64, CUMULATIVE),
>   	X("VRAM-CPU-page-faults",	NUM_VRAM_CPU_PAGE_FAULTS, UINT64, CUMULATIVE),
>   	X("VRAM-usage",			VRAM_USAGE,		BYTES, AVERAGE),
>   	X("VRAM-vis-usage",		VRAM_VIS_USAGE,		BYTES, AVERAGE),
>   	X("GTT-usage",			GTT_USAGE,		BYTES, AVERAGE),
>   	X("back-buffer-ps-draw-ratio",	BACK_BUFFER_PS_DRAW_RATIO, UINT64, AVERAGE),
>   
>   	/* GPIN queries are for the benefit of old versions of GPUPerfStudio,
>   	 * which use it as a fallback path to detect the GPU type.
> diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
> index 1f97e11..1bbaa76 100644
> --- a/src/gallium/drivers/radeon/r600_query.h
> +++ b/src/gallium/drivers/radeon/r600_query.h
> @@ -62,20 +62,21 @@ enum {
>   	R600_QUERY_CS_THREAD_BUSY,
>   	R600_QUERY_GALLIUM_THREAD_BUSY,
>   	R600_QUERY_REQUESTED_VRAM,
>   	R600_QUERY_REQUESTED_GTT,
>   	R600_QUERY_MAPPED_VRAM,
>   	R600_QUERY_MAPPED_GTT,
>   	R600_QUERY_BUFFER_WAIT_TIME,
>   	R600_QUERY_NUM_MAPPED_BUFFERS,
>   	R600_QUERY_NUM_GFX_IBS,
>   	R600_QUERY_NUM_SDMA_IBS,
> +	R600_QUERY_GFX_BO_LIST_SIZE,
>   	R600_QUERY_NUM_BYTES_MOVED,
>   	R600_QUERY_NUM_EVICTIONS,
>   	R600_QUERY_NUM_VRAM_CPU_PAGE_FAULTS,
>   	R600_QUERY_VRAM_USAGE,
>   	R600_QUERY_VRAM_VIS_USAGE,
>   	R600_QUERY_GTT_USAGE,
>   	R600_QUERY_GPU_TEMPERATURE,
>   	R600_QUERY_CURRENT_GPU_SCLK,
>   	R600_QUERY_CURRENT_GPU_MCLK,
>   	R600_QUERY_GPU_LOAD,
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
> index e19fde6..247fff0 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -84,20 +84,21 @@ enum ring_type {
>   enum radeon_value_id {
>       RADEON_REQUESTED_VRAM_MEMORY,
>       RADEON_REQUESTED_GTT_MEMORY,
>       RADEON_MAPPED_VRAM,
>       RADEON_MAPPED_GTT,
>       RADEON_BUFFER_WAIT_TIME_NS,
>       RADEON_NUM_MAPPED_BUFFERS,
>       RADEON_TIMESTAMP,
>       RADEON_NUM_GFX_IBS,
>       RADEON_NUM_SDMA_IBS,
> +    RADEON_GFX_BO_LIST_COUNTER, /* number of BOs submitted in gfx IBs */
>       RADEON_NUM_BYTES_MOVED,
>       RADEON_NUM_EVICTIONS,
>       RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
>       RADEON_VRAM_USAGE,
>       RADEON_VRAM_VIS_USAGE,
>       RADEON_GTT_USAGE,
>       RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
>       RADEON_CURRENT_SCLK,
>       RADEON_CURRENT_MCLK,
>       RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> index c88be06..1b3ca65 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
> @@ -1218,20 +1218,23 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
>   
>         for (i = 0; i < cs->num_real_buffers; ++i) {
>            struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
>   
>            assert(buffer->u.real.priority_usage != 0);
>   
>            cs->handles[i] = buffer->bo->bo;
>            cs->flags[i] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
>         }
>   
> +      if (acs->ring_type == RING_GFX)
> +         ws->gfx_bo_list_counter += cs->num_real_buffers;
> +
>         r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
>                                   cs->handles, cs->flags,
>                                   &cs->request.resources);
>      }
>   bo_list_error:
>   
>      if (r) {
>         fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
>         cs->request.resources = NULL;
>         amdgpu_fence_signalled(cs->fence);
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index c4d5216..2148c49 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -133,20 +133,22 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
>         return ws->buffer_wait_time;
>      case RADEON_NUM_MAPPED_BUFFERS:
>         return ws->num_mapped_buffers;
>      case RADEON_TIMESTAMP:
>         amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
>         return retval;
>      case RADEON_NUM_GFX_IBS:
>         return ws->num_gfx_IBs;
>      case RADEON_NUM_SDMA_IBS:
>         return ws->num_sdma_IBs;
> +   case RADEON_GFX_BO_LIST_COUNTER:
> +      return ws->gfx_bo_list_counter;
>      case RADEON_NUM_BYTES_MOVED:
>         amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
>         return retval;
>      case RADEON_NUM_EVICTIONS:
>         amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
>         return retval;
>      case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
>         amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
>         return retval;
>      case RADEON_VRAM_USAGE:
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> index 896a463..f011b8e 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
> @@ -58,20 +58,21 @@ struct amdgpu_winsys {
>      unsigned num_total_rejected_cs;
>      uint32_t next_bo_unique_id;
>      uint64_t allocated_vram;
>      uint64_t allocated_gtt;
>      uint64_t mapped_vram;
>      uint64_t mapped_gtt;
>      uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
>      uint64_t num_gfx_IBs;
>      uint64_t num_sdma_IBs;
>      uint64_t num_mapped_buffers;
> +   uint64_t gfx_bo_list_counter;
>   
>      struct radeon_info info;
>   
>      /* multithreaded IB submission */
>      struct util_queue cs_queue;
>   
>      struct amdgpu_gpu_info amdinfo;
>      ADDR_HANDLE addrlib;
>   
>      bool check_vm;
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index 2a6ce2c..8e43b68 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -624,20 +624,21 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
>           return ws->num_gfx_IBs;
>       case RADEON_NUM_SDMA_IBS:
>           return ws->num_sdma_IBs;
>       case RADEON_NUM_BYTES_MOVED:
>           radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED,
>                                "num-bytes-moved", (uint32_t*)&retval);
>           return retval;
>       case RADEON_NUM_EVICTIONS:
>       case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
>       case RADEON_VRAM_VIS_USAGE:
> +    case RADEON_GFX_BO_LIST_COUNTER:
>           return 0; /* unimplemented */
>       case RADEON_VRAM_USAGE:
>           radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE,
>                                "vram-usage", (uint32_t*)&retval);
>           return retval;
>       case RADEON_GTT_USAGE:
>           radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE,
>                                "gtt-usage", (uint32_t*)&retval);
>           return retval;
>       case RADEON_GPU_TEMPERATURE:
> 


More information about the mesa-dev mailing list