[Mesa-dev] [PATCH 3/4] gallium/radeon: add a HUD query for monitoring the CS thread activity

Nicolai Hähnle nhaehnle at gmail.com
Mon Feb 13 15:37:48 UTC 2017


On 11.02.2017 20:58, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
>  src/gallium/drivers/radeon/r600_query.c           | 25 ++++++++++++++++++++++-
>  src/gallium/drivers/radeon/r600_query.h           |  1 +
>  src/gallium/drivers/radeon/radeon_winsys.h        |  1 +
>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 ++
>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 ++
>  5 files changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
> index 05741d3..8009416 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -19,37 +19,41 @@
>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>   */
>
>  #include "r600_query.h"
>  #include "r600_cs.h"
>  #include "util/u_memory.h"
>  #include "util/u_upload_mgr.h"
> -
> +#include "os/os_time.h"
>  #include "tgsi/tgsi_text.h"
>
>  struct r600_hw_query_params {
>  	unsigned start_offset;
>  	unsigned end_offset;
>  	unsigned fence_offset;
>  	unsigned pair_stride;
>  	unsigned pair_count;
>  };
>
>  /* Queries without buffer handling or suspend/resume. */
>  struct r600_query_sw {
>  	struct r600_query b;
>
>  	uint64_t begin_result;
>  	uint64_t end_result;
> +
> +	uint64_t begin_time;
> +	uint64_t end_time;
> +
>  	/* Fence for GPU_FINISHED. */
>  	struct pipe_fence_handle *fence;
>  };
>
>  static void r600_query_sw_destroy(struct r600_common_context *rctx,
>  				  struct r600_query *rquery)
>  {
>  	struct pipe_screen *screen = rctx->b.screen;
>  	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>
> @@ -69,28 +73,30 @@ static enum radeon_value_id winsys_id_from_type(unsigned type)
>  	case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
>  	case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
>  	case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
>  	case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
>  	case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
>  	case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
>  	case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
>  	case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
>  	case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
>  	case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
> +	case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
>  	default: unreachable("query type does not correspond to winsys id");
>  	}
>  }
>
>  static bool r600_query_sw_begin(struct r600_common_context *rctx,
>  				struct r600_query *rquery)
>  {
>  	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
> +	enum radeon_value_id ws_id;
>
>  	switch(query->b.type) {
>  	case PIPE_QUERY_TIMESTAMP_DISJOINT:
>  	case PIPE_QUERY_GPU_FINISHED:
>  		break;
>  	case R600_QUERY_DRAW_CALLS:
>  		query->begin_result = rctx->num_draw_calls;
>  		break;
>  	case R600_QUERY_SPILL_DRAW_CALLS:
>  		query->begin_result = rctx->num_spill_draw_calls;
> @@ -139,22 +145,28 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
>  	case R600_QUERY_NUM_MAPPED_BUFFERS:
>  		query->begin_result = 0;
>  		break;
>  	case R600_QUERY_BUFFER_WAIT_TIME:
>  	case R600_QUERY_NUM_GFX_IBS:
>  	case R600_QUERY_NUM_SDMA_IBS:
>  	case R600_QUERY_NUM_BYTES_MOVED:
>  	case R600_QUERY_NUM_EVICTIONS: {
>  		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
>  		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
> +		query->begin_time = os_time_get_nano();
>  		break;
>  	}
> +	case R600_QUERY_CS_THREAD_BUSY:
> +		ws_id = winsys_id_from_type(query->b.type);
> +		query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
> +		query->begin_time = os_time_get_nano();
> +		break;
>  	case R600_QUERY_GPU_LOAD:
>  	case R600_QUERY_GPU_SHADERS_BUSY:
>  	case R600_QUERY_GPU_TA_BUSY:
>  	case R600_QUERY_GPU_GDS_BUSY:
>  	case R600_QUERY_GPU_VGT_BUSY:
>  	case R600_QUERY_GPU_IA_BUSY:
>  	case R600_QUERY_GPU_SX_BUSY:
>  	case R600_QUERY_GPU_WD_BUSY:
>  	case R600_QUERY_GPU_BCI_BUSY:
>  	case R600_QUERY_GPU_SC_BUSY:
> @@ -193,20 +205,21 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
>  		unreachable("r600_query_sw_begin: bad query type");
>  	}
>
>  	return true;
>  }
>
>  static bool r600_query_sw_end(struct r600_common_context *rctx,
>  			      struct r600_query *rquery)
>  {
>  	struct r600_query_sw *query = (struct r600_query_sw *)rquery;
> +	enum radeon_value_id ws_id;
>
>  	switch(query->b.type) {
>  	case PIPE_QUERY_TIMESTAMP_DISJOINT:
>  		break;
>  	case PIPE_QUERY_GPU_FINISHED:
>  		rctx->b.flush(&rctx->b, &query->fence, PIPE_FLUSH_DEFERRED);
>  		break;
>  	case R600_QUERY_DRAW_CALLS:
>  		query->end_result = rctx->num_draw_calls;
>  		break;
> @@ -256,20 +269,25 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
>  	case R600_QUERY_BUFFER_WAIT_TIME:
>  	case R600_QUERY_NUM_MAPPED_BUFFERS:
>  	case R600_QUERY_NUM_GFX_IBS:
>  	case R600_QUERY_NUM_SDMA_IBS:
>  	case R600_QUERY_NUM_BYTES_MOVED:
>  	case R600_QUERY_NUM_EVICTIONS: {
>  		enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
>  		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
>  		break;
>  	}
> +	case R600_QUERY_CS_THREAD_BUSY:
> +		ws_id = winsys_id_from_type(query->b.type);
> +		query->end_result = rctx->ws->query_value(rctx->ws, ws_id);
> +		query->end_time = os_time_get_nano();
> +		break;
>  	case R600_QUERY_GPU_LOAD:
>  	case R600_QUERY_GPU_SHADERS_BUSY:
>  	case R600_QUERY_GPU_TA_BUSY:
>  	case R600_QUERY_GPU_GDS_BUSY:
>  	case R600_QUERY_GPU_VGT_BUSY:
>  	case R600_QUERY_GPU_IA_BUSY:
>  	case R600_QUERY_GPU_SX_BUSY:
>  	case R600_QUERY_GPU_WD_BUSY:
>  	case R600_QUERY_GPU_BCI_BUSY:
>  	case R600_QUERY_GPU_SC_BUSY:
> @@ -330,20 +348,24 @@ static bool r600_query_sw_get_result(struct r600_common_context *rctx,
>  			(uint64_t)rctx->screen->info.clock_crystal_freq * 1000;
>  		result->timestamp_disjoint.disjoint = false;
>  		return true;
>  	case PIPE_QUERY_GPU_FINISHED: {
>  		struct pipe_screen *screen = rctx->b.screen;
>  		result->b = screen->fence_finish(screen, &rctx->b, query->fence,
>  						 wait ? PIPE_TIMEOUT_INFINITE : 0);
>  		return result->b;
>  	}
>
> +	case R600_QUERY_CS_THREAD_BUSY:
> +		result->u64 = (query->end_result - query->begin_result) * 100 /
> +			      (query->end_time - query->begin_time);
> +		return true;
>  	case R600_QUERY_GPIN_ASIC_ID:
>  		result->u32 = 0;
>  		return true;
>  	case R600_QUERY_GPIN_NUM_SIMD:
>  		result->u32 = rctx->screen->info.num_good_compute_units;
>  		return true;
>  	case R600_QUERY_GPIN_NUM_RB:
>  		result->u32 = rctx->screen->info.num_render_backends;
>  		return true;
>  	case R600_QUERY_GPIN_NUM_SPI:
> @@ -1735,20 +1757,21 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
>  	X("compute-calls",		COMPUTE_CALLS,		UINT64, AVERAGE),
>  	X("spill-compute-calls",	SPILL_COMPUTE_CALLS,	UINT64, AVERAGE),
>  	X("dma-calls",			DMA_CALLS,		UINT64, AVERAGE),
>  	X("cp-dma-calls",		CP_DMA_CALLS,		UINT64, AVERAGE),
>  	X("num-vs-flushes",		NUM_VS_FLUSHES,		UINT64, AVERAGE),
>  	X("num-ps-flushes",		NUM_PS_FLUSHES,		UINT64, AVERAGE),
>  	X("num-cs-flushes",		NUM_CS_FLUSHES,		UINT64, AVERAGE),
>  	X("num-fb-cache-flushes",	NUM_FB_CACHE_FLUSHES,	UINT64, AVERAGE),
>  	X("num-L2-invalidates",		NUM_L2_INVALIDATES,	UINT64, AVERAGE),
>  	X("num-L2-writebacks",		NUM_L2_WRITEBACKS,	UINT64, AVERAGE),
> +	X("CS-thread-busy",		CS_THREAD_BUSY,		UINT64, AVERAGE),

This should logically be PERCENTAGE instead of UINT64.

I don't think the HUD handles that correctly today (according to 
p_defines.h, PERCENTAGE values should be floats), but in that case we 
should either fix the HUD or remove PERCENTAGE.

Nicolai


>  	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
>  	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
>  	X("mapped-VRAM",		MAPPED_VRAM,		BYTES, AVERAGE),
>  	X("mapped-GTT",			MAPPED_GTT,		BYTES, AVERAGE),
>  	X("buffer-wait-time",		BUFFER_WAIT_TIME,	MICROSECONDS, CUMULATIVE),
>  	X("num-mapped-buffers",		NUM_MAPPED_BUFFERS,	UINT64, AVERAGE),
>  	X("num-GFX-IBs",		NUM_GFX_IBS,		UINT64, AVERAGE),
>  	X("num-SDMA-IBs",		NUM_SDMA_IBS,		UINT64, AVERAGE),
>  	X("num-bytes-moved",		NUM_BYTES_MOVED,	BYTES, CUMULATIVE),
>  	X("num-evictions",		NUM_EVICTIONS,		UINT64, CUMULATIVE),
> diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
> index 5de80d9..84b834c 100644
> --- a/src/gallium/drivers/radeon/r600_query.h
> +++ b/src/gallium/drivers/radeon/r600_query.h
> @@ -48,20 +48,21 @@ enum {
>  	R600_QUERY_COMPUTE_CALLS,
>  	R600_QUERY_SPILL_COMPUTE_CALLS,
>  	R600_QUERY_DMA_CALLS,
>  	R600_QUERY_CP_DMA_CALLS,
>  	R600_QUERY_NUM_VS_FLUSHES,
>  	R600_QUERY_NUM_PS_FLUSHES,
>  	R600_QUERY_NUM_CS_FLUSHES,
>  	R600_QUERY_NUM_FB_CACHE_FLUSHES,
>  	R600_QUERY_NUM_L2_INVALIDATES,
>  	R600_QUERY_NUM_L2_WRITEBACKS,
> +	R600_QUERY_CS_THREAD_BUSY,
>  	R600_QUERY_REQUESTED_VRAM,
>  	R600_QUERY_REQUESTED_GTT,
>  	R600_QUERY_MAPPED_VRAM,
>  	R600_QUERY_MAPPED_GTT,
>  	R600_QUERY_BUFFER_WAIT_TIME,
>  	R600_QUERY_NUM_MAPPED_BUFFERS,
>  	R600_QUERY_NUM_GFX_IBS,
>  	R600_QUERY_NUM_SDMA_IBS,
>  	R600_QUERY_NUM_BYTES_MOVED,
>  	R600_QUERY_NUM_EVICTIONS,
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
> index 881bd5f..432550d 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -87,20 +87,21 @@ enum radeon_value_id {
>      RADEON_NUM_SDMA_IBS,
>      RADEON_NUM_BYTES_MOVED,
>      RADEON_NUM_EVICTIONS,
>      RADEON_VRAM_USAGE,
>      RADEON_VRAM_VIS_USAGE,
>      RADEON_GTT_USAGE,
>      RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
>      RADEON_CURRENT_SCLK,
>      RADEON_CURRENT_MCLK,
>      RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
> +    RADEON_CS_THREAD_TIME,
>  };
>
>  /* Each group of four has the same priority. */
>  enum radeon_bo_priority {
>      RADEON_PRIO_FENCE = 0,
>      RADEON_PRIO_TRACE,
>      RADEON_PRIO_SO_FILLED_SIZE,
>      RADEON_PRIO_QUERY,
>
>      RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> index c3dfda5..db0087c 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
> @@ -458,20 +458,22 @@ static uint64_t amdgpu_query_value(struct radeon_winsys *rws,
>     case RADEON_GTT_USAGE:
>        amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
>        return heap.heap_usage;
>     case RADEON_GPU_TEMPERATURE:
>     case RADEON_CURRENT_SCLK:
>     case RADEON_CURRENT_MCLK:
>        return 0;
>     case RADEON_GPU_RESET_COUNTER:
>        assert(0);
>        return 0;
> +   case RADEON_CS_THREAD_TIME:
> +      return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
>     }
>     return 0;
>  }
>
>  static bool amdgpu_read_registers(struct radeon_winsys *rws,
>                                    unsigned reg_offset,
>                                    unsigned num_registers, uint32_t *out)
>  {
>     struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
>
> diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> index cacd683..bdcf194 100644
> --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
> @@ -662,20 +662,22 @@ static uint64_t radeon_query_value(struct radeon_winsys *rws,
>                               "current-gpu-sclk", (uint32_t*)&retval);
>          return retval;
>      case RADEON_CURRENT_MCLK:
>          radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK,
>                               "current-gpu-mclk", (uint32_t*)&retval);
>          return retval;
>      case RADEON_GPU_RESET_COUNTER:
>          radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER,
>                               "gpu-reset-counter", (uint32_t*)&retval);
>          return retval;
> +    case RADEON_CS_THREAD_TIME:
> +        return util_queue_get_thread_time_nano(&ws->cs_queue, 0);
>      }
>      return 0;
>  }
>
>  static bool radeon_read_registers(struct radeon_winsys *rws,
>                                    unsigned reg_offset,
>                                    unsigned num_registers, uint32_t *out)
>  {
>      struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
>      unsigned i;
>



More information about the mesa-dev mailing list