[Mesa-dev] [PATCH 3/4] gallium/radeon: add a HUD query for monitoring the CS thread activity

Nicolai Hähnle nhaehnle at gmail.com
Wed Feb 15 10:58:18 UTC 2017


On 14.02.2017 19:10, Marek Olšák wrote:
> On Mon, Feb 13, 2017 at 4:57 PM, Marek Olšák <maraeo at gmail.com> wrote:
>> On Mon, Feb 13, 2017 at 4:37 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>>> On 11.02.2017 20:58, Marek Olšák wrote:
>>>>
>>>> From: Marek Olšák <marek.olsak at amd.com>
>>>>
>>>> ---
>>>>  src/gallium/drivers/radeon/r600_query.c           | 25 ++++++++++++++++++++++-
>>>>  src/gallium/drivers/radeon/r600_query.h           |  1 +
>>>>  src/gallium/drivers/radeon/radeon_winsys.h        |  1 +
>>>>  src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c     |  2 ++
>>>>  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c |  2 ++
>>>>  5 files changed, 30 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/src/gallium/drivers/radeon/r600_query.c
>>>> b/src/gallium/drivers/radeon/r600_query.c
>>>> index 05741d3..8009416 100644
>>>> --- a/src/gallium/drivers/radeon/r600_query.c
>>>> +++ b/src/gallium/drivers/radeon/r600_query.c
>>>> @@ -19,37 +19,41 @@
>>>>   * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>>>>   * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>>>   * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>>> THE
>>>>   * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>>>   */
>>>>
>>>>  #include "r600_query.h"
>>>>  #include "r600_cs.h"
>>>>  #include "util/u_memory.h"
>>>>  #include "util/u_upload_mgr.h"
>>>> -
>>>> +#include "os/os_time.h"
>>>>  #include "tgsi/tgsi_text.h"
>>>>
>>>>  struct r600_hw_query_params {
>>>>         unsigned start_offset;
>>>>         unsigned end_offset;
>>>>         unsigned fence_offset;
>>>>         unsigned pair_stride;
>>>>         unsigned pair_count;
>>>>  };
>>>>
>>>>  /* Queries without buffer handling or suspend/resume. */
>>>>  struct r600_query_sw {
>>>>         struct r600_query b;
>>>>
>>>>         uint64_t begin_result;
>>>>         uint64_t end_result;
>>>> +
>>>> +       uint64_t begin_time;
>>>> +       uint64_t end_time;
>>>> +
>>>>         /* Fence for GPU_FINISHED. */
>>>>         struct pipe_fence_handle *fence;
>>>>  };
>>>>
>>>>  static void r600_query_sw_destroy(struct r600_common_context *rctx,
>>>>                                   struct r600_query *rquery)
>>>>  {
>>>>         struct pipe_screen *screen = rctx->b.screen;
>>>>         struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>>>
>>>> @@ -69,28 +73,30 @@ static enum radeon_value_id
>>>> winsys_id_from_type(unsigned type)
>>>>         case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
>>>>         case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
>>>>         case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
>>>>         case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
>>>>         case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
>>>>         case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
>>>>         case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
>>>>         case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
>>>>         case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
>>>>         case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
>>>> +       case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
>>>>         default: unreachable("query type does not correspond to winsys id");
>>>>         }
>>>>  }
>>>>
>>>>  static bool r600_query_sw_begin(struct r600_common_context *rctx,
>>>>                                 struct r600_query *rquery)
>>>>  {
>>>>         struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>>> +       enum radeon_value_id ws_id;
>>>>
>>>>         switch(query->b.type) {
>>>>         case PIPE_QUERY_TIMESTAMP_DISJOINT:
>>>>         case PIPE_QUERY_GPU_FINISHED:
>>>>                 break;
>>>>         case R600_QUERY_DRAW_CALLS:
>>>>                 query->begin_result = rctx->num_draw_calls;
>>>>                 break;
>>>>         case R600_QUERY_SPILL_DRAW_CALLS:
>>>>                 query->begin_result = rctx->num_spill_draw_calls;
>>>> @@ -139,22 +145,28 @@ static bool r600_query_sw_begin(struct
>>>> r600_common_context *rctx,
>>>>         case R600_QUERY_NUM_MAPPED_BUFFERS:
>>>>                 query->begin_result = 0;
>>>>                 break;
>>>>         case R600_QUERY_BUFFER_WAIT_TIME:
>>>>         case R600_QUERY_NUM_GFX_IBS:
>>>>         case R600_QUERY_NUM_SDMA_IBS:
>>>>         case R600_QUERY_NUM_BYTES_MOVED:
>>>>         case R600_QUERY_NUM_EVICTIONS: {
>>>>                 enum radeon_value_id ws_id =
>>>> winsys_id_from_type(query->b.type);
>>>>                 query->begin_result = rctx->ws->query_value(rctx->ws,
>>>> ws_id);
>>>> +               query->begin_time = os_time_get_nano();
>>>>                 break;
>>>>         }
>>>> +       case R600_QUERY_CS_THREAD_BUSY:
>>>> +               ws_id = winsys_id_from_type(query->b.type);
>>>> +               query->begin_result = rctx->ws->query_value(rctx->ws,
>>>> ws_id);
>>>> +               query->begin_time = os_time_get_nano();
>>>> +               break;
>>>>         case R600_QUERY_GPU_LOAD:
>>>>         case R600_QUERY_GPU_SHADERS_BUSY:
>>>>         case R600_QUERY_GPU_TA_BUSY:
>>>>         case R600_QUERY_GPU_GDS_BUSY:
>>>>         case R600_QUERY_GPU_VGT_BUSY:
>>>>         case R600_QUERY_GPU_IA_BUSY:
>>>>         case R600_QUERY_GPU_SX_BUSY:
>>>>         case R600_QUERY_GPU_WD_BUSY:
>>>>         case R600_QUERY_GPU_BCI_BUSY:
>>>>         case R600_QUERY_GPU_SC_BUSY:
>>>> @@ -193,20 +205,21 @@ static bool r600_query_sw_begin(struct
>>>> r600_common_context *rctx,
>>>>                 unreachable("r600_query_sw_begin: bad query type");
>>>>         }
>>>>
>>>>         return true;
>>>>  }
>>>>
>>>>  static bool r600_query_sw_end(struct r600_common_context *rctx,
>>>>                               struct r600_query *rquery)
>>>>  {
>>>>         struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>>> +       enum radeon_value_id ws_id;
>>>>
>>>>         switch(query->b.type) {
>>>>         case PIPE_QUERY_TIMESTAMP_DISJOINT:
>>>>                 break;
>>>>         case PIPE_QUERY_GPU_FINISHED:
>>>>                 rctx->b.flush(&rctx->b, &query->fence,
>>>> PIPE_FLUSH_DEFERRED);
>>>>                 break;
>>>>         case R600_QUERY_DRAW_CALLS:
>>>>                 query->end_result = rctx->num_draw_calls;
>>>>                 break;
>>>> @@ -256,20 +269,25 @@ static bool r600_query_sw_end(struct
>>>> r600_common_context *rctx,
>>>>         case R600_QUERY_BUFFER_WAIT_TIME:
>>>>         case R600_QUERY_NUM_MAPPED_BUFFERS:
>>>>         case R600_QUERY_NUM_GFX_IBS:
>>>>         case R600_QUERY_NUM_SDMA_IBS:
>>>>         case R600_QUERY_NUM_BYTES_MOVED:
>>>>         case R600_QUERY_NUM_EVICTIONS: {
>>>>                 enum radeon_value_id ws_id =
>>>> winsys_id_from_type(query->b.type);
>>>>                 query->end_result = rctx->ws->query_value(rctx->ws,
>>>> ws_id);
>>>>                 break;
>>>>         }
>>>> +       case R600_QUERY_CS_THREAD_BUSY:
>>>> +               ws_id = winsys_id_from_type(query->b.type);
>>>> +               query->end_result = rctx->ws->query_value(rctx->ws,
>>>> ws_id);
>>>> +               query->end_time = os_time_get_nano();
>>>> +               break;
>>>>         case R600_QUERY_GPU_LOAD:
>>>>         case R600_QUERY_GPU_SHADERS_BUSY:
>>>>         case R600_QUERY_GPU_TA_BUSY:
>>>>         case R600_QUERY_GPU_GDS_BUSY:
>>>>         case R600_QUERY_GPU_VGT_BUSY:
>>>>         case R600_QUERY_GPU_IA_BUSY:
>>>>         case R600_QUERY_GPU_SX_BUSY:
>>>>         case R600_QUERY_GPU_WD_BUSY:
>>>>         case R600_QUERY_GPU_BCI_BUSY:
>>>>         case R600_QUERY_GPU_SC_BUSY:
>>>> @@ -330,20 +348,24 @@ static bool r600_query_sw_get_result(struct
>>>> r600_common_context *rctx,
>>>>                         (uint64_t)rctx->screen->info.clock_crystal_freq *
>>>> 1000;
>>>>                 result->timestamp_disjoint.disjoint = false;
>>>>                 return true;
>>>>         case PIPE_QUERY_GPU_FINISHED: {
>>>>                 struct pipe_screen *screen = rctx->b.screen;
>>>>                 result->b = screen->fence_finish(screen, &rctx->b,
>>>> query->fence,
>>>>                                                  wait ?
>>>> PIPE_TIMEOUT_INFINITE : 0);
>>>>                 return result->b;
>>>>         }
>>>>
>>>> +       case R600_QUERY_CS_THREAD_BUSY:
>>>> +               result->u64 = (query->end_result - query->begin_result) *
>>>> 100 /
>>>> +                             (query->end_time - query->begin_time);
>>>> +               return true;
>>>>         case R600_QUERY_GPIN_ASIC_ID:
>>>>                 result->u32 = 0;
>>>>                 return true;
>>>>         case R600_QUERY_GPIN_NUM_SIMD:
>>>>                 result->u32 = rctx->screen->info.num_good_compute_units;
>>>>                 return true;
>>>>         case R600_QUERY_GPIN_NUM_RB:
>>>>                 result->u32 = rctx->screen->info.num_render_backends;
>>>>                 return true;
>>>>         case R600_QUERY_GPIN_NUM_SPI:
>>>> @@ -1735,20 +1757,21 @@ static struct pipe_driver_query_info
>>>> r600_driver_query_list[] = {
>>>>         X("compute-calls",              COMPUTE_CALLS,          UINT64,
>>>> AVERAGE),
>>>>         X("spill-compute-calls",        SPILL_COMPUTE_CALLS,    UINT64,
>>>> AVERAGE),
>>>>         X("dma-calls",                  DMA_CALLS,              UINT64,
>>>> AVERAGE),
>>>>         X("cp-dma-calls",               CP_DMA_CALLS,           UINT64,
>>>> AVERAGE),
>>>>         X("num-vs-flushes",             NUM_VS_FLUSHES,         UINT64,
>>>> AVERAGE),
>>>>         X("num-ps-flushes",             NUM_PS_FLUSHES,         UINT64,
>>>> AVERAGE),
>>>>         X("num-cs-flushes",             NUM_CS_FLUSHES,         UINT64,
>>>> AVERAGE),
>>>>         X("num-fb-cache-flushes",       NUM_FB_CACHE_FLUSHES,   UINT64,
>>>> AVERAGE),
>>>>         X("num-L2-invalidates",         NUM_L2_INVALIDATES,     UINT64,
>>>> AVERAGE),
>>>>         X("num-L2-writebacks",          NUM_L2_WRITEBACKS,      UINT64,
>>>> AVERAGE),
>>>> +       X("CS-thread-busy",             CS_THREAD_BUSY,         UINT64,
>>>> AVERAGE),
>>>
>>>
>>> This should logically be PERCENTAGE instead of UINT64.
>>>
>>> I don't think the HUD handles that correctly today (according to
>>> p_defines.h, PERCENTAGE should be a float), so we should either fix the
>>> HUD or remove PERCENTAGE.
>>
>> PERCENTAGE is used by nouveau for AMD_performance_monitor. We don't
>> have any other queries using PERCENTAGE. Certainly the HUD doesn't
>> handle it correctly, but that doesn't bother me much.
>
> So does this have your Reviewed-by?

Yes, it's fine.

Cheers,
Nicolai

> Marek
>



More information about the mesa-dev mailing list