[Mesa-dev] [PATCH 3/4] gallium/radeon: add a HUD query for monitoring the CS thread activity
Marek Olšák
maraeo at gmail.com
Tue Feb 14 18:10:40 UTC 2017
On Mon, Feb 13, 2017 at 4:57 PM, Marek Olšák <maraeo at gmail.com> wrote:
> On Mon, Feb 13, 2017 at 4:37 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 11.02.2017 20:58, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> ---
>>> src/gallium/drivers/radeon/r600_query.c | 25
>>> ++++++++++++++++++++++-
>>> src/gallium/drivers/radeon/r600_query.h | 1 +
>>> src/gallium/drivers/radeon/radeon_winsys.h | 1 +
>>> src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 2 ++
>>> src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 ++
>>> 5 files changed, 30 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/radeon/r600_query.c
>>> b/src/gallium/drivers/radeon/r600_query.c
>>> index 05741d3..8009416 100644
>>> --- a/src/gallium/drivers/radeon/r600_query.c
>>> +++ b/src/gallium/drivers/radeon/r600_query.c
>>> @@ -19,37 +19,41 @@
>>> * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>>> * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>>> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
>>> THE
>>> * USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> */
>>>
>>> #include "r600_query.h"
>>> #include "r600_cs.h"
>>> #include "util/u_memory.h"
>>> #include "util/u_upload_mgr.h"
>>> -
>>> +#include "os/os_time.h"
>>> #include "tgsi/tgsi_text.h"
>>>
>>> struct r600_hw_query_params {
>>> unsigned start_offset;
>>> unsigned end_offset;
>>> unsigned fence_offset;
>>> unsigned pair_stride;
>>> unsigned pair_count;
>>> };
>>>
>>> /* Queries without buffer handling or suspend/resume. */
>>> struct r600_query_sw {
>>> struct r600_query b;
>>>
>>> uint64_t begin_result;
>>> uint64_t end_result;
>>> +
>>> + uint64_t begin_time;
>>> + uint64_t end_time;
>>> +
>>> /* Fence for GPU_FINISHED. */
>>> struct pipe_fence_handle *fence;
>>> };
>>>
>>> static void r600_query_sw_destroy(struct r600_common_context *rctx,
>>> struct r600_query *rquery)
>>> {
>>> struct pipe_screen *screen = rctx->b.screen;
>>> struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>>
>>> @@ -69,28 +73,30 @@ static enum radeon_value_id
>>> winsys_id_from_type(unsigned type)
>>> case R600_QUERY_NUM_GFX_IBS: return RADEON_NUM_GFX_IBS;
>>> case R600_QUERY_NUM_SDMA_IBS: return RADEON_NUM_SDMA_IBS;
>>> case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
>>> case R600_QUERY_NUM_EVICTIONS: return RADEON_NUM_EVICTIONS;
>>> case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
>>> case R600_QUERY_VRAM_VIS_USAGE: return RADEON_VRAM_VIS_USAGE;
>>> case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
>>> case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
>>> case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
>>> case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
>>> + case R600_QUERY_CS_THREAD_BUSY: return RADEON_CS_THREAD_TIME;
>>> default: unreachable("query type does not correspond to winsys
>>> id");
>>> }
>>> }
>>>
>>> static bool r600_query_sw_begin(struct r600_common_context *rctx,
>>> struct r600_query *rquery)
>>> {
>>> struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>> + enum radeon_value_id ws_id;
>>>
>>> switch(query->b.type) {
>>> case PIPE_QUERY_TIMESTAMP_DISJOINT:
>>> case PIPE_QUERY_GPU_FINISHED:
>>> break;
>>> case R600_QUERY_DRAW_CALLS:
>>> query->begin_result = rctx->num_draw_calls;
>>> break;
>>> case R600_QUERY_SPILL_DRAW_CALLS:
>>> query->begin_result = rctx->num_spill_draw_calls;
>>> @@ -139,22 +145,28 @@ static bool r600_query_sw_begin(struct
>>> r600_common_context *rctx,
>>> case R600_QUERY_NUM_MAPPED_BUFFERS:
>>> query->begin_result = 0;
>>> break;
>>> case R600_QUERY_BUFFER_WAIT_TIME:
>>> case R600_QUERY_NUM_GFX_IBS:
>>> case R600_QUERY_NUM_SDMA_IBS:
>>> case R600_QUERY_NUM_BYTES_MOVED:
>>> case R600_QUERY_NUM_EVICTIONS: {
>>> enum radeon_value_id ws_id =
>>> winsys_id_from_type(query->b.type);
>>> query->begin_result = rctx->ws->query_value(rctx->ws,
>>> ws_id);
>>> + query->begin_time = os_time_get_nano();
>>> break;
>>> }
>>> + case R600_QUERY_CS_THREAD_BUSY:
>>> + ws_id = winsys_id_from_type(query->b.type);
>>> + query->begin_result = rctx->ws->query_value(rctx->ws,
>>> ws_id);
>>> + query->begin_time = os_time_get_nano();
>>> + break;
>>> case R600_QUERY_GPU_LOAD:
>>> case R600_QUERY_GPU_SHADERS_BUSY:
>>> case R600_QUERY_GPU_TA_BUSY:
>>> case R600_QUERY_GPU_GDS_BUSY:
>>> case R600_QUERY_GPU_VGT_BUSY:
>>> case R600_QUERY_GPU_IA_BUSY:
>>> case R600_QUERY_GPU_SX_BUSY:
>>> case R600_QUERY_GPU_WD_BUSY:
>>> case R600_QUERY_GPU_BCI_BUSY:
>>> case R600_QUERY_GPU_SC_BUSY:
>>> @@ -193,20 +205,21 @@ static bool r600_query_sw_begin(struct
>>> r600_common_context *rctx,
>>> unreachable("r600_query_sw_begin: bad query type");
>>> }
>>>
>>> return true;
>>> }
>>>
>>> static bool r600_query_sw_end(struct r600_common_context *rctx,
>>> struct r600_query *rquery)
>>> {
>>> struct r600_query_sw *query = (struct r600_query_sw *)rquery;
>>> + enum radeon_value_id ws_id;
>>>
>>> switch(query->b.type) {
>>> case PIPE_QUERY_TIMESTAMP_DISJOINT:
>>> break;
>>> case PIPE_QUERY_GPU_FINISHED:
>>> rctx->b.flush(&rctx->b, &query->fence,
>>> PIPE_FLUSH_DEFERRED);
>>> break;
>>> case R600_QUERY_DRAW_CALLS:
>>> query->end_result = rctx->num_draw_calls;
>>> break;
>>> @@ -256,20 +269,25 @@ static bool r600_query_sw_end(struct
>>> r600_common_context *rctx,
>>> case R600_QUERY_BUFFER_WAIT_TIME:
>>> case R600_QUERY_NUM_MAPPED_BUFFERS:
>>> case R600_QUERY_NUM_GFX_IBS:
>>> case R600_QUERY_NUM_SDMA_IBS:
>>> case R600_QUERY_NUM_BYTES_MOVED:
>>> case R600_QUERY_NUM_EVICTIONS: {
>>> enum radeon_value_id ws_id =
>>> winsys_id_from_type(query->b.type);
>>> query->end_result = rctx->ws->query_value(rctx->ws,
>>> ws_id);
>>> break;
>>> }
>>> + case R600_QUERY_CS_THREAD_BUSY:
>>> + ws_id = winsys_id_from_type(query->b.type);
>>> + query->end_result = rctx->ws->query_value(rctx->ws,
>>> ws_id);
>>> + query->end_time = os_time_get_nano();
>>> + break;
>>> case R600_QUERY_GPU_LOAD:
>>> case R600_QUERY_GPU_SHADERS_BUSY:
>>> case R600_QUERY_GPU_TA_BUSY:
>>> case R600_QUERY_GPU_GDS_BUSY:
>>> case R600_QUERY_GPU_VGT_BUSY:
>>> case R600_QUERY_GPU_IA_BUSY:
>>> case R600_QUERY_GPU_SX_BUSY:
>>> case R600_QUERY_GPU_WD_BUSY:
>>> case R600_QUERY_GPU_BCI_BUSY:
>>> case R600_QUERY_GPU_SC_BUSY:
>>> @@ -330,20 +348,24 @@ static bool r600_query_sw_get_result(struct
>>> r600_common_context *rctx,
>>> (uint64_t)rctx->screen->info.clock_crystal_freq *
>>> 1000;
>>> result->timestamp_disjoint.disjoint = false;
>>> return true;
>>> case PIPE_QUERY_GPU_FINISHED: {
>>> struct pipe_screen *screen = rctx->b.screen;
>>> result->b = screen->fence_finish(screen, &rctx->b,
>>> query->fence,
>>> wait ?
>>> PIPE_TIMEOUT_INFINITE : 0);
>>> return result->b;
>>> }
>>>
>>> + case R600_QUERY_CS_THREAD_BUSY:
>>> + result->u64 = (query->end_result - query->begin_result) *
>>> 100 /
>>> + (query->end_time - query->begin_time);
>>> + return true;
>>> case R600_QUERY_GPIN_ASIC_ID:
>>> result->u32 = 0;
>>> return true;
>>> case R600_QUERY_GPIN_NUM_SIMD:
>>> result->u32 = rctx->screen->info.num_good_compute_units;
>>> return true;
>>> case R600_QUERY_GPIN_NUM_RB:
>>> result->u32 = rctx->screen->info.num_render_backends;
>>> return true;
>>> case R600_QUERY_GPIN_NUM_SPI:
>>> @@ -1735,20 +1757,21 @@ static struct pipe_driver_query_info
>>> r600_driver_query_list[] = {
>>> X("compute-calls", COMPUTE_CALLS, UINT64,
>>> AVERAGE),
>>> X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64,
>>> AVERAGE),
>>> X("dma-calls", DMA_CALLS, UINT64,
>>> AVERAGE),
>>> X("cp-dma-calls", CP_DMA_CALLS, UINT64,
>>> AVERAGE),
>>> X("num-vs-flushes", NUM_VS_FLUSHES, UINT64,
>>> AVERAGE),
>>> X("num-ps-flushes", NUM_PS_FLUSHES, UINT64,
>>> AVERAGE),
>>> X("num-cs-flushes", NUM_CS_FLUSHES, UINT64,
>>> AVERAGE),
>>> X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64,
>>> AVERAGE),
>>> X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64,
>>> AVERAGE),
>>> X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64,
>>> AVERAGE),
>>> + X("CS-thread-busy", CS_THREAD_BUSY, UINT64,
>>> AVERAGE),
>>
>>
>> This should logically be PERCENTAGE instead of UINT64.
>>
>> I don't think the HUD handles that correctly today (according to p_defines,
>> PERCENTAGE should be a float), but then we should either fix the HUD or nuke
>> PERCENTAGE.
>
> PERCENTAGE is used by nouveau for AMD_performance_monitor. We don't
> have any other queries using PERCENTAGE. Certainly the HUD doesn't
> handle it correctly, but that doesn't bother me much.
So does this have your Reviewed-by?
Marek
More information about the mesa-dev mailing list