Kernel ftrace can help analyze the kernel's running state more effectively. Add some trace events to support TTM.
List of added trace events:
ttm:ttm_bo_add_mem_to_lru ttm:ttm_bo_del_from_lru ttm:ttm_bo_move_mem ttm:ttm_bo_wait ttm:ttm_bo_evict ttm:ttm_bo_swapout ttm:ttm_bo_device_init ttm:ttm_bo_device_release ttm:ttm_bo_init_reserved ttm:ttm_bo_validate ttm:ttm_bo_release ttm:ttm_bo_mmap ttm:ttm_bo_vm_fault ttm:ttm_bo_vm_access ttm:ttm_shrink ttm:ttm_mem_global_reserve ttm:ttm_mem_global_free
Kevin Wang (3): drm/ttm: add ttm bo trace event support drm/ttm: add ttm vm bo trace event support drm/ttm: add ttm mem trace event support
drivers/gpu/drm/ttm/ttm_bo.c | 23 ++ drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 +- drivers/gpu/drm/ttm/ttm_memory.c | 7 + drivers/gpu/drm/ttm/ttm_module.c | 3 + drivers/gpu/drm/ttm/ttm_trace.h | 469 +++++++++++++++++++++++++++++++ 5 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/ttm/ttm_trace.h
Add TTM BO related trace event support.
trace events: ttm:ttm_bo_add_mem_to_lru ttm:ttm_bo_del_from_lru ttm:ttm_bo_move_mem ttm:ttm_bo_wait ttm:ttm_bo_evict ttm:ttm_bo_swapout ttm:ttm_bo_device_init ttm:ttm_bo_device_release ttm:ttm_bo_init_reserved ttm:ttm_bo_validate ttm:ttm_bo_release
Signed-off-by: Kevin Wang kevin1.wang@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 23 +++ drivers/gpu/drm/ttm/ttm_module.c | 3 + drivers/gpu/drm/ttm/ttm_trace.h | 321 +++++++++++++++++++++++++++++++ 3 files changed, 347 insertions(+) create mode 100644 drivers/gpu/drm/ttm/ttm_trace.h
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e3931e515906..074afd05aaa8 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -43,6 +43,8 @@ #include <linux/atomic.h> #include <linux/dma-resv.h>
+#include "ttm_trace.h" + static void ttm_bo_global_kobj_release(struct kobject *kobj);
/** @@ -143,6 +145,8 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo, if (mem->placement & TTM_PL_FLAG_NO_EVICT) return;
+ trace_ttm_bo_add_mem_to_lru(bo, mem); + man = ttm_manager_type(bdev, mem->mem_type); list_add_tail(&bo->lru, &man->lru[bo->priority]);
@@ -167,6 +171,8 @@ static void ttm_bo_del_from_lru(struct ttm_buffer_object *bo) notify = true; }
+ trace_ttm_bo_del_from_lru(bo, notify); + if (notify && bdev->driver->del_from_lru_notify) bdev->driver->del_from_lru_notify(bo); } @@ -299,6 +305,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, } }
+ trace_ttm_bo_move_mem(bo, mem, evict); + if (bdev->driver->move_notify) bdev->driver->move_notify(bo, evict, mem);
@@ -542,6 +550,8 @@ static void ttm_bo_release(struct kref *kref) size_t acc_size = bo->acc_size; int ret;
+ trace_ttm_bo_release(bo); + if (!bo->deleted) { ret = ttm_bo_individualize_resv(bo); if (ret) { @@ -668,6 +678,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, ttm_resource_free(bo, &evict_mem); goto out; } + + trace_ttm_bo_evict(bo, &evict_mem); bo->evicted = true; out: return ret; @@ -1151,6 +1163,8 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
dma_resv_assert_held(bo->base.resv);
+ trace_ttm_bo_validate(bo); + /* * Remove the backing store if no placement is given. */ @@ -1263,6 +1277,8 @@ int ttm_bo_init_reserved(struct ttm_bo_device *bdev, } atomic_inc(&ttm_bo_glob.bo_count);
+ trace_ttm_bo_init_reserved(bo, size); + /* * For ttm_bo_type_device buffers, allocate * address space from the device. @@ -1487,6 +1503,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) if (!ret) ttm_bo_global_release();
+ trace_ttm_bo_device_release(bdev); + return ret; } EXPORT_SYMBOL(ttm_bo_device_release); @@ -1537,6 +1555,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex);
+ trace_ttm_bo_device_init(bdev); + return 0; } EXPORT_SYMBOL(ttm_bo_device_init); @@ -1580,6 +1600,8 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
timeout = dma_resv_wait_timeout_rcu(bo->base.resv, true, interruptible, timeout); + + trace_ttm_bo_wait(bo, interruptible, timeout); if (timeout < 0) return timeout;
@@ -1670,6 +1692,7 @@ int ttm_bo_swapout(struct ttm_bo_global *glob, struct ttm_operation_ctx *ctx) * anyone tries to access a ttm page. */
+ trace_ttm_bo_swapout(bo, i); if (bo->bdev->driver->swap_notify) bo->bdev->driver->swap_notify(bo);
diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 6ff40c041d79..8b70e8aebecb 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -35,6 +35,9 @@ #include <drm/ttm/ttm_module.h> #include <drm/drm_sysfs.h>
+#define CREATE_TRACE_POINTS +#include "ttm_trace.h" + static DECLARE_WAIT_QUEUE_HEAD(exit_q); static atomic_t device_released;
diff --git a/drivers/gpu/drm/ttm/ttm_trace.h b/drivers/gpu/drm/ttm/ttm_trace.h new file mode 100644 index 000000000000..7c5e55725e8e --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_trace.h @@ -0,0 +1,321 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Kevin Wang + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ttm + +#if !defined(_TRACE_TTM_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TTM_H_ + +#include <drm/ttm/ttm_bo_api.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_module.h> +#include <drm/ttm/ttm_page_alloc.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#define TTM_PLACEMENT_FLAGS_TRACE \ + { TTM_PL_FLAG_SYSTEM, "SYSTEM" },\ + { TTM_PL_FLAG_TT, "TT" },\ + { TTM_PL_FLAG_VRAM, "VRAM" },\ + { TTM_PL_FLAG_PRIV, "PRIV" },\ + { TTM_PL_FLAG_CACHED, "CACHED" },\ + { TTM_PL_FLAG_UNCACHED, "UNCACHED" },\ + { TTM_PL_FLAG_WC, "WC" },\ + { TTM_PL_FLAG_CONTIGUOUS, "CONTIGUOUS" },\ + { TTM_PL_FLAG_NO_EVICT, "NO_EVICT" },\ + { TTM_PL_FLAG_TOPDOWN, "TOPDOWN" } + +#define __show_ttm_pl_flags(flags, mask) \ + (flags & mask) ? __print_flags(flags, "|", \ + TTM_PLACEMENT_FLAGS_TRACE \ + ) : "none" + +#define show_ttm_pl_flags(flags) \ + __show_ttm_pl_flags(flags, ~0UL) + +#define show_ttm_pl_mem_flags(flags) \ + __show_ttm_pl_flags(flags, TTM_PL_MASK_MEM) + + +TRACE_EVENT(ttm_bo_add_mem_to_lru, + TP_PROTO(struct ttm_buffer_object *bo, struct ttm_resource *res), + TP_ARGS(bo, res), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(unsigned long, size) + __field(uint32_t, placement) + __field(enum ttm_bo_type, bo_type) + __field(uint32_t, mem_type) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->bo_type = bo->type; + __entry->size = bo->mem.size; + __entry->placement = res->placement; + __entry->mem_type = res->mem_type; + ), + + TP_printk("bo:%p, size=%lx, bo_type=%d, mtype=%d, placement=%s(%x)", + __entry->bo, __entry->size, __entry->bo_type, __entry->mem_type, + show_ttm_pl_flags(__entry->placement), __entry->placement) +); + +TRACE_EVENT(ttm_bo_del_from_lru, + TP_PROTO(struct ttm_buffer_object *bo, bool notify), + TP_ARGS(bo, notify), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + 
__field(unsigned long, size) + __field(uint32_t, placement) + __field(enum ttm_bo_type, bo_type) + __field(uint32_t, mem_type) + __field(bool, notify) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->bo_type = bo->type; + __entry->size = bo->mem.size; + __entry->placement = bo->mem.placement; + __entry->mem_type = bo->mem.mem_type; + __entry->notify = notify; + ), + + TP_printk("bo:%p, size=%lx, bo_type=%d, mtype=%d, placement=%s(%x), notify=%s", + __entry->bo, __entry->size, __entry->bo_type, __entry->mem_type, + show_ttm_pl_flags(__entry->placement), __entry->placement, + __entry->notify ? "true" : "false") +); + +TRACE_EVENT(ttm_bo_move_mem, + TP_PROTO(struct ttm_buffer_object *bo, struct ttm_resource *res, bool evict), + TP_ARGS(bo, res, evict), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(unsigned long, start) + __field(unsigned long, size) + __field(uint32_t, old_placement) + __field(uint32_t, new_placement) + __field(uint32_t, old_mem_type) + __field(uint32_t, new_mem_type) + __field(bool, evict) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->start = res->start; + __entry->size = res->size; + __entry->old_placement = bo->mem.placement; + __entry->new_placement = res->placement; + __entry->old_mem_type = bo->mem.mem_type; + __entry->new_mem_type = res->mem_type; + __entry->evict = evict; + ), + + TP_printk("bo:%p, start=%lx, size=%lx, mtype: %d -> %d, placement: %s(%x) -> %s(%x), evict=%s", + __entry->bo, __entry->start, __entry->size, + __entry->old_mem_type, __entry->new_mem_type, + show_ttm_pl_flags(__entry->old_placement), __entry->old_placement, + show_ttm_pl_flags(__entry->new_placement), __entry->new_placement, + __entry->evict ? 
"true" : "false") +); + +TRACE_EVENT(ttm_bo_wait, + TP_PROTO(struct ttm_buffer_object *bo, bool interruptible, long timeout), + TP_ARGS(bo, interruptible, timeout), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(bool, interruptible) + __field(long, timeout) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->interruptible = interruptible; + __entry->timeout = timeout; + ), + + TP_printk("bo:%p, interruptible=%s, timeout=%ld", __entry->bo, + __entry->interruptible ? "true" : "false", __entry->timeout) +); + +TRACE_EVENT(ttm_bo_evict, + TP_PROTO(struct ttm_buffer_object *bo, struct ttm_resource *res), + TP_ARGS(bo, res), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(enum ttm_bo_type, bo_type) + __field(unsigned long, size) + __field(uint32_t, old_placement) + __field(uint32_t, new_placement) + __field(uint32_t, old_mem_type) + __field(uint32_t, new_mem_type) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->bo_type = bo->type; + __entry->size = res->size; + __entry->old_placement = bo->mem.placement; + __entry->new_placement = res->placement; + __entry->old_mem_type = bo->mem.mem_type; + __entry->new_mem_type = res->mem_type; + ), + + TP_printk("bo:%p, size=%lx, bo_type=%d, mtype: %d -> %d, placement: %s(%x) -> %s(%x)", + __entry->bo, __entry->size, __entry->bo_type, + __entry->old_mem_type, __entry->new_mem_type, + show_ttm_pl_flags(__entry->old_placement), __entry->old_placement, + show_ttm_pl_flags(__entry->new_placement), __entry->new_placement) + +); + +TRACE_EVENT(ttm_bo_swapout, + TP_PROTO(struct ttm_buffer_object *bo, int priority), + TP_ARGS(bo, priority), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(enum ttm_bo_type, bo_type) + __field(unsigned long, size) + __field(uint32_t, placement) + __field(uint32_t, mem_type) + __field(int, priority) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->priority = priority; + __entry->bo_type = bo->type; + __entry->size = 
bo->mem.size; + __entry->placement = bo->mem.placement; + __entry->mem_type = bo->mem.mem_type; + ), + + TP_printk("bo:%p, size=%lx, bo_type=%d, mtype=%d, placement=%s(%x), prio=%d", + __entry->bo, __entry->size, __entry->bo_type, __entry->mem_type, + show_ttm_pl_flags(__entry->placement), __entry->placement, + __entry->priority) +); + +TRACE_EVENT(ttm_bo_device_init, + TP_PROTO(struct ttm_bo_device *bdev), + TP_ARGS(bdev), + TP_STRUCT__entry( + __field(struct ttm_bo_device *, bdev) + __field(struct ttm_bo_driver *, driver) + __field(bool, need_dma32) + ), + + TP_fast_assign( + __entry->bdev = bdev; + __entry->driver = bdev->driver; + __entry->need_dma32 = bdev->need_dma32; + ), + + TP_printk("bdev:%p, driver=%p, need_dma32=%s", __entry->bdev, __entry->driver, + __entry->need_dma32 ? "true" : "false") +); + +TRACE_EVENT(ttm_bo_device_release, + TP_PROTO(struct ttm_bo_device *bdev), + TP_ARGS(bdev), + TP_STRUCT__entry( + __field(struct ttm_bo_device *, bdev) + ), + + TP_fast_assign( + __entry->bdev = bdev; + ), + + TP_printk("bdev:%p", __entry->bdev) +); + +TRACE_EVENT(ttm_bo_init_reserved, + TP_PROTO(struct ttm_buffer_object *bo, unsigned long size), + TP_ARGS(bo, size), + TP_STRUCT__entry( + __field(struct ttm_bo_device *, bdev) + __field(struct ttm_buffer_object *, bo) + __field(enum ttm_bo_type, bo_type) + __field(uint32_t, page_alignment) + __field(uint32_t, placement) + __field(unsigned long, size) + __field(size_t, acc_size) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->bdev = bo->bdev; + __entry->bo_type = bo->type; + __entry->placement = bo->mem.placement; + __entry->page_alignment = bo->mem.page_alignment; + __entry->size = size; + __entry->acc_size = bo->acc_size; + ), + + TP_printk("bo:%p, bdev=%p, bo_type=%d, %s(%x) size=%lx, accsize=%lx, alignment=%d", + __entry->bo, __entry->bdev, __entry->bo_type, + show_ttm_pl_flags(__entry->placement), __entry->placement, + __entry->size, __entry->acc_size, __entry->page_alignment) +); + 
+TRACE_EVENT(ttm_bo_validate, + TP_PROTO(struct ttm_buffer_object *bo), + TP_ARGS(bo), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + ), + + TP_fast_assign( + __entry->bo = bo; + ), + + TP_printk("bo:%p", __entry->bo) +); + +TRACE_EVENT(ttm_bo_release, + TP_PROTO(struct ttm_buffer_object *bo), + TP_ARGS(bo), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + ), + + TP_fast_assign( + __entry->bo = bo; + ), + + TP_printk("bo:%p", __entry->bo) +); + +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/ttm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE ttm_trace + +/* This part must be outside protection */ +#include <trace/define_trace.h>
Add TTM BO VM related trace event support.
trace events: ttm:ttm_bo_mmap ttm:ttm_bo_vm_fault ttm:ttm_bo_vm_access
Signed-off-by: Kevin Wang kevin1.wang@amd.com --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 ++++- drivers/gpu/drm/ttm/ttm_trace.h | 78 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 01693e8f24b7..aece2024c1fd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,6 +42,8 @@ #include <linux/uaccess.h> #include <linux/mem_encrypt.h>
+#include "ttm_trace.h" + static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -429,15 +431,17 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = ttm_bo_vm_reserve(bo, vmf); if (ret) - return ret; + goto out;
prot = vma->vm_page_prot; ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1); if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) - return ret; + goto out;
dma_resv_unlock(bo->base.resv);
+out: + trace_ttm_bo_vm_fault(bo, vmf, ret); return ret; } EXPORT_SYMBOL(ttm_bo_vm_fault); @@ -516,6 +520,8 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, if (ret) return ret;
+ trace_ttm_bo_vm_access(bo, !!write, offset, len); + switch (bo->mem.mem_type) { case TTM_PL_SYSTEM: if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { @@ -618,6 +624,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, goto out_unref;
ttm_bo_mmap_vma_setup(bo, vma); + trace_ttm_bo_mmap(bo, vma); return 0; out_unref: ttm_bo_put(bo); @@ -629,6 +636,7 @@ int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) { ttm_bo_get(bo); ttm_bo_mmap_vma_setup(bo, vma); + trace_ttm_bo_mmap(bo, vma); return 0; } EXPORT_SYMBOL(ttm_bo_mmap_obj); diff --git a/drivers/gpu/drm/ttm/ttm_trace.h b/drivers/gpu/drm/ttm/ttm_trace.h index 7c5e55725e8e..9f7cc34b243b 100644 --- a/drivers/gpu/drm/ttm/ttm_trace.h +++ b/drivers/gpu/drm/ttm/ttm_trace.h @@ -34,6 +34,7 @@ #include <drm/ttm/ttm_page_alloc.h> #include <linux/types.h> #include <linux/tracepoint.h> +#include <trace/events/mmflags.h>
#define TTM_PLACEMENT_FLAGS_TRACE \ { TTM_PL_FLAG_SYSTEM, "SYSTEM" },\ @@ -310,6 +311,83 @@ TRACE_EVENT(ttm_bo_release, TP_printk("bo:%p", __entry->bo) );
+TRACE_EVENT(ttm_bo_mmap, + TP_PROTO(struct ttm_buffer_object *bo, struct vm_area_struct *vma), + TP_ARGS(bo, vma), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(unsigned long, vm_start) + __field(unsigned long, vm_end) + __field(unsigned long, vm_pgoff) + __field(unsigned long, vm_flags) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->vm_start = vma->vm_start; + __entry->vm_end = vma->vm_end; + __entry->vm_pgoff = vma->vm_pgoff; + __entry->vm_flags = vma->vm_flags; + ), + + TP_printk("bo:%p, vm_start=%lx, vm_end=%lx, vm_pgoff=%lx, vm_flags=%s", + __entry->bo, + __entry->vm_start, __entry->vm_end, __entry->vm_pgoff, + show_vma_flags(__entry->vm_flags)) +); + +TRACE_EVENT(ttm_bo_vm_fault, + TP_PROTO(struct ttm_buffer_object *bo, struct vm_fault *vmf, int result), + TP_ARGS(bo, vmf, result), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(struct vm_area_struct *, vma) + __field(unsigned long, fault_address) + __field(unsigned long, fault_pgoff) + __field(int, result) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->vma = vmf->vma; + __entry->fault_address = vmf->address; + __entry->fault_pgoff = vmf->pgoff; + __entry->flags = vmf->flags; + __entry->result = result; + ), + + TP_printk("bo:%p, vma=%p, fault_address=%lx, fault_pgoff=%lx, fault_flags=%s %s", + __entry->bo, __entry->vma, + __entry->fault_address, __entry->fault_pgoff, + __entry->flags ? 
__print_flags(__entry->flags, "|", FAULT_FLAG_TRACE) : "none", + __print_flags(__entry->result, "|", VM_FAULT_RESULT_TRACE)) +); + +TRACE_EVENT(ttm_bo_vm_access, + TP_PROTO(struct ttm_buffer_object *bo, bool write, unsigned long offset, unsigned long len), + TP_ARGS(bo, write, offset, len), + TP_STRUCT__entry( + __field(struct ttm_buffer_object *, bo) + __field(bool, write) + __field(unsigned long, offset) + __field(unsigned long, len) + __field(uint32_t, mem_type) + ), + + TP_fast_assign( + __entry->bo = bo; + __entry->write = write; + __entry->offset = offset; + __entry->len = len; + __entry->mem_type = bo->mem.mem_type; + ), + + TP_printk("bo:%p, %s offset=%lx, len=%lx, mtype=%d", + __entry->bo, __entry->write ? "write" : "read", + __entry->offset, __entry->len, __entry->mem_type) +); + #endif
#undef TRACE_INCLUDE_PATH
Add TTM memory related trace event support.
trace events: ttm:ttm_shrink ttm:ttm_mem_global_reserve ttm:ttm_mem_global_free
Signed-off-by: Kevin Wang kevin1.wang@amd.com --- drivers/gpu/drm/ttm/ttm_memory.c | 7 ++++ drivers/gpu/drm/ttm/ttm_trace.h | 70 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+)
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index acd63b70d814..27470b1f1f13 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -39,6 +39,8 @@ #include <linux/slab.h> #include <linux/swap.h>
+#include "ttm_trace.h" + #define TTM_MEMORY_ALLOC_RETRIES 4
struct ttm_mem_global ttm_mem_glob; @@ -272,6 +274,7 @@ static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, int ret;
spin_lock(&glob->lock); + trace_ttm_shrink(from_wq, extra, ctx);
while (ttm_zones_above_swap_target(glob, from_wq, extra)) { spin_unlock(&glob->lock); @@ -518,6 +521,8 @@ static void ttm_mem_global_free_zone(struct ttm_mem_global *glob, zone->used_mem -= amount; } spin_unlock(&glob->lock); + trace_ttm_mem_global_free(single_zone->name, amount, + single_zone->used_mem, single_zone->max_mem); }
void ttm_mem_global_free(struct ttm_mem_global *glob, @@ -590,6 +595,8 @@ static int ttm_mem_global_reserve(struct ttm_mem_global *glob, ret = 0; out_unlock: spin_unlock(&glob->lock); + trace_ttm_mem_global_reserve(single_zone->name, amount, + single_zone->used_mem, single_zone->max_mem); ttm_check_swapping(glob);
return ret; diff --git a/drivers/gpu/drm/ttm/ttm_trace.h b/drivers/gpu/drm/ttm/ttm_trace.h index 9f7cc34b243b..e25b8a2c423c 100644 --- a/drivers/gpu/drm/ttm/ttm_trace.h +++ b/drivers/gpu/drm/ttm/ttm_trace.h @@ -388,6 +388,76 @@ TRACE_EVENT(ttm_bo_vm_access, __entry->offset, __entry->len, __entry->mem_type) );
+TRACE_EVENT(ttm_shrink, + TP_PROTO(bool from_wq, uint64_t extra, struct ttm_operation_ctx *ctx), + TP_ARGS(from_wq, extra, ctx), + TP_STRUCT__entry( + __field(bool, from_wq) + __field(bool, interruptible) + __field(bool, wait_gpu) + __field(uint64_t, extra) + ), + + TP_fast_assign( + __entry->from_wq = from_wq; + __entry->extra = extra; + __entry->interruptible= ctx->interruptible; + __entry->wait_gpu = !ctx->no_wait_gpu; + ), + + TP_printk("ttm_shrink: from_wq=%s, interruptible=%s, wait_gpu=%s, extra=0x%llx(%lld)", + __entry->from_wq ? "true" : "false", + __entry->interruptible ? "true" : "false", + __entry->wait_gpu? "true" : "false", + __entry->extra, __entry->extra) +); + +TRACE_EVENT(ttm_mem_global_reserve, + TP_PROTO(const char *zone_name, uint64_t amount, + uint64_t used_mem, uint64_t max_mem), + TP_ARGS(zone_name, amount, used_mem, max_mem), + TP_STRUCT__entry( + __string(zone, zone_name) + __field(uint64_t, amount) + __field(uint64_t, used_mem) + __field(uint64_t, max_mem) + ), + + TP_fast_assign( + __assign_str(zone, zone_name); + __entry->amount = amount; + __entry->used_mem = used_mem; + __entry->max_mem = max_mem; + ), + + TP_printk("zone:%s, amount=%lld, used=%lld/%lld", + __get_str(zone), __entry->amount, + __entry->used_mem, __entry->max_mem) +); + +TRACE_EVENT(ttm_mem_global_free, + TP_PROTO(const char *zone_name, uint64_t amount, + uint64_t used_mem, uint64_t max_mem), + TP_ARGS(zone_name, amount, used_mem, max_mem), + TP_STRUCT__entry( + __string(zone, zone_name) + __field(uint64_t, amount) + __field(uint64_t, used_mem) + __field(uint64_t, max_mem) + ), + + TP_fast_assign( + __assign_str(zone, zone_name); + __entry->amount = amount; + __entry->used_mem = used_mem; + __entry->max_mem = max_mem; + ), + + TP_printk("zone:%s, amount=%lld, used=%lld/%lld", + __get_str(zone), __entry->amount, + __entry->used_mem, __entry->max_mem) +); + #endif
#undef TRACE_INCLUDE_PATH
Not a bad start, but this needs quite a bit more work.
First of all, please rebase on top of current drm-misc-next; a whole bunch of the stuff you want to trace here has already been removed or is about to be removed.
Then concentrate on the necessary trace points; for example, ttm:ttm_bo_device_init/release are overkill.
On the other hand, I'm missing important events like pool shrink, tt swapout/swapin (ttm_bo_swapout can be dropped), and especially BO init.
I would separate the patches into one for each trace point. Not a must-have, but it could make them easier to review, and we could discuss each one separately.
Thanks, Christian.
Am 28.01.21 um 08:13 schrieb Kevin Wang:
the kernel ftrace can better help analyze the kernel running status. add some trace events to support TTM.
add trace events list:
ttm:ttm_bo_add_mem_to_lru ttm:ttm_bo_del_from_lru ttm:ttm_bo_move_mem ttm:ttm_bo_wait ttm:ttm_bo_evict ttm:ttm_bo_swapout ttm:ttm_bo_device_init ttm:ttm_bo_device_release ttm:ttm_bo_init_reserved ttm:ttm_bo_validate ttm:ttm_bo_release ttm:ttm_bo_mmap ttm:ttm_bo_vm_fault ttm:ttm_bo_vm_access ttm:ttm_shrink ttm:ttm_mem_global_reserve ttm:ttm_mem_global_free
Kevin Wang (3): drm/ttm: add ttm bo trace event support drm/ttm: add ttm vm bo trace event support drm/ttm: add ttm mem trace event support
drivers/gpu/drm/ttm/ttm_bo.c | 23 ++ drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 +- drivers/gpu/drm/ttm/ttm_memory.c | 7 + drivers/gpu/drm/ttm/ttm_module.c | 3 + drivers/gpu/drm/ttm/ttm_trace.h | 469 +++++++++++++++++++++++++++++++ 5 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/ttm/ttm_trace.h
[AMD Official Use Only - Internal Distribution Only]
Thanks for the comments.
I will refine these patches and then send them out for review later.
Best Regards, Kevin ________________________________ From: Christian König ckoenig.leichtzumerken@gmail.com Sent: Thursday, January 28, 2021 3:46 PM To: Wang, Kevin(Yang) Kevin1.Wang@amd.com; dri-devel@lists.freedesktop.org dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org amd-gfx@lists.freedesktop.org Cc: Huang, Ray Ray.Huang@amd.com; Koenig, Christian Christian.Koenig@amd.com Subject: Re: [RFC PATCH 0/3] add ttm trace event support
Not a bad start, but that needs quite some more work.
First of all please rebase on top of current drm-misc-next, a whole bunch of the stuff you want to trace here was already removed or is about to be removed.
Then concentrate on the necessary trace points, for example ttm:ttm_bo_device_init/release are overkill.
On the other hand I'm missing important events like pool shrink, tt swapout/swapin (ttm_bo_swapout can be dropped) and especially BO init.
I would separate the patches into one for each trace point. Not a must have, but it could make it easier to review and we can discuss for each one separately.
Thanks, Christian.
Am 28.01.21 um 08:13 schrieb Kevin Wang:
the kernel ftrace can better help analyze the kernel running status. add some trace events to support TTM.
add trace events list:
ttm:ttm_bo_add_mem_to_lru ttm:ttm_bo_del_from_lru ttm:ttm_bo_move_mem ttm:ttm_bo_wait ttm:ttm_bo_evict ttm:ttm_bo_swapout ttm:ttm_bo_device_init ttm:ttm_bo_device_release ttm:ttm_bo_init_reserved ttm:ttm_bo_validate ttm:ttm_bo_release ttm:ttm_bo_mmap ttm:ttm_bo_vm_fault ttm:ttm_bo_vm_access ttm:ttm_shrink ttm:ttm_mem_global_reserve ttm:ttm_mem_global_free
Kevin Wang (3): drm/ttm: add ttm bo trace event support drm/ttm: add ttm vm bo trace event support drm/ttm: add ttm mem trace event support
drivers/gpu/drm/ttm/ttm_bo.c | 23 ++ drivers/gpu/drm/ttm/ttm_bo_vm.c | 12 +- drivers/gpu/drm/ttm/ttm_memory.c | 7 + drivers/gpu/drm/ttm/ttm_module.c | 3 + drivers/gpu/drm/ttm/ttm_trace.h | 469 +++++++++++++++++++++++++++++++ 5 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/ttm/ttm_trace.h
dri-devel@lists.freedesktop.org