[PATCH] drm/i915/gvt: rewrite the trace gvt:gvt_command using trace style approach
Zhenyu Wang
zhenyuw at linux.intel.com
Wed May 3 08:37:19 UTC 2017
On 2017.05.03 09:20:10 +0800, changbin.du at intel.com wrote:
> From: Changbin Du <changbin.du at intel.com>
>
> The gvt:gvt_command trace involve unnecessary overhead even this trace is
> not enabled. We need improve it.
>
> The kernel trace infrastructure provide a full api to define a trace event.
> We should leverage them if possible. And one important thing is that a trace
> point should store raw data but not format string.
>
> This patch include two part work:
> 1) Refactor the gvt_command trace definition, including:
> o only store raw trace data.
> o use __dynamic_array() to declare a variable size buffer.
> o use __print_array() to format raw cmd data.
> o rename vm_id as vgpu_id.
>
> 2) Improve the trace invoking, including:
> o remove the cycles calculation for handler. We can get this data
> by any perf tool.
> o do not make a backup for raw cmd data which just doesn't make sense.
>
> With this patch, this trace has no overhead if it is not enabled. And we are
> trace style now.
>
> The final output example:
> gvt workload 0-211 [000] ...1 120.555964: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e880, raw cmd {0x4000000}
> gvt workload 0-211 [000] ...1 120.556014: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e884, raw cmd {0x7a000004,0x1004000,0xe1511018,0x0,0x7d,0x0}
> gvt workload 0-211 [000] ...1 120.556062: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e89c, raw cmd {0x7a000004,0x140000,0x0,0x0,0x0,0x0}
> gvt workload 0-211 [000] ...1 120.556110: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e8b4, raw cmd {0x10400002,0xe1511018,0x0,0x7d}
>
Applied, thanks for clean this up!
> Signed-off-by: Changbin Du <changbin.du at intel.com>
> ---
> drivers/gpu/drm/i915/gvt/cmd_parser.c | 50 ++--------------------
> drivers/gpu/drm/i915/gvt/trace.h | 78 +++++++++++++----------------------
> 2 files changed, 32 insertions(+), 96 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index 41b2c3a..5634eb1 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
> hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
> }
>
> -#define GVT_MAX_CMD_LENGTH 20 /* In Dword */
> -
> -static void trace_cs_command(struct parser_exec_state *s,
> - cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
> -{
> - /* This buffer is used by ftrace to store all commands copied from
> - * guest gma space. Sometimes commands can cross pages, this should
> - * not be handled in ftrace logic. So this is just used as a
> - * 'bounce buffer'
> - */
> - u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
> - int i;
> - u32 cmd_len = cmd_length(s);
> - /* The chosen value of GVT_MAX_CMD_LENGTH are just based on
> - * following two considerations:
> - * 1) From observation, most common ring commands is not that long.
> - * But there are execeptions. So it indeed makes sence to observe
> - * longer commands.
> - * 2) From the performance and debugging point of view, dumping all
> - * contents of very commands is not necessary.
> - * We mgith shrink GVT_MAX_CMD_LENGTH or remove this trace event in
> - * future for performance considerations.
> - */
> - if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
> - gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
> - cmd_len = GVT_MAX_CMD_LENGTH;
> - }
> -
> - for (i = 0; i < cmd_len; i++)
> - cmd_trace_buf[i] = cmd_val(s, i);
> -
> - trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
> - cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
> - cost_pre_cmd_handler, cost_cmd_handler);
> -}
> -
> /* call the cmd handler, and advance ip */
> static int cmd_parser_exec(struct parser_exec_state *s)
> {
> + struct intel_vgpu *vgpu = s->vgpu;
> struct cmd_info *info;
> u32 cmd;
> int ret = 0;
> - cycles_t t0, t1, t2;
> - struct parser_exec_state s_before_advance_custom;
> - struct intel_vgpu *vgpu = s->vgpu;
> -
> - t0 = get_cycles();
>
> cmd = cmd_val(s, 0);
>
> @@ -2475,9 +2435,8 @@ static int cmd_parser_exec(struct parser_exec_state *s)
>
> s->info = info;
>
> - t1 = get_cycles();
> -
> - s_before_advance_custom = *s;
> + trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
> + cmd_length(s), s->buf_type);
>
> if (info->handler) {
> ret = info->handler(s);
> @@ -2486,9 +2445,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
> return ret;
> }
> }
> - t2 = get_cycles();
> -
> - trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);
>
> if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
> ret = cmd_advance_default(s);
> diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
> index 53a2d10..9171291 100644
> --- a/drivers/gpu/drm/i915/gvt/trace.h
> +++ b/drivers/gpu/drm/i915/gvt/trace.h
> @@ -224,57 +224,37 @@ TRACE_EVENT(oos_sync,
> TP_printk("%s", __entry->buf)
> );
>
> -#define MAX_CMD_STR_LEN 256
> TRACE_EVENT(gvt_command,
> - TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler),
> -
> - TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler),
> -
> - TP_STRUCT__entry(
> - __field(u8, vm_id)
> - __field(u8, ring_id)
> - __field(int, i)
> - __array(char, tmp_buf, MAX_CMD_STR_LEN)
> - __array(char, cmd_str, MAX_CMD_STR_LEN)
> - ),
> -
> - TP_fast_assign(
> - __entry->vm_id = vm_id;
> - __entry->ring_id = ring_id;
> - __entry->cmd_str[0] = '\0';
> - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler);
> - strcat(__entry->cmd_str, __entry->tmp_buf);
> - entry->i = 0;
> - while (cmd_len > 0) {
> - if (cmd_len >= 8) {
> - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ",
> - cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3],
> - cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]);
> - __entry->i += 8;
> - cmd_len -= 8;
> - strcat(__entry->cmd_str, __entry->tmp_buf);
> - } else if (cmd_len >= 4) {
> - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ",
> - cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]);
> - __entry->i += 4;
> - cmd_len -= 4;
> - strcat(__entry->cmd_str, __entry->tmp_buf);
> - } else if (cmd_len >= 2) {
> - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]);
> - __entry->i += 2;
> - cmd_len -= 2;
> - strcat(__entry->cmd_str, __entry->tmp_buf);
> - } else if (cmd_len == 1) {
> - snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]);
> - __entry->i += 1;
> - cmd_len -= 1;
> - strcat(__entry->cmd_str, __entry->tmp_buf);
> - }
> - }
> - strcat(__entry->cmd_str, "\n");
> - ),
> + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
> + u32 buf_type),
> +
> + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
> +
> + TP_STRUCT__entry(
> + __field(u8, vgpu_id)
> + __field(u8, ring_id)
> + __field(u32, ip_gma)
> + __field(u32, buf_type)
> + __field(u32, cmd_len)
> + __dynamic_array(u32, raw_cmd, cmd_len)
> + ),
> +
> + TP_fast_assign(
> + __entry->vgpu_id = vgpu_id;
> + __entry->ring_id = ring_id;
> + __entry->ip_gma = ip_gma;
> + __entry->buf_type = buf_type;
> + __entry->cmd_len = cmd_len;
> + memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
> + ),
> +
>
> - TP_printk("%s", __entry->cmd_str)
> + TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
> + __entry->vgpu_id,
> + __entry->ring_id,
> + __entry->buf_type,
> + __entry->ip_gma,
> + __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
> );
> #endif /* _GVT_TRACE_H_ */
>
> --
> 2.7.4
>
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170503/651a4b81/attachment-0001.sig>
More information about the intel-gvt-dev
mailing list