[PATCH] drm/i915/gvt: rewrite the trace gvt:gvt_command using trace style approach

Zhenyu Wang zhenyuw at linux.intel.com
Wed May 3 08:37:19 UTC 2017


On 2017.05.03 09:20:10 +0800, changbin.du at intel.com wrote:
> From: Changbin Du <changbin.du at intel.com>
> 
> The gvt:gvt_command trace involve unnecessary overhead even this trace is
> not enabled. We need improve it.
> 
> The kernel trace infrastructure provide a full api to define a trace event.
> We should leverage them if possible. And one important thing is that a trace
> point should store raw data but not format string.
> 
> This patch include two part work:
> 1) Refactor the gvt_command trace definition, including:
>   o only store raw trace data.
>   o use __dynamic_array() to declare a variable size buffer.
>   o use __print_array() to format raw cmd data.
>   o rename vm_id as vgpu_id.
> 
> 2) Improve the trace invoking, including:
>   o remove the cycles calculation for handler. We can get this data
>     by any perf tool.
>   o do not make a backup for raw cmd data which just doesn't make sense.
> 
> With this patch, this trace has no overhead if it is not enabled. And we are
> trace style now.
> 
> The final output example:
>   gvt workload 0-211   [000] ...1   120.555964: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e880, raw cmd {0x4000000}
>   gvt workload 0-211   [000] ...1   120.556014: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e884, raw cmd {0x7a000004,0x1004000,0xe1511018,0x0,0x7d,0x0}
>   gvt workload 0-211   [000] ...1   120.556062: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e89c, raw cmd {0x7a000004,0x140000,0x0,0x0,0x0,0x0}
>   gvt workload 0-211   [000] ...1   120.556110: gvt_command: vgpu1 ring 0: buf_type 0, ip_gma e161e8b4, raw cmd {0x10400002,0xe1511018,0x0,0x7d}
>

Applied, thanks for clean this up!

> Signed-off-by: Changbin Du <changbin.du at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/cmd_parser.c | 50 ++--------------------
>  drivers/gpu/drm/i915/gvt/trace.h      | 78 +++++++++++++----------------------
>  2 files changed, 32 insertions(+), 96 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> index 41b2c3a..5634eb1 100644
> --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
> +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
> @@ -2414,53 +2414,13 @@ static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
>  	hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
>  }
>  
> -#define GVT_MAX_CMD_LENGTH     20  /* In Dword */
> -
> -static void trace_cs_command(struct parser_exec_state *s,
> -		cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler)
> -{
> -	/* This buffer is used by ftrace to store all commands copied from
> -	 * guest gma space. Sometimes commands can cross pages, this should
> -	 * not be handled in ftrace logic. So this is just used as a
> -	 * 'bounce buffer'
> -	 */
> -	u32 cmd_trace_buf[GVT_MAX_CMD_LENGTH];
> -	int i;
> -	u32 cmd_len = cmd_length(s);
> -	/* The chosen value of GVT_MAX_CMD_LENGTH are just based on
> -	 * following two considerations:
> -	 * 1) From observation, most common ring commands is not that long.
> -	 *    But there are execeptions. So it indeed makes sence to observe
> -	 *    longer commands.
> -	 * 2) From the performance and debugging point of view, dumping all
> -	 *    contents of very commands is not necessary.
> -	 * We mgith shrink GVT_MAX_CMD_LENGTH or remove this trace event in
> -	 * future for performance considerations.
> -	 */
> -	if (unlikely(cmd_len > GVT_MAX_CMD_LENGTH)) {
> -		gvt_dbg_cmd("cmd length exceed tracing limitation!\n");
> -		cmd_len = GVT_MAX_CMD_LENGTH;
> -	}
> -
> -	for (i = 0; i < cmd_len; i++)
> -		cmd_trace_buf[i] = cmd_val(s, i);
> -
> -	trace_gvt_command(s->vgpu->id, s->ring_id, s->ip_gma, cmd_trace_buf,
> -			cmd_len, s->buf_type == RING_BUFFER_INSTRUCTION,
> -			cost_pre_cmd_handler, cost_cmd_handler);
> -}
> -
>  /* call the cmd handler, and advance ip */
>  static int cmd_parser_exec(struct parser_exec_state *s)
>  {
> +	struct intel_vgpu *vgpu = s->vgpu;
>  	struct cmd_info *info;
>  	u32 cmd;
>  	int ret = 0;
> -	cycles_t t0, t1, t2;
> -	struct parser_exec_state s_before_advance_custom;
> -	struct intel_vgpu *vgpu = s->vgpu;
> -
> -	t0 = get_cycles();
>  
>  	cmd = cmd_val(s, 0);
>  
> @@ -2475,9 +2435,8 @@ static int cmd_parser_exec(struct parser_exec_state *s)
>  
>  	s->info = info;
>  
> -	t1 = get_cycles();
> -
> -	s_before_advance_custom = *s;
> +	trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va,
> +			  cmd_length(s), s->buf_type);
>  
>  	if (info->handler) {
>  		ret = info->handler(s);
> @@ -2486,9 +2445,6 @@ static int cmd_parser_exec(struct parser_exec_state *s)
>  			return ret;
>  		}
>  	}
> -	t2 = get_cycles();
> -
> -	trace_cs_command(&s_before_advance_custom, t1 - t0, t2 - t1);
>  
>  	if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
>  		ret = cmd_advance_default(s);
> diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
> index 53a2d10..9171291 100644
> --- a/drivers/gpu/drm/i915/gvt/trace.h
> +++ b/drivers/gpu/drm/i915/gvt/trace.h
> @@ -224,57 +224,37 @@ TRACE_EVENT(oos_sync,
>  	TP_printk("%s", __entry->buf)
>  );
>  
> -#define MAX_CMD_STR_LEN	256
>  TRACE_EVENT(gvt_command,
> -		TP_PROTO(u8 vm_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, bool ring_buffer_cmd, cycles_t cost_pre_cmd_handler, cycles_t cost_cmd_handler),
> -
> -		TP_ARGS(vm_id, ring_id, ip_gma, cmd_va, cmd_len, ring_buffer_cmd, cost_pre_cmd_handler, cost_cmd_handler),
> -
> -		TP_STRUCT__entry(
> -			__field(u8, vm_id)
> -			__field(u8, ring_id)
> -			__field(int, i)
> -			__array(char, tmp_buf, MAX_CMD_STR_LEN)
> -			__array(char, cmd_str, MAX_CMD_STR_LEN)
> -			),
> -
> -		TP_fast_assign(
> -			__entry->vm_id = vm_id;
> -			__entry->ring_id = ring_id;
> -			__entry->cmd_str[0] = '\0';
> -			snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "VM(%d) Ring(%d): %s ip(%08x) pre handler cost (%llu), handler cost (%llu) ", vm_id, ring_id, ring_buffer_cmd ? "RB":"BB", ip_gma, cost_pre_cmd_handler, cost_cmd_handler);
> -			strcat(__entry->cmd_str, __entry->tmp_buf);
> -			entry->i = 0;
> -			while (cmd_len > 0) {
> -				if (cmd_len >= 8) {
> -					snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x %08x %08x %08x %08x ",
> -						cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3],
> -						cmd_va[__entry->i+4], cmd_va[__entry->i+5], cmd_va[__entry->i+6], cmd_va[__entry->i+7]);
> -					__entry->i += 8;
> -					cmd_len -= 8;
> -					strcat(__entry->cmd_str, __entry->tmp_buf);
> -				} else if (cmd_len >= 4) {
> -					snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x %08x %08x ",
> -						cmd_va[__entry->i], cmd_va[__entry->i+1], cmd_va[__entry->i+2], cmd_va[__entry->i+3]);
> -					__entry->i += 4;
> -					cmd_len -= 4;
> -					strcat(__entry->cmd_str, __entry->tmp_buf);
> -				} else if (cmd_len >= 2) {
> -					snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x %08x ", cmd_va[__entry->i], cmd_va[__entry->i+1]);
> -					__entry->i += 2;
> -					cmd_len -= 2;
> -					strcat(__entry->cmd_str, __entry->tmp_buf);
> -				} else if (cmd_len == 1) {
> -					snprintf(__entry->tmp_buf, MAX_CMD_STR_LEN, "%08x ", cmd_va[__entry->i]);
> -					__entry->i += 1;
> -					cmd_len -= 1;
> -					strcat(__entry->cmd_str, __entry->tmp_buf);
> -				}
> -			}
> -			strcat(__entry->cmd_str, "\n");
> -		),
> +	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len,
> +		 u32 buf_type),
> +
> +	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type),
> +
> +	TP_STRUCT__entry(
> +		__field(u8, vgpu_id)
> +		__field(u8, ring_id)
> +		__field(u32, ip_gma)
> +		__field(u32, buf_type)
> +		__field(u32, cmd_len)
> +		__dynamic_array(u32, raw_cmd, cmd_len)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vgpu_id = vgpu_id;
> +		__entry->ring_id = ring_id;
> +		__entry->ip_gma = ip_gma;
> +		__entry->buf_type = buf_type;
> +		__entry->cmd_len = cmd_len;
> +		memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
> +	),
> +
>  
> -		TP_printk("%s", __entry->cmd_str)
> +	TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s",
> +		__entry->vgpu_id,
> +		__entry->ring_id,
> +		__entry->buf_type,
> +		__entry->ip_gma,
> +		__print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4))
>  );
>  #endif /* _GVT_TRACE_H_ */
>  
> -- 
> 2.7.4
> 
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170503/651a4b81/attachment-0001.sig>


More information about the intel-gvt-dev mailing list