[PATCH 17/26] drm/amd/display: implement PERF_TRACE on Linux

Bhawanpreet Lakha Bhawanpreet.Lakha at amd.com
Wed Oct 10 22:09:13 UTC 2018


From: David Francis <David.Francis at amd.com>

[Why]
A quick-and-dirty way of getting performance data for the amdgpu
driver would make performance improvements easier.

[How]
The PERF_TRACE functionality is a tic-toc style debug method.
Put PERF_TRACE calls on either side of the code you want to test.
PERF_TRACE requires access to struct dc_context.  PERF_TRACE()
will pick up the CTX macro, and PERF_TRACE_CTX(struct dc_context)
allows you to pass the context explicitly.

The last 20 results can be read through the debugfs entry
amdgpu_perf_trace.  Each result contains the elapsed time in ns and
the number of GPU register reads/writes since the previous result.

In my experiments, each PERF_TRACE() call takes at most 700 ns.

Signed-off-by: David Francis <David.Francis at amd.com>
Reviewed-by: Tony Cheng <Tony.Cheng at amd.com>
Acked-by: Bhawanpreet Lakha <Bhawanpreet.Lakha at amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  |  4 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c  | 74 +++++++++++++++++++++-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h  |  2 +-
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_services.c | 21 +++++-
 drivers/gpu/drm/amd/display/dc/core/dc.c           | 36 +++++++++++
 drivers/gpu/drm/amd/display/dc/dc_types.h          | 22 +++++++
 .../gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c |  4 +-
 drivers/gpu/drm/amd/display/dc/dm_services.h       | 13 ++--
 8 files changed, 165 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 492230c41b4a..252bfe330c1e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -471,8 +471,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	}
 
 #if defined(CONFIG_DEBUG_FS)
-	if (dtn_debugfs_init(adev))
-		DRM_ERROR("amdgpu: failed initialize dtn debugfs support.\n");
+	if (amdgpu_dm_debugfs_init(adev))
+		DRM_ERROR("amdgpu: failed to initialize dm debugfs entries.\n");
 #endif
 
 	DRM_DEBUG_DRIVER("KMS initialized.\n");
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 0ef4a40d2247..6b73c0e6bf67 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -782,7 +782,63 @@ static ssize_t dtn_log_write(
 	return size;
 }
 
-int dtn_debugfs_init(struct amdgpu_device *adev)
+/*
+ * debugfs read handler for amdgpu_perf_trace.
+ *
+ * Formats the recorded PERF_TRACE entries, oldest first, into a temporary
+ * buffer and copies the part selected by @pos and @size to @buf.
+ *
+ * Returns the number of bytes copied, 0 at end of data, or a negative errno.
+ */
+static ssize_t perf_trace_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = file_inode(f)->i_private;
+	struct perf_trace *trace = adev->dm.dc->ctx->perf_trace;
+	struct perf_trace_entry *entry;
+	const uint32_t line_buf_size = 200;
+	uint32_t rd_buf_size;
+	char *rd_buf;
+	ssize_t result;
+	int len;
+	uint32_t i;
+
+	if (!trace || !trace->num_entries)
+		return 0;
+
+	/* One extra line of room for the "PERF_TRACE:" header. */
+	rd_buf_size = line_buf_size * (trace->num_entries + 1);
+	rd_buf = kzalloc(rd_buf_size, GFP_KERNEL);
+	if (!rd_buf)
+		return -ENOMEM;
+
+	/* scnprintf() returns what was written, so len never passes the end. */
+	len = scnprintf(rd_buf, rd_buf_size, "PERF_TRACE:\n");
+
+	for (i = 0; i < trace->num_entries; i++) {
+		entry = &trace->entries[(trace->next_entry + i) % trace->num_entries];
+
+		/* Slots that were never written still have an empty name. */
+		if (entry->func_name[0] == '\0')
+			continue;
+
+		len += scnprintf(rd_buf + len, rd_buf_size - len,
+				 "%s:%u Reads:%u\t Writes:%u\t ns:%llu\n",
+				 entry->func_name,
+				 entry->line_number,
+				 entry->read_count,
+				 entry->write_count,
+				 entry->time_delta);
+	}
+
+	/* Honours *pos at any offset and stops at the end of the text. */
+	result = simple_read_from_buffer(buf, size, pos, rd_buf, len);
+
+	kfree(rd_buf);
+	return result;
+}
+
+int amdgpu_dm_debugfs_init(struct amdgpu_device *adev)
 {
 	static const struct file_operations dtn_log_fops = {
 		.owner = THIS_MODULE,
@@ -791,6 +847,12 @@ int dtn_debugfs_init(struct amdgpu_device *adev)
 		.llseek = default_llseek
 	};
 
+	static const struct file_operations perf_trace_fops = {
+		.owner = THIS_MODULE,
+		.read = perf_trace_read,
+		.llseek = default_llseek
+	};
+
 	struct drm_minor *minor = adev->ddev->primary;
 	struct dentry *root = minor->debugfs_root;
 
@@ -801,5 +863,15 @@ int dtn_debugfs_init(struct amdgpu_device *adev)
 		adev,
 		&dtn_log_fops);
 
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	ent = debugfs_create_file(
+		"amdgpu_perf_trace",
+		0644,
+		root,
+		adev,
+		&perf_trace_fops);
+
 	return PTR_ERR_OR_ZERO(ent);
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
index bdef1587b0a0..2d0cdbaa1a8e 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.h
@@ -30,6 +30,6 @@
 #include "amdgpu_dm.h"
 
 int connector_debugfs_init(struct amdgpu_dm_connector *connector);
-int dtn_debugfs_init(struct amdgpu_device *adev);
+int amdgpu_dm_debugfs_init(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
index 516795342dd2..77699e752680 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c
@@ -44,9 +44,28 @@ unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
 	return current_time_stamp - last_time_stamp;
 }
 
-void dm_perf_trace_timestamp(const char *func_name, unsigned int line)
+#ifdef CONFIG_DEBUG_FS
+void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc_context *ctx)
+{
+	struct perf_trace *trace = ctx->perf_trace;
+	struct perf_trace_entry *entry = &trace->entries[trace->next_entry];
+	unsigned long long timestamp = ktime_get_raw_ns();
+
+	entry->line_number = line;
+	strlcpy(entry->func_name, func_name, sizeof(entry->func_name));
+	entry->read_count = trace->read_count;	/* register reads since the last sample */
+	entry->write_count = trace->write_count;	/* register writes since the last sample */
+	entry->time_delta = timestamp - trace->timestamp;
+	trace->read_count = 0;	/* counters restart for the next interval */
+	trace->write_count = 0;
+	trace->timestamp = timestamp;
+	trace->next_entry = (trace->next_entry + 1) % trace->num_entries;	/* wrap around the ring */
+}
+#else
+void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc_context *ctx)
 {
 }
+#endif
 
 bool dm_write_persistent_data(struct dc_context *ctx,
 		const struct dc_sink *sink,
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index a8d8358058ff..99db82802c62 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -500,6 +500,33 @@ void dc_link_disable_hpd(const struct dc_link *link)
 	dc_link_dp_disable_hpd(link);
 }
 
+static void perf_trace_destruct(struct perf_trace **trace)
+{
+	kfree(*trace);
+	*trace = NULL;	/* clear the caller's pointer so the freed trace is not reused */
+}
+
+/*
+ * Allocate the perf_trace ring buffer for @ctx.
+ *
+ * kzalloc() returns zeroed memory, so every entry already has an empty
+ * func_name and zero counters; only the non-zero fields are set here.
+ *
+ * Returns the new trace, or NULL if the allocation failed.
+ */
+static struct perf_trace *perf_trace_construct(struct dc_context *ctx)
+{
+	struct perf_trace *trace = kzalloc(sizeof(*trace), GFP_KERNEL);
+
+	if (!trace)
+		return NULL;
+
+	/* Reference point for the first entry's time_delta. */
+	trace->timestamp = dm_get_timestamp(ctx);
+	trace->num_entries = AMDGPU_PERF_TRACE_NUM_ENTRIES;
+
+	return trace;
+}
 
 void dc_link_set_test_pattern(struct dc_link *link,
 			      enum dp_test_pattern test_pattern,
@@ -534,6 +561,9 @@ static void destruct(struct dc *dc)
 	if (dc->ctx->created_bios)
 		dal_bios_parser_destroy(&dc->ctx->dc_bios);
 
+	if (dc->ctx->perf_trace)
+		perf_trace_destruct(&dc->ctx->perf_trace);
+
 	kfree(dc->ctx);
 	dc->ctx = NULL;
 
@@ -621,6 +651,12 @@ static bool construct(struct dc *dc,
 
 	/* Create logger */
 
+	dc_ctx->perf_trace = perf_trace_construct(dc_ctx);
+	if (!dc_ctx->perf_trace) {
+		dm_error("%s: failed to create perf_trace!\n", __func__);
+		goto fail;
+	}
+
 	dc_ctx->dce_environment = init_params->dce_environment;
 
 	dc_version = resource_parse_asic_id(init_params->asic_id);
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6e12d640d020..7aa54dd9dd74 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -73,6 +73,27 @@ struct hw_asic_id {
 	void *atombios_base_address;
 };
 
+#ifndef AMDGPU_PERF_TRACE_NUM_ENTRIES
+#define AMDGPU_PERF_TRACE_NUM_ENTRIES 20
+#endif
+
+struct perf_trace_entry {	/* one sample recorded by dm_perf_trace_timestamp() */
+	unsigned long long time_delta;	/* time since the previous sample (reported as ns) */
+	uint32_t write_count;	/* dm_write_reg_func() calls since the previous sample */
+	uint32_t read_count;	/* dm_read_reg_func() calls since the previous sample */
+	uint32_t line_number;	/* __LINE__ of the PERF_TRACE call site */
+	char func_name[40];	/* __func__ of the call site; empty marks an unused slot */
+};
+
+struct perf_trace {	/* ring of recent samples plus the running counters */
+	uint32_t num_entries;	/* ring size used for wrap-around */
+	uint32_t next_entry;	/* index of the slot the next sample is written to */
+	struct perf_trace_entry entries[AMDGPU_PERF_TRACE_NUM_ENTRIES];
+	unsigned long long timestamp;	/* time of the most recent sample */
+	uint32_t write_count;	/* register writes counted since the most recent sample */
+	uint32_t read_count;	/* register reads counted since the most recent sample */
+};
+
 struct dc_context {
 	struct dc *dc;
 
@@ -85,6 +106,7 @@ struct dc_context {
 	/* todo: below should probably move to dc.  to facilitate removal
 	 * of AS we will store these here
 	 */
+	struct perf_trace *perf_trace;
 	enum dce_version dce_version;
 	struct dc_bios *dc_bios;
 	bool created_bios;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 97c059934feb..0f1f12de2a53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -259,7 +259,7 @@ bool cm_helper_translate_curve_to_hw_format(
 	if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
 		return false;
 
-	PERF_TRACE();
+	PERF_TRACE_CTX(output_tf->ctx);
 
 	arr_points = lut_params->arr_points;
 	rgb_resulted = lut_params->rgb_resulted;
@@ -441,7 +441,7 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
 	if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
 		return false;
 
-	PERF_TRACE();
+	PERF_TRACE_CTX(output_tf->ctx);
 
 	arr_points = lut_params->arr_points;
 	rgb_resulted = lut_params->rgb_resulted;
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 28128c02de00..a1d6c99a00b0 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -70,7 +70,9 @@ static inline uint32_t dm_read_reg_func(
 	}
 #endif
 	value = cgs_read_register(ctx->cgs_device, address);
-
+#ifdef CONFIG_DEBUG_FS
+	ctx->perf_trace->read_count++;
+#endif
 	return value;
 }
 
@@ -90,6 +92,9 @@ static inline void dm_write_reg_func(
 	}
 #endif
 	cgs_write_register(ctx->cgs_device, address, value);
+#ifdef CONFIG_DEBUG_FS
+	ctx->perf_trace->write_count++;
+#endif
 }
 
 static inline uint32_t dm_read_index_reg(
@@ -351,9 +356,9 @@ unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
 /*
  * performance tracing
  */
-void dm_perf_trace_timestamp(const char *func_name, unsigned int line);
-#define PERF_TRACE()	dm_perf_trace_timestamp(__func__, __LINE__)
-
+void dm_perf_trace_timestamp(const char *func_name, unsigned int line, struct dc_context *ctx);
+#define PERF_TRACE()	dm_perf_trace_timestamp(__func__, __LINE__, CTX)	/* needs a CTX macro in scope */
+#define PERF_TRACE_CTX(__context)	dm_perf_trace_timestamp(__func__, __LINE__, __context)	/* explicit context */
 
 /*
  * Debug and verification hooks
-- 
2.14.1



More information about the amd-gfx mailing list