[Intel-gfx] [PATCH 1/2] i915/perf: Move gen specific OA formats to single array

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Fri Dec 18 02:32:49 UTC 2020


On Wed, Dec 16, 2020 at 09:25:35AM +0000, Tvrtko Ursulin wrote:
>
>On 15/12/2020 21:49, Umesh Nerlige Ramappa wrote:
>>Variations in OA formats in the different gens has led to creation of
>>several sparse arrays to store the formats.
>>
>>Move oa formats into a single array and add the supported range of
>>platforms for the oa formats.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c       | 56 ++++++++++++--------------
>>  drivers/gpu/drm/i915/i915_perf_types.h |  3 ++
>>  2 files changed, 28 insertions(+), 31 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index f553caf4b06d..afa92cf075c4 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -298,28 +298,27 @@ static u32 i915_oa_max_sample_rate = 100000;
>>  /* XXX: beware if future OA HW adds new report formats that the current
>>   * code assumes all reports have a power-of-two size and ~(size - 1) can
>>- * be used as a mask to align the OA tail pointer.
>>+ * be used as a mask to align the OA tail pointer. Note that the platforms
>>+ * in this array specify a range (inclusive of start and end).
>>   */
>>-static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A13]	    = { 0, 64 },
>>-	[I915_OA_FORMAT_A29]	    = { 1, 128 },
>>-	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
>>-	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
>>-	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
>>-	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
>>-	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
>>-	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A12]		    = { 0, 64 },
>>-	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>-	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
>>-};
>>-
>>-static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
>>-	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
>>+static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>>+	/* haswell */
>>+	[I915_OA_FORMAT_A13]                    = { 0, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A29]                    = { 1, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A13_B8_C8]              = { 2, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8]                  = { 4, 64, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_A45_B8_C8]              = { 5, 256, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+	[I915_OA_FORMAT_B4_C8_A16]              = { 6, 128, INTEL_IVYBRIDGE, INTEL_HASWELL },
>>+
>>+	/* haswell+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_C4_B8]                  = { 7, 64, INTEL_IVYBRIDGE, INTEL_TIGERLAKE - 1 },
>>+
>>+	/* gen8+ upto but not including tigerlake */
>>+	[I915_OA_FORMAT_A12]                    = { 0, 64, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+	[I915_OA_FORMAT_A12_B8_C8]              = { 2, 128, INTEL_BROADWELL, INTEL_TIGERLAKE - 1 },
>>+
>>+	/* gen8+ */
>>+	[I915_OA_FORMAT_A32u40_A4u32_B8_C8]     = { 5, 256, INTEL_BROADWELL, INTEL_MAX_PLATFORMS - 1 },
>>  };
>>  #define SAMPLE_OA_REPORT      (1<<0)
>>@@ -3575,6 +3574,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  	for (i = 0; i < n_props; i++) {
>>  		u64 oa_period, oa_freq_hz;
>>  		u64 id, value;
>>+		enum intel_platform p = INTEL_INFO(perf->i915)->platform;
>>  		ret = get_user(id, uprop);
>>  		if (ret)
>>@@ -3611,8 +3611,9 @@ static int read_properties_unlocked(struct i915_perf *perf,
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>-			if (!perf->oa_formats[value].size) {
>>-				DRM_DEBUG("Unsupported OA report format %llu\n",
>>+			if (p < perf->oa_formats[value].start ||
>>+			    p > perf->oa_formats[value].end) {
>>+				DRM_DEBUG("OA report format not supported %llu\n",
>>  					  value);
>>  				return -EINVAL;
>>  			}
>>@@ -4270,6 +4271,7 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  	/* XXX const struct i915_perf_ops! */
>>+	perf->oa_formats = oa_formats;
>>  	if (IS_HASWELL(i915)) {
>>  		perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
>>  		perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
>>@@ -4280,8 +4282,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.oa_disable = gen7_oa_disable;
>>  		perf->ops.read = gen7_oa_read;
>>  		perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
>>-
>>-		perf->oa_formats = hsw_oa_formats;
>>  	} else if (HAS_LOGICAL_RING_CONTEXTS(i915)) {
>>  		/* Note: that although we could theoretically also support the
>>  		 * legacy ringbuffer mode on BDW (and earlier iterations of
>>@@ -4292,8 +4292,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  		perf->ops.read = gen8_oa_read;
>>  		if (IS_GEN_RANGE(i915, 8, 9)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4324,8 +4322,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  				perf->gen8_valid_ctx_bit = BIT(16);
>>  			}
>>  		} else if (IS_GEN_RANGE(i915, 10, 11)) {
>>-			perf->oa_formats = gen8_plus_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen7_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>@@ -4348,8 +4344,6 @@ void i915_perf_init(struct drm_i915_private *i915)
>>  			}
>>  			perf->gen8_valid_ctx_bit = BIT(16);
>>  		} else if (IS_GEN(i915, 12)) {
>>-			perf->oa_formats = gen12_oa_formats;
>>-
>>  			perf->ops.is_valid_b_counter_reg =
>>  				gen12_is_valid_b_counter_addr;
>>  			perf->ops.is_valid_mux_reg =
>>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>>index a36a455ae336..84dceb743ebc 100644
>>--- a/drivers/gpu/drm/i915/i915_perf_types.h
>>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>>@@ -19,6 +19,7 @@
>>  #include "gt/intel_sseu.h"
>>  #include "i915_reg.h"
>>  #include "intel_wakeref.h"
>>+#include "intel_device_info.h"
>>  struct drm_i915_private;
>>  struct file;
>>@@ -32,6 +33,8 @@ struct intel_engine_cs;
>>  struct i915_oa_format {
>>  	u32 format;
>>  	int size;
>>+	enum intel_platform start;
>>+	enum intel_platform end;
>
>As a drive by only - I question the concept of thinking about 
>platforms enums as ordered and having things like INTEL_TIGERLAKE - 1 
>in the code. It's completely novel concept and I think fragile. Order 
>there is purely the order of upstreaming and not stable.
>
>Suggestions wise, if the rules are Gen based you could use a mask, but 
>if they are truly platform based then it is more tricky. However 
>current code does use gen ranges to assign them, so would a genmask 
>really not fit?

Hmm, the rules are platform-based. A mask would be ideal, but the last I 
thought about it, it felt more complex than this solution. This code 
would have to maintain a mask of supported platforms (and subplatforms) 
for each entry (preferably in the same format as runtime info -> 
platform_mask).  Then we would just AND this mask with the 
platform_mask/s to check for support. Let me know if that's what you 
have in mind, then I can give it a try.

Thanks,
Umesh

>
>Regards,
>
>Tvrtko


More information about the Intel-gfx mailing list