[Intel-gfx] [RFC 2/2] drm/i915/bxt: Enable pooled EUs for BXT
Arun Siluvery
arun.siluvery at linux.intel.com
Fri Jul 10 10:35:20 PDT 2015
From: Armin Reese <armin.c.reese at intel.com>
The pooled EU feature for BXT is enabled by the GEN9 golden
context batch buffer (BB). Pooling EUs makes more execution
units available for rendering operations and should result
in improved performance. The golden context batch buffer
is used to enable the feature so that it becomes part of
the system's default render state.
v1 - Original patch
v2 - Rebased
v3 - Allow the driver to modify multiple 'modification ranges'
within the golden context batch buffer.
Signed-off-by: Armin Reese <armin.c.reese at intel.com>
---
drivers/gpu/drm/i915/i915_gem_render_state.c | 120 +++++++++++++++++++-------
drivers/gpu/drm/i915/intel_renderstate_gen6.c | 2 +-
drivers/gpu/drm/i915/intel_renderstate_gen7.c | 2 +-
drivers/gpu/drm/i915/intel_renderstate_gen8.c | 2 +-
drivers/gpu/drm/i915/intel_renderstate_gen9.c | 16 ++--
5 files changed, 102 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index 818233d..7e25610 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -45,44 +45,104 @@ render_state_get_rodata(struct drm_device *dev, const int gen)
return NULL;
}
+/**
+ * gc_write_values() - write 'modification values' into one range of the
+ * golden context batch buffer
+ * @rodata: render state data whose mod_values[] table describes the ranges
+ * @mod_index: index of the 'mod_value_info' element (defined in
+ *             intel_renderstate_genx.c) describing the range to modify
+ * @range_offset: u32 position inside that range at which writing starts;
+ *                normally 0
+ * @num_vals: number of u32 values to write into the range
+ * @mod_val: array of at least @num_vals u32 values to write
+ * @d: pointer to the golden context batch buffer
+ *
+ * Every argument is validated before anything is written, so on error the
+ * batch buffer is left untouched.
+ *
+ * Return: 0 on success, -EINVAL if @mod_index does not name a table entry,
+ * if @range_offset or @num_vals is negative, if the write would not fit in
+ * the range, or if the range itself is not a DWORD-aligned span lying
+ * inside the first PAGE_SIZE bytes of the batch buffer.
+ */
+static int gc_write_values(const struct intel_renderstate_rodata *rodata,
+			   int mod_index,
+			   int range_offset,
+			   int num_vals,
+			   const u32 *mod_val,
+			   u32 *d)
+{
+	int mod_offset, mod_max_cnt, i;
+	u32 *dst;
+
+	/* A negative index would read in front of the mod_values[] table */
+	if (mod_index < 0 || mod_index >= rodata->mod_value_items)
+		return -EINVAL;
+
+	/* Negative positions/counts would write in front of the range */
+	if (range_offset < 0 || num_vals < 0)
+		return -EINVAL;
+
+	/* Start byte offset of modification range in golden context BB */
+	mod_offset = rodata->mod_values[mod_index].offset;
+	/* Max # of u32 values which can be written for this range */
+	mod_max_cnt = rodata->mod_values[mod_index].max_cnt;
+
+	/*
+	 * Reject counts for which the unsigned PAGE_SIZE subtraction below
+	 * would wrap around and make the bound check pass by accident.
+	 */
+	if (mod_max_cnt < 0 || mod_max_cnt > PAGE_SIZE / sizeof(u32))
+		return -EINVAL;
+
+	/*
+	 * The write must fit in the range. Written as a subtraction so that
+	 * range_offset + num_vals cannot overflow a signed int.
+	 */
+	if (range_offset > mod_max_cnt ||
+	    num_vals > mod_max_cnt - range_offset)
+		return -EINVAL;
+
+	/*
+	 * The range must start at a positive, DWORD-aligned byte offset and
+	 * end within the first page of the batch buffer. An offset of -1
+	 * (the "nothing to modify" table entry) is rejected here.
+	 */
+	if (mod_offset <= 0 ||
+	    mod_offset > PAGE_SIZE - (mod_max_cnt * sizeof(u32)) ||
+	    (mod_offset & (sizeof(u32) - 1)) != 0)
+		return -EINVAL;
+
+	/* Copy the new values into golden context BB */
+	dst = d + (mod_offset / sizeof(u32)) + range_offset;
+	for (i = 0; i < num_vals; i++)
+		dst[i] = mod_val[i];
+
+	return 0;
+}
+
+
/**
* Offsets for golden context "value modifications" defined in
* intel_renderstate_genx.c are locations in the batch buffer
* where the driver is allowed to modify one or more DWORDs
* to customize instructions for the GEN platform present.
*/
-static int gc_modify_values(struct drm_device *dev,
+static int gc_modify_platform_values(struct drm_device *dev,
const struct intel_renderstate_rodata *rodata,
u32 *d)
{
- /* Init index to "nothing to modify" value */
- int mod_index = -1;
- int mod_val[1];
- int num_vals = 0;
-
- /* Write required value(s) to the specified offset(s) */
- if ((mod_index >= 0) &&
- (num_vals > 0)) {
- if (mod_index < rodata->mod_value_items) {
- int mod_offset, mod_max_cnt, i;
-
- mod_offset =
- rodata->mod_values[mod_index].offset;
- mod_max_cnt =
- rodata->mod_values[mod_index].max_cnt;
-
- /* Check for DWORD aligned address, et al */
- if ((num_vals > mod_max_cnt) ||
- (mod_offset <= 0) ||
- (mod_offset > PAGE_SIZE - (mod_max_cnt * sizeof(u32))) ||
- ((mod_offset & (sizeof(u32) - 1)) != 0))
- return -EINVAL;
-
- for (i = 0; i < num_vals; i++) {
- d[(mod_offset/sizeof(u32)) + i] = mod_val[i];
- }
- } else
- return -EINVAL;
+ /* Modifications to golden context BB for Broxton */
+ if (IS_BROXTON(dev)) {
+ int ret, mod_val[1];
+ /* Step 1 ...
+ * Remove SKL's MI_BATCH_BUFFER_END command in the mod_index 0
+ * range to allow the golden context batch buffer to proceed to
+ * the BXT specific commands setting up pooled EUs
+ */
+ mod_val[0] = 0;
+ ret = gc_write_values(rodata,
+ 0, /* mod_index 0 */
+ 0, /* range_offset */
+ 1, /* num_vals */
+ mod_val, d);
+ if (ret < 0)
+ return ret;
+
+ /* TODO - Integrate Jeff McGee's patches to determine
+ * whether the BXT is 2x6 or 3x6. For now, assume 2x6
+ */
+ #define IS_BROXTON_2x6 1 /* Eventually get rid of this */
+
+ if (IS_BROXTON_2x6) {
+ /* Step 2 ...
+ * Replace existing PoolBitField-Slice0 value in the
+ * mod_index 1 range with '0', the value for 2x6
+ * pooled EUs
+ */
+ mod_val[0] = 0;
+ ret = gc_write_values(rodata,
+ 1, /* mod_index 1 */
+ 0, /* range_offset, u32 */
+ 1, /* num_vals */
+ mod_val, d);
+ if (ret < 0)
+ return ret;
+ }
}
return 0;
@@ -156,7 +216,7 @@ static int render_state_setup(struct render_state *so, struct drm_device *dev)
/* Any golden context BB entries to modify? */
if ((rodata->mod_values[0].offset != -1) &&
(rodata->mod_values[0].max_cnt > 0))
- ret = gc_modify_values(dev, rodata, d);
+ ret = gc_modify_platform_values(dev, rodata, d);
else
ret = 0;
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.c b/drivers/gpu/drm/i915/intel_renderstate_gen6.c
index ef29069..ef861d1 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen6.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.c
@@ -35,7 +35,7 @@ static const u32 gen6_null_state_relocs[] = {
};
static const struct mod_value_info gen6_mod_values[] = {
- {-1, 0},
+ {-1, 0}
};
static const u32 gen6_null_state_batch[] = {
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.c b/drivers/gpu/drm/i915/intel_renderstate_gen7.c
index 408e901..53013a2 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen7.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.c
@@ -34,7 +34,7 @@ static const u32 gen7_null_state_relocs[] = {
};
static const struct mod_value_info gen7_mod_values[] = {
- {-1, 0},
+ {-1, 0}
};
static const u32 gen7_null_state_batch[] = {
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.c b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
index d5b1383..0115efd 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen8.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.c
@@ -34,7 +34,7 @@ static const u32 gen8_null_state_relocs[] = {
};
static const struct mod_value_info gen8_mod_values[] = {
- {-1, 0},
+ {-1, 0}
};
static const u32 gen8_null_state_batch[] = {
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen9.c b/drivers/gpu/drm/i915/intel_renderstate_gen9.c
index 7cb43a2..0475f40 100644
--- a/drivers/gpu/drm/i915/intel_renderstate_gen9.c
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen9.c
@@ -34,7 +34,9 @@ static const u32 gen9_null_state_relocs[] = {
};
static const struct mod_value_info gen9_mod_values[] = {
- {-1, 0},
+ {0x00000dd4, 2},
+ {0x00000de8, 4},
+ {-1, 0}
};
static const u32 gen9_null_state_batch[] = {
@@ -923,16 +925,16 @@ static const u32 gen9_null_state_batch[] = {
0x00000001,
0x00000000,
0x00000000,
- 0x05000000, /* cmds end */
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
- 0x00000000,
+ 0x05000000, /* 2 mod_values */
0x00000000,
+ 0x69040302,
+ 0x70050004,
+ 0x80000000,
+ 0x00777000, /* 4 mod_values */
0x00000000,
0x00000000,
0x00000000,
+ 0x05000000, /* cmds end */
0x00000000,
0x00000000, /* state start */
0x00000000,
--
1.9.1
More information about the Intel-gfx
mailing list