[Intel-gfx] [PATCH 5/6] drm/i915: Remove inline from sseu helper functions
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Wed May 1 20:04:30 UTC 2019
Can you elaborate a bit more on the rationale for this? Do you just want
to avoid having too many inlines since the paths they're used in are not
critical, or do you have a more functional reason? This is not a
criticism of the patch, I just want to understand where you're coming
from ;)
BTW, looking at this patch I realized there are a few more
DIV_ROUND_UP(..., BITS_PER_BYTE) calls that could be converted to
GEN_SSEU_STRIDE() in patch 2. I noticed you update them to a new
variable in the next patch, but for consistency it might still be worth
updating them all in patch 2, or at least mentioning in the commit
message of patch 2 that the remaining cases are updated by a follow-up
patch in the series. Patch 2 is quite small, so you could also just
squash it into patch 6 to avoid the split.
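
Just as a sketch of what I mean (assuming GEN_SSEU_STRIDE() in patch 2
is the plain DIV_ROUND_UP(..., BITS_PER_BYTE) wrapper, which is how I
read it), the stride computation in intel_sseu_eu_idx() below could
become:

	/*
	 * Sketch only, assuming patch 2 defines:
	 *   #define GEN_SSEU_STRIDE(max_entries) \
	 *           DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
	 */
	static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
				     int subslice)
	{
		/* bytes needed to store one subslice's EU mask */
		int subslice_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
		/* bytes per slice = per-subslice stride times subslice count */
		int slice_stride = sseu->max_subslices * subslice_stride;

		return slice * slice_stride + subslice * subslice_stride;
	}

and similarly for the loop bounds in intel_sseu_get_eus() and
intel_sseu_set_eus(). Nothing functional changes, it just keeps the
stride computation consistent with the rest of the series.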
Daniele
On 5/1/19 8:34 AM, Stuart Summers wrote:
> Additionally, ensure these are all prefixed with intel_sseu_*
> to match the convention of other functions in i915.
>
> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_sseu.c | 54 +++++++++++++++++++
> drivers/gpu/drm/i915/gt/intel_sseu.h | 57 +++-----------------
> drivers/gpu/drm/i915/i915_debugfs.c | 6 +--
> drivers/gpu/drm/i915/i915_drv.c | 2 +-
> drivers/gpu/drm/i915/intel_device_info.c | 69 ++++++++++++------------
> 5 files changed, 102 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index 7f448f3bea0b..4a0b82fc108c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -8,6 +8,60 @@
> #include "intel_lrc_reg.h"
> #include "intel_sseu.h"
>
> +unsigned int
> +intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
> +{
> + unsigned int i, total = 0;
> +
> + for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
> + total += hweight8(sseu->subslice_mask[i]);
> +
> + return total;
> +}
> +
> +unsigned int
> +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
> +{
> + return hweight8(sseu->subslice_mask[slice]);
> +}
> +
> +static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
> + int subslice)
> +{
> + int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
> + BITS_PER_BYTE);
> + int slice_stride = sseu->max_subslices * subslice_stride;
> +
> + return slice * slice_stride + subslice * subslice_stride;
> +}
> +
> +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
> + int subslice)
> +{
> + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
> + u16 eu_mask = 0;
> +
> + for (i = 0;
> + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
> + eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
> + (i * BITS_PER_BYTE);
> + }
> +
> + return eu_mask;
> +}
> +
> +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
> + u16 eu_mask)
> +{
> + int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
> +
> + for (i = 0;
> + i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
> + sseu->eu_mask[offset + i] =
> + (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
> + }
> +}
> +
> u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> const struct intel_sseu *req_sseu)
> {
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 029e71d8f140..56e3721ae83f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -63,58 +63,17 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
> return value;
> }
>
> -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
> -{
> - unsigned int i, total = 0;
> -
> - for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
> - total += hweight8(sseu->subslice_mask[i]);
> +unsigned int
> +intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
>
> - return total;
> -}
> +unsigned int
> +intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
>
> -static inline unsigned int
> -sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
> -{
> - return hweight8(sseu->subslice_mask[slice]);
> -}
> -
> -static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
> - int slice, int subslice)
> -{
> - int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
> - BITS_PER_BYTE);
> - int slice_stride = sseu->max_subslices * subslice_stride;
> -
> - return slice * slice_stride + subslice * subslice_stride;
> -}
> +u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
> + int subslice);
>
> -static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
> - int slice, int subslice)
> -{
> - int i, offset = sseu_eu_idx(sseu, slice, subslice);
> - u16 eu_mask = 0;
> -
> - for (i = 0;
> - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
> - eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
> - (i * BITS_PER_BYTE);
> - }
> -
> - return eu_mask;
> -}
> -
> -static inline void sseu_set_eus(struct sseu_dev_info *sseu,
> - int slice, int subslice, u16 eu_mask)
> -{
> - int i, offset = sseu_eu_idx(sseu, slice, subslice);
> -
> - for (i = 0;
> - i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
> - sseu->eu_mask[offset + i] =
> - (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
> - }
> -}
> +void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
> + u16 eu_mask);
>
> u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
> const struct intel_sseu *req_sseu);
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index fe854c629a32..3f3ee83ac315 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4158,7 +4158,7 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
> RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
> }
> sseu->eu_total = sseu->eu_per_subslice *
> - sseu_subslice_total(sseu);
> + intel_sseu_subslice_total(sseu);
>
> /* subtract fused off EU(s) from enabled slice(s) */
> for (s = 0; s < fls(sseu->slice_mask); s++) {
> @@ -4182,10 +4182,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
> seq_printf(m, " %s Slice Total: %u\n", type,
> hweight8(sseu->slice_mask));
> seq_printf(m, " %s Subslice Total: %u\n", type,
> - sseu_subslice_total(sseu));
> + intel_sseu_subslice_total(sseu));
> for (s = 0; s < fls(sseu->slice_mask); s++) {
> seq_printf(m, " %s Slice%i subslices: %u\n", type,
> - s, sseu_subslices_per_slice(sseu, s));
> + s, intel_sseu_subslices_per_slice(sseu, s));
> }
> seq_printf(m, " %s EU Total: %u\n", type,
> sseu->eu_total);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index c376244c19c4..130c5140db0d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -378,7 +378,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
> value = i915_cmd_parser_get_version(dev_priv);
> break;
> case I915_PARAM_SUBSLICE_TOTAL:
> - value = sseu_subslice_total(sseu);
> + value = intel_sseu_subslice_total(sseu);
> if (!value)
> return -ENODEV;
> break;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index 559cf0d0628e..e1dbccf04cd9 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -90,10 +90,10 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
>
> drm_printf(p, "slice total: %u, mask=%04x\n",
> hweight8(sseu->slice_mask), sseu->slice_mask);
> - drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
> + drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
> for (s = 0; s < sseu->max_slices; s++) {
> drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
> - s, sseu_subslices_per_slice(sseu, s),
> + s, intel_sseu_subslices_per_slice(sseu, s),
> sseu->subslice_mask[s]);
> }
> drm_printf(p, "EU total: %u\n", sseu->eu_total);
> @@ -126,11 +126,11 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
>
> for (s = 0; s < sseu->max_slices; s++) {
> drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
> - s, sseu_subslices_per_slice(sseu, s),
> + s, intel_sseu_subslices_per_slice(sseu, s),
> sseu->subslice_mask[s]);
>
> for (ss = 0; ss < sseu->max_subslices; ss++) {
> - u16 enabled_eus = sseu_get_eus(sseu, s, ss);
> + u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss);
>
> drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
> ss, hweight16(enabled_eus), enabled_eus);
> @@ -180,7 +180,7 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
> sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask;
> for (ss = 0; ss < sseu->max_subslices; ss++) {
> if (sseu->subslice_mask[s] & BIT(ss))
> - sseu_set_eus(sseu, s, ss, eu_en);
> + intel_sseu_set_eus(sseu, s, ss, eu_en);
> }
> }
> }
> @@ -222,32 +222,32 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
> /* Slice0 */
> eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> for (ss = 0; ss < sseu->max_subslices; ss++)
> - sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
> + intel_sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
> /* Slice1 */
> - sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
> + intel_sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
> eu_en = ~I915_READ(GEN8_EU_DISABLE1);
> - sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
> + intel_sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
> /* Slice2 */
> - sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
> - sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
> + intel_sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
> + intel_sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
> /* Slice3 */
> - sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
> + intel_sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
> eu_en = ~I915_READ(GEN8_EU_DISABLE2);
> - sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
> + intel_sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
> /* Slice4 */
> - sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
> - sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
> + intel_sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
> + intel_sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
> /* Slice5 */
> - sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
> + intel_sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
> eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> - sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
> + intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
>
> /* Do a second pass where we mark the subslices disabled if all their
> * eus are off.
> */
> for (s = 0; s < sseu->max_slices; s++) {
> for (ss = 0; ss < sseu->max_subslices; ss++) {
> - if (sseu_get_eus(sseu, s, ss) == 0)
> + if (intel_sseu_get_eus(sseu, s, ss) == 0)
> sseu->subslice_mask[s] &= ~BIT(ss);
> }
> }
> @@ -260,9 +260,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
> * EU in any one subslice may be fused off for die
> * recovery.
> */
> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
> DIV_ROUND_UP(sseu->eu_total,
> - sseu_subslice_total(sseu)) : 0;
> + intel_sseu_subslice_total(sseu)) :
> + 0;
>
> /* No restrictions on Power Gating */
> sseu->has_slice_pg = 1;
> @@ -290,7 +291,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>
> sseu->subslice_mask[0] |= BIT(0);
> - sseu_set_eus(sseu, 0, 0, ~disabled_mask);
> + intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask);
> }
>
> if (!(fuse & CHV_FGT_DISABLE_SS1)) {
> @@ -301,7 +302,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
>
> sseu->subslice_mask[0] |= BIT(1);
> - sseu_set_eus(sseu, 0, 1, ~disabled_mask);
> + intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask);
> }
>
> sseu->eu_total = compute_eu_total(sseu);
> @@ -310,8 +311,8 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
> * CHV expected to always have a uniform distribution of EU
> * across subslices.
> */
> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
> - sseu->eu_total / sseu_subslice_total(sseu) :
> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
> + sseu->eu_total / intel_sseu_subslice_total(sseu) :
> 0;
> /*
> * CHV supports subslice power gating on devices with more than
> @@ -319,7 +320,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
> * more than one EU pair per subslice.
> */
> sseu->has_slice_pg = 0;
> - sseu->has_subslice_pg = sseu_subslice_total(sseu) > 1;
> + sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
> sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
> }
>
> @@ -369,7 +370,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>
> eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;
>
> - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
> + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
>
> eu_per_ss = sseu->max_eus_per_subslice -
> hweight8(eu_disabled_mask);
> @@ -393,9 +394,10 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
> * recovery. BXT is expected to be perfectly uniform in EU
> * distribution.
> */
> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
> DIV_ROUND_UP(sseu->eu_total,
> - sseu_subslice_total(sseu)) : 0;
> + intel_sseu_subslice_total(sseu)) :
> + 0;
> /*
> * SKL+ supports slice power gating on devices with more than
> * one slice, and supports EU power gating on devices with
> @@ -407,7 +409,7 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
> sseu->has_slice_pg =
> !IS_GEN9_LP(dev_priv) && hweight8(sseu->slice_mask) > 1;
> sseu->has_subslice_pg =
> - IS_GEN9_LP(dev_priv) && sseu_subslice_total(sseu) > 1;
> + IS_GEN9_LP(dev_priv) && intel_sseu_subslice_total(sseu) > 1;
> sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>
> if (IS_GEN9_LP(dev_priv)) {
> @@ -477,7 +479,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
> eu_disabled_mask =
> eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
>
> - sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
> + intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
>
> n_disabled = hweight8(eu_disabled_mask);
>
> @@ -496,9 +498,10 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
> * subslices with the exception that any one EU in any one subslice may
> * be fused off for die recovery.
> */
> - sseu->eu_per_subslice = sseu_subslice_total(sseu) ?
> + sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
> DIV_ROUND_UP(sseu->eu_total,
> - sseu_subslice_total(sseu)) : 0;
> + intel_sseu_subslice_total(sseu)) :
> + 0;
>
> /*
> * BDW supports slice power gating on devices with more than
> @@ -561,8 +564,8 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
>
> for (s = 0; s < sseu->max_slices; s++) {
> for (ss = 0; ss < sseu->max_subslices; ss++) {
> - sseu_set_eus(sseu, s, ss,
> - (1UL << sseu->eu_per_subslice) - 1);
> + intel_sseu_set_eus(sseu, s, ss,
> + (1UL << sseu->eu_per_subslice) - 1);
> }
> }
>
>