[Intel-gfx] [PATCH v3 3/3] drm/i915: Give proper names to MOCS entries

Zhao Yakui yakui.zhao at intel.com
Thu Jul 14 01:38:39 UTC 2016


On 07/13/2016 06:04 PM, Deak, Imre wrote:
> Hi Yakui,
>
> thanks for taking a look at these, see my comment below.
>
> On ke, 2016-07-13 at 10:22 +0800, Zhao Yakui wrote:
>> On 07/01/2016 09:40 PM, Deak, Imre wrote:
>>> The purpose of each MOCS entry isn't well defined at the moment. Defining
>>> these is important to remove any uncertainty about the use of these
>>> entries, for example in terms of performance and GPU/CPU coherency.
>>>
>>> Suggested by Ville.
>>>
>>> CC: Rong R Yang<rong.r.yang at intel.com>
>>> CC: Yakui Zhao<yakui.zhao at intel.com>
>>> CC: Ville Syrjälä<ville.syrjala at linux.intel.com>
>>> CC: Chris Wilson<chris at chris-wilson.co.uk>
>>> Signed-off-by: Imre Deak<imre.deak at intel.com>
>>
>> This looks readable and meaningful after giving proper names to the MOCS
>> entry indexes.
>>
>> But I'm not sure whether the comment for I915_MOCS_CACHED has a typo?
>>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
>>>    include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
>>>    2 files changed, 31 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 927825f..86adc11 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
>>>     *       end.
>>>     */
>>>    static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>> -	{ /* 0x00000009 */
>>> +	[I915_MOCS_UNCACHED] = {
>>> +	  /* 0x00000009 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>>    			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
>>> @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>    	  /* 0x0010 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_AUTO] = {
>>>    	  /* 0x00000038 */
>>>    	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>    	  /* 0x0030 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_CACHED] = {
>>>    	  /* 0x0000003b */
>>>    	  .control_value = LE_CACHEABILITY(LE_WB) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>
>>>    /* NOTE: the LE_TGT_CACHE is not used on Broxton */
>>>    static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>> -	{
>>> +	[I915_MOCS_UNCACHED] = {
>>>    	  /* 0x00000009 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	  /* 0x0010 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_AUTO] = {
>>>    	  /* 0x00000038 */
>>>    	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	  /* 0x0030 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_CACHED] = {
>>>    	  /* 0x00000039 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index c17d63d..a5d116f 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -62,6 +62,30 @@ extern "C" {
>>>    #define I915_ERROR_UEVENT		"ERROR"
>>>    #define I915_RESET_UEVENT		"RESET"
>>>
>>> +/*
>>> + * MOCS indexes used for GPU surfaces, defining the cacheability of the
>>> + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
>>> + */
>>> +enum i915_mocs_table_index {
>>> +	/*
>>> +	 * Not cached anywhere, coherency between CPU and GPU accesses is
>>> +	 * guaranteed.
>>> +	 */
>>> +	I915_MOCS_UNCACHED,
>>> +	/*
>>> +	 * Cacheability and coherency controlled by the kernel automatically
>>> +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
>>> +	 * usage of the surface (used for display scanout or not).
>>> +	 */
>>> +	I915_MOCS_AUTO,
>>> +	/*
>>> +	 * Cached in all GPU caches available on the platform.
>>> +	 * Coherency between CPU and GPU accesses to the surface is not
>>> +	 * guaranteed without extra synchronization.
>>> +	 */
>>
>> IMO coherency is guaranteed without extra synchronization for
>> I915_MOCS_CACHED.
>
> No. On BXT it will make the data cached in GPU caches but will not keep
> the data coherent between GPU and CPU without extra synchronization.
> For that we would need to enable snooping, but that has considerable
> overhead, so we turn that off in patch 2/3. On SKL using this entry
> happens to give you a coherent mapping, but that's just because the HW
> doesn't allow us to turn off snooping on that platform (supposedly
> because there snooping doesn't have a considerable overhead thanks to
> LLC).

Thanks for the detailed explanation.
Now it is clear to me.

Thanks
    Yakui
>
> --Imre
>
>>
>>> +	I915_MOCS_CACHED,
>>> +};
>>> +
>>>    /* Each region is a minimum of 16k, and there are at most 255 of them.
>>>     */
>>>    #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
>>



More information about the Intel-gfx mailing list