[Intel-gfx] [PATCH v2 9/9] drm/i915: Add render decompression support
Ben Widawsky
ben at bwidawsk.net
Tue Feb 28 21:00:17 UTC 2017
On 17-02-28 12:18:39, Jason Ekstrand wrote:
>On Thu, Jan 5, 2017 at 7:14 AM, <ville.syrjala at linux.intel.com> wrote:
>
>> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>>
>> SKL+ display engine can scan out certain kinds of compressed surfaces
>> produced by the render engine. This involves telling the display engine
>> the location of the color control surface (CCS) which describes
>> which parts of the main surface are compressed and which are not. The
>> location of CCS is provided by userspace as just another plane with its
>> own offset.
>>
>> Add the required stuff to validate the user provided AUX plane metadata
>> and convert the user provided linear offset into something the hardware
>> can consume.
>>
>> Due to hardware limitations we require that the main surface and
>> the AUX surface (CCS) be part of the same bo. The hardware also
>> makes life hard by not allowing you to provide separate x/y offsets
>> for the main and AUX surfaces (except with NV12), so finding suitable
>> offsets for both requires a bit of work. Assuming we still want to keep
>> playing tricks with the offsets. I've just gone with a dumb "search
>> backward for suitable offsets" approach, which is far from optimal,
>> but it works.
>>
>> Also not all planes will be capable of scanning out compressed surfaces,
>> and eg. 90/270 degree rotation is not supported in combination with
>> decompression either.
>>
>> This patch may contain work from at least the following people:
>> * Vandana Kannan <vandana.kannan at intel.com>
>> * Daniel Vetter <daniel at ffwll.ch>
>> * Ben Widawsky <ben at bwidawsk.net>
>>
>> v2: Deal with display workarounds 0390, 0531, 1125 (Paulo)
>>
>> Cc: Paulo Zanoni <paulo.r.zanoni at intel.com>
>> Cc: Vandana Kannan <vandana.kannan at intel.com>
>> Cc: Daniel Vetter <daniel at ffwll.ch>
>> Cc: Ben Widawsky <ben at bwidawsk.net>
>> Cc: Jason Ekstrand <jason at jlekstrand.net>
>> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_reg.h | 23 ++++
>> drivers/gpu/drm/i915/intel_display.c | 234 ++++++++++++++++++++++++++++++
>> ++---
>> drivers/gpu/drm/i915/intel_pm.c | 29 ++++-
>> drivers/gpu/drm/i915/intel_sprite.c | 5 +
>> 4 files changed, 274 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_
>> reg.h
>> index 00970aa77afa..6849ba93f1d9 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -6209,6 +6209,28 @@ enum {
>> _ID(id, _PS_ECC_STAT_1A, _PS_ECC_STAT_2A), \
>> _ID(id, _PS_ECC_STAT_1B, _PS_ECC_STAT_2B))
>>
>> +#define PLANE_AUX_DIST_1_A 0x701c0
>> +#define PLANE_AUX_DIST_2_A 0x702c0
>> +#define PLANE_AUX_DIST_1_B 0x711c0
>> +#define PLANE_AUX_DIST_2_B 0x712c0
>> +#define _PLANE_AUX_DIST_1(pipe) \
>> + _PIPE(pipe, PLANE_AUX_DIST_1_A, PLANE_AUX_DIST_1_B)
>> +#define _PLANE_AUX_DIST_2(pipe) \
>> + _PIPE(pipe, PLANE_AUX_DIST_2_A, PLANE_AUX_DIST_2_B)
>> +#define PLANE_AUX_DIST(pipe, plane) \
>> + _MMIO_PLANE(plane, _PLANE_AUX_DIST_1(pipe),
>> _PLANE_AUX_DIST_2(pipe))
>> +
>> +#define PLANE_AUX_OFFSET_1_A 0x701c4
>> +#define PLANE_AUX_OFFSET_2_A 0x702c4
>> +#define PLANE_AUX_OFFSET_1_B 0x711c4
>> +#define PLANE_AUX_OFFSET_2_B 0x712c4
>> +#define _PLANE_AUX_OFFSET_1(pipe) \
>> + _PIPE(pipe, PLANE_AUX_OFFSET_1_A, PLANE_AUX_OFFSET_1_B)
>> +#define _PLANE_AUX_OFFSET_2(pipe) \
>> + _PIPE(pipe, PLANE_AUX_OFFSET_2_A, PLANE_AUX_OFFSET_2_B)
>> +#define PLANE_AUX_OFFSET(pipe, plane) \
>> + _MMIO_PLANE(plane, _PLANE_AUX_OFFSET_1(pipe),
>> _PLANE_AUX_OFFSET_2(pipe))
>> +
>> /* legacy palette */
>> #define _LGC_PALETTE_A 0x4a000
>> #define _LGC_PALETTE_B 0x4a800
>> @@ -6433,6 +6455,7 @@ enum {
>> # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2)
>>
>> #define CHICKEN_PAR1_1 _MMIO(0x42080)
>> +#define SKL_RC_HASH_OUTSIDE (1 << 15)
>> #define DPA_MASK_VBLANK_SRD (1 << 15)
>> #define FORCE_ARB_IDLE_PLANES (1 << 14)
>> #define SKL_EDP_PSR_FIX_RDWRAP (1 << 3)
>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>> b/drivers/gpu/drm/i915/intel_display.c
>> index 38de9df0ec60..2236abebd8bc 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -2064,11 +2064,19 @@ intel_tile_width_bytes(const struct
>> drm_framebuffer *fb, int plane)
>> return 128;
>> else
>> return 512;
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + if (plane == 1)
>> + return 64;
>> + /* fall through */
>> case I915_FORMAT_MOD_Y_TILED:
>> if (IS_GEN2(dev_priv) || HAS_128_BYTE_Y_TILING(dev_priv))
>> return 128;
>> else
>> return 512;
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> + if (plane == 1)
>> + return 64;
>>
>
>I've said it before but reading through Ben's patches again makes me want to
>be peskier about it. I would really like the UABI to treat the CCS as if
>it's Y-tiled with a tile size of 128B x 32 rows. Why? Because this is
>what all the docs say it is. From the display docs for "Color Control
>Surface":
>
>"The Color Control Surface (CCS) contains the compression status of the
>cache-line pairs. The
>compression state of the cache-line pair is specified by 2 bits in the CCS.
>Each CCS cache-line represents
>an area on the main surface of 16 x16 sets of 128 byte Y-tiled
>cache-line-pairs. CCS is always Y tiled."
>
>This contains 95% of the information needed to know the relation between
>the CCS and the main surface. The other 5% (which is badly documented) is
>that cache line pairs are horizontally adjacent. This gives a relationship
>in which one cache line in the CCS maps to 32x64 cache lines in the main surface.
>
>But it's not actually Y-tiled? Of course not. I've worked out the exact
>tiling and it looks something like Y but isn't quite the same. However,
>this isn't unique to CCS. Stencil (W-tiled), HiZ, and gen7-8
>single-sampled MCS also each have their own tiling (Haswell MCS is
>especially exotic) but the docs call all of them Y-tiled and I think the
>hardware internally treats them as Y-tiled with the cache lines shuffled
>around a bit.
>
>Given the fact that they seem to like to change the MCS/CCS tiling around
>on every hardware generation, I'm reluctant to base UABI on the fact that
>the CCS appears to have 64x64 "pixels" per tile with each "pixel"
>corresponding to 16x8 pixels in the color surface. That's not what we had
>on any previous gen and may change on gen10 for no apparent reason. I'd
>much rather base it on Y-tiling and a relationship between cache lines
>which, even if they change the exact swizzle on gen10, will probably remain
>the same. (For the gen7-8 analogue of CCS, they changed the tiling every
>generation but the relationship in which one MCS cache line maps to 32x128 color
>cache lines remained the same).
>
>Ok, I've said my piece. If we have to divide by 2 in userspace, we won't
>die, but I'd like to get the UABI right before we chisel it in stone.
>
>--Jason
>
>
>> + /* fall through */
>> case I915_FORMAT_MOD_Yf_TILED:
>> /*
>> * Bspec seems to suggest that the Yf tile width would
>> @@ -2156,7 +2164,7 @@ static unsigned int intel_surf_alignment(const
>> struct drm_framebuffer *fb,
>> struct drm_i915_private *dev_priv = to_i915(fb->dev);
>>
>> /* AUX_DIST needs only 4K alignment */
>> - if (fb->format->format == DRM_FORMAT_NV12 && plane == 1)
>> + if (plane == 1)
>> return 4096;
>>
>> switch (fb->modifier) {
>> @@ -2166,6 +2174,8 @@ static unsigned int intel_surf_alignment(const
>> struct drm_framebuffer *fb,
>> if (INTEL_GEN(dev_priv) >= 9)
>> return 256 * 1024;
>> return 0;
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> case I915_FORMAT_MOD_Y_TILED:
>> case I915_FORMAT_MOD_Yf_TILED:
>> return 1 * 1024 * 1024;
>> @@ -2472,6 +2482,7 @@ static unsigned int intel_fb_modifier_to_tiling(uint64_t
>> fb_modifier)
>> case I915_FORMAT_MOD_X_TILED:
>> return I915_TILING_X;
>> case I915_FORMAT_MOD_Y_TILED:
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> return I915_TILING_Y;
>> default:
>> return I915_TILING_NONE;
>> @@ -2536,6 +2547,35 @@ intel_fill_fb_info(struct drm_i915_private
>> *dev_priv,
>>
>> intel_fb_offset_to_xy(&x, &y, fb, i);
>>
>> + if ((fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) && i ==
>> 1) {
>> + int main_x, main_y;
>> + int ccs_x, ccs_y;
>> +
>> + /*
>> + * Each byte of CCS corresponds to a 16x8 area of
>> the main surface, and
>> + * each CCS tile is 64x64 bytes.
>> + */
>> + ccs_x = (x * 16) % (64 * 16);
>> + ccs_y = (y * 8) % (64 * 8);
>> + main_x = intel_fb->normal[0].x % (64 * 16);
>> + main_y = intel_fb->normal[0].y % (64 * 8);
>> +
>> + /*
>> + * CCS doesn't have its own x/y offset register,
>> so the intra CCS tile
>> + * x/y offsets must match between CCS and the main
>> surface.
>> + */
>> + if (main_x != ccs_x || main_y != ccs_y) {
>> + DRM_DEBUG_KMS("Bad CCS x/y (main %d,%d ccs
>> %d,%d) full (main %d,%d ccs %d,%d)\n",
>> + main_x, main_y,
>> + ccs_x, ccs_y,
>> + intel_fb->normal[0].x,
>> + intel_fb->normal[0].y,
>> + x, y);
>> + return -EINVAL;
>> + }
>> + }
>> +
>> /*
>> * The fence (if used) is aligned to the start of the
>> object
>> * so having the framebuffer wrap around across the edge
>> of the
>> @@ -2873,6 +2913,9 @@ static int skl_max_plane_width(const struct
>> drm_framebuffer *fb, int plane,
>> break;
>> }
>> break;
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> + /* FIXME AUX plane? */
>> case I915_FORMAT_MOD_Y_TILED:
>> case I915_FORMAT_MOD_Yf_TILED:
>> switch (cpp) {
>> @@ -2895,6 +2938,42 @@ static int skl_max_plane_width(const struct
>> drm_framebuffer *fb, int plane,
>> return 2048;
>> }
>>
>> +static bool skl_check_main_ccs_coordinates(struct intel_plane_state
>> *plane_state,
>> + int main_x, int main_y, u32
>> main_offset)
>> +{
>> + const struct drm_framebuffer *fb = plane_state->base.fb;
>> + int aux_x = plane_state->aux.x;
>> + int aux_y = plane_state->aux.y;
>> + u32 aux_offset = plane_state->aux.offset;
>> + u32 alignment = intel_surf_alignment(fb, 1);
>> +
>> + while (aux_offset >= main_offset && aux_y <= main_y) {
>> + int x, y;
>> +
>> + if (aux_x == main_x && aux_y == main_y)
>> + break;
>> +
>> + if (aux_offset == 0)
>> + break;
>> +
>> + x = aux_x / 16;
>> + y = aux_y / 8;
>> + aux_offset = intel_adjust_tile_offset(&x, &y, plane_state,
>> 1,
>> + aux_offset,
>> aux_offset - alignment);
>> + aux_x = x * 16 + aux_x % 16;
>> + aux_y = y * 8 + aux_y % 8;
>> + }
>> +
>> + if (aux_x != main_x || aux_y != main_y)
>> + return false;
>> +
>> + plane_state->aux.offset = aux_offset;
>> + plane_state->aux.x = aux_x;
>> + plane_state->aux.y = aux_y;
>> +
>> + return true;
>> +}
>> +
>> static int skl_check_main_surface(struct intel_plane_state *plane_state)
>> {
>> const struct drm_framebuffer *fb = plane_state->base.fb;
>> @@ -2937,7 +3016,7 @@ static int skl_check_main_surface(struct
>> intel_plane_state *plane_state)
>>
>> while ((x + w) * cpp > fb->pitches[0]) {
>> if (offset == 0) {
>> - DRM_DEBUG_KMS("Unable to find suitable
>> display surface offset\n");
>> + DRM_DEBUG_KMS("Unable to find suitable
>> display surface offset due to X-tiling\n");
>> return -EINVAL;
>> }
>>
>> @@ -2946,6 +3025,26 @@ static int skl_check_main_surface(struct
>> intel_plane_state *plane_state)
>> }
>> }
>>
>> + /*
>> + * CCS AUX surface doesn't have its own x/y offsets, we must make
>> sure
>> + * they match with the main surface x/y offsets.
>> + */
>> + if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) {
>> + while (!skl_check_main_ccs_coordinates(plane_state, x, y,
>> offset)) {
>> + if (offset == 0)
>> + break;
>> +
>> + offset = intel_adjust_tile_offset(&x, &y,
>> plane_state, 0,
>> + offset, offset -
>> alignment);
>> + }
>> +
>> + if (x != plane_state->aux.x || y != plane_state->aux.y) {
>> + DRM_DEBUG_KMS("Unable to find suitable display
>> surface offset due to CCS\n");
>> + return -EINVAL;
>> + }
>> + }
>> +
>> plane_state->main.offset = offset;
>> plane_state->main.x = x;
>> plane_state->main.y = y;
>> @@ -2982,6 +3081,47 @@ static int skl_check_nv12_aux_surface(struct
>> intel_plane_state *plane_state)
>> return 0;
>> }
>>
>> +static int skl_check_ccs_aux_surface(struct intel_plane_state
>> *plane_state)
>> +{
>> + struct intel_plane *plane = to_intel_plane(plane_state->
>> base.plane);
>> + struct intel_crtc *crtc = to_intel_crtc(plane_state->base.crtc);
>> + int src_x = plane_state->base.src.x1 >> 16;
>> + int src_y = plane_state->base.src.y1 >> 16;
>> + int x = src_x / 16;
>> + int y = src_y / 8;
>> + u32 offset;
>> +
>> + switch (plane->id) {
>> + case PLANE_PRIMARY:
>> + case PLANE_SPRITE0:
>> + break;
>> + default:
>> + DRM_DEBUG_KMS("RC support only on plane 1 and 2\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (crtc->pipe == PIPE_C) {
>> + DRM_DEBUG_KMS("No RC support on pipe C\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (plane_state->base.rotation &&
>> + plane_state->base.rotation & ~(DRM_ROTATE_0 | DRM_ROTATE_180))
>> {
>> + DRM_DEBUG_KMS("RC support only with 0/180 degree rotation
>> %x\n",
>> + plane_state->base.rotation);
>> + return -EINVAL;
>> + }
>> +
>> + intel_add_fb_offsets(&x, &y, plane_state, 1);
>> + offset = intel_compute_tile_offset(&x, &y, plane_state, 1);
>> +
>> + plane_state->aux.offset = offset;
>> + plane_state->aux.x = x * 16 + src_x % 16;
>> + plane_state->aux.y = y * 8 + src_y % 8;
>> +
>> + return 0;
>> +}
>> +
>> int skl_check_plane_surface(struct intel_plane_state *plane_state)
>> {
>> const struct drm_framebuffer *fb = plane_state->base.fb;
>> @@ -3002,6 +3142,11 @@ int skl_check_plane_surface(struct
>> intel_plane_state *plane_state)
>> ret = skl_check_nv12_aux_surface(plane_state);
>> if (ret)
>> return ret;
>> + } else if (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS) {
>> + ret = skl_check_ccs_aux_surface(plane_state);
>> + if (ret)
>> + return ret;
>> } else {
>> plane_state->aux.offset = ~0xfff;
>> plane_state->aux.x = 0;
>> @@ -3357,8 +3502,12 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
>> return PLANE_CTL_TILED_X;
>> case I915_FORMAT_MOD_Y_TILED:
>> return PLANE_CTL_TILED_Y;
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + return PLANE_CTL_TILED_Y | PLANE_CTL_DECOMPRESSION_ENABLE;
>> case I915_FORMAT_MOD_Yf_TILED:
>> return PLANE_CTL_TILED_YF;
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> + return PLANE_CTL_TILED_YF | PLANE_CTL_DECOMPRESSION_
>> ENABLE;
>> default:
>> MISSING_CASE(fb_modifier);
>> }
>> @@ -3401,6 +3550,7 @@ static void skylake_update_primary_plane(struct
>> drm_plane *plane,
>> u32 plane_ctl;
>> unsigned int rotation = plane_state->base.rotation;
>> u32 stride = skl_plane_stride(fb, 0, rotation);
>> + u32 aux_stride = skl_plane_stride(fb, 1, rotation);
>> u32 surf_addr = plane_state->main.offset;
>> int scaler_id = plane_state->scaler_id;
>> int src_x = plane_state->main.x;
>> @@ -3436,6 +3586,10 @@ static void skylake_update_primary_plane(struct
>> drm_plane *plane,
>> I915_WRITE(PLANE_OFFSET(pipe, plane_id), (src_y << 16) | src_x);
>> I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride);
>> I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w);
>> + I915_WRITE(PLANE_AUX_DIST(pipe, plane_id),
>> + (plane_state->aux.offset - surf_addr) | aux_stride);
>> + I915_WRITE(PLANE_AUX_OFFSET(pipe, plane_id),
>> + (plane_state->aux.y << 16) | plane_state->aux.x);
>>
>> if (scaler_id >= 0) {
>> uint32_t ps_ctrl = 0;
>> @@ -9807,10 +9961,16 @@ skylake_get_initial_plane_config(struct
>> intel_crtc *crtc,
>> fb->modifier = I915_FORMAT_MOD_X_TILED;
>> break;
>> case PLANE_CTL_TILED_Y:
>> - fb->modifier = I915_FORMAT_MOD_Y_TILED;
>> + if (val & PLANE_CTL_DECOMPRESSION_ENABLE)
>> + fb->modifier = I915_FORMAT_MOD_Y_TILED_CCS;
>> + else
>> + fb->modifier = I915_FORMAT_MOD_Y_TILED;
>> break;
>> case PLANE_CTL_TILED_YF:
>> - fb->modifier = I915_FORMAT_MOD_Yf_TILED;
>> + if (val & PLANE_CTL_DECOMPRESSION_ENABLE)
>> + fb->modifier = I915_FORMAT_MOD_Yf_TILED_CCS;
>> + else
>> + fb->modifier = I915_FORMAT_MOD_Yf_TILED;
>> break;
>> default:
>> MISSING_CASE(tiling);
>> @@ -12014,7 +12174,7 @@ static void skl_do_mmio_flip(struct intel_crtc
>> *intel_crtc,
>> u32 ctl, stride = skl_plane_stride(fb, 0, rotation);
>>
>> ctl = I915_READ(PLANE_CTL(pipe, 0));
>> - ctl &= ~PLANE_CTL_TILED_MASK;
>> + ctl &= ~(PLANE_CTL_TILED_MASK | PLANE_CTL_DECOMPRESSION_ENABLE);
>> switch (fb->modifier) {
>> case DRM_FORMAT_MOD_NONE:
>> break;
>> @@ -12024,9 +12184,15 @@ static void skl_do_mmio_flip(struct intel_crtc
>> *intel_crtc,
>> case I915_FORMAT_MOD_Y_TILED:
>> ctl |= PLANE_CTL_TILED_Y;
>> break;
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + ctl |= PLANE_CTL_TILED_Y | PLANE_CTL_DECOMPRESSION_ENABLE;
>> + break;
>> case I915_FORMAT_MOD_Yf_TILED:
>> ctl |= PLANE_CTL_TILED_YF;
>> break;
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> + ctl |= PLANE_CTL_TILED_YF | PLANE_CTL_DECOMPRESSION_
>> ENABLE;
>> + break;
>> default:
>> MISSING_CASE(fb->modifier);
>> }
>> @@ -15925,9 +16091,10 @@ static int intel_framebuffer_init(struct
>> drm_device *dev,
>> struct drm_i915_gem_object *obj)
>> {
>> struct drm_i915_private *dev_priv = to_i915(dev);
>> + struct drm_framebuffer *fb = &intel_fb->base;
>> unsigned int tiling = i915_gem_object_get_tiling(obj);
>> - int ret;
>> - u32 pitch_limit, stride_alignment;
>> + int ret, i;
>> + u32 pitch_limit;
>> struct drm_format_name_buf format_name;
>>
>> WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>> @@ -15953,6 +16120,19 @@ static int intel_framebuffer_init(struct
>> drm_device *dev,
>>
>> /* Passed in modifier sanity checking. */
>> switch (mode_cmd->modifier[0]) {
>> + case I915_FORMAT_MOD_Y_TILED_CCS:
>> + case I915_FORMAT_MOD_Yf_TILED_CCS:
>> + switch (mode_cmd->pixel_format) {
>> + case DRM_FORMAT_XBGR8888:
>> + case DRM_FORMAT_ABGR8888:
>> + case DRM_FORMAT_XRGB8888:
>> + case DRM_FORMAT_ARGB8888:
>> + break;
>> + default:
>> + DRM_DEBUG_KMS("RC supported only with RGB8888
>> formats\n");
>> + return -EINVAL;
>> + }
>> + /* fall through */
>> case I915_FORMAT_MOD_Y_TILED:
>> case I915_FORMAT_MOD_Yf_TILED:
>> if (INTEL_GEN(dev_priv) < 9) {
>> @@ -16059,22 +16239,46 @@ static int intel_framebuffer_init(struct
>> drm_device *dev,
>> if (mode_cmd->offsets[0] != 0)
>> return -EINVAL;
>>
>> - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd);
>> + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd);
>>
>> - stride_alignment = intel_fb_stride_alignment(&intel_fb->base, 0);
>> - if (mode_cmd->pitches[0] & (stride_alignment - 1)) {
>> - DRM_DEBUG_KMS("pitch (%d) must be at least %u byte
>> aligned\n",
>> - mode_cmd->pitches[0], stride_alignment);
>> - return -EINVAL;
>> + for (i = 0; i < fb->format->num_planes; i++) {
>> + u32 stride_alignment;
>> +
>> + if (mode_cmd->handles[i] != mode_cmd->handles[0]) {
>> + DRM_DEBUG_KMS("bad plane %d handle\n", i);
>> + return -EINVAL;
>> + }
>> +
>> + stride_alignment = intel_fb_stride_alignment(fb, i);
>> +
>> + /*
>> + * Display WA #0531: skl,bxt,kbl,glk
>> + *
>> + * Render decompression and plane width > 3840
>> + * combined with horizontal panning requires the
>> + * plane stride to be a multiple of 4. We'll just
>> + * require the entire fb to accommodate that to avoid
>> + * potential runtime errors at plane configuration time.
>>
>
>Note to Ben: Userspace needs to know about this too. In this case, I
>believe "multiple of 4" means "multiple of 4 tiles". You've never hit this
>because the standard 1920x1080 is 60 tiles wide which is a multiple of 4.
>
>
I'm more likely not hitting it because the width must be > 3840. Okay, I'll
modify my patch - I suppose I can test this with Daniel's Weston branch,
otherwise I'm not sure how I'd hit it.
>> + */
>> + if (IS_GEN9(dev_priv) && i == 0 && fb->width > 3840 &&
>> + (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
>> + stride_alignment *= 4;
>> +
>> + if (fb->pitches[i] & (stride_alignment - 1)) {
>> + DRM_DEBUG_KMS("plane %d pitch (%d) must be at
>> least %u byte aligned\n",
>> + i, fb->pitches[i], stride_alignment);
>> + return -EINVAL;
>> + }
>> }
>>
>> intel_fb->obj = obj;
>>
>> - ret = intel_fill_fb_info(dev_priv, &intel_fb->base);
>> + ret = intel_fill_fb_info(dev_priv, fb);
>> if (ret)
>> return ret;
>>
>> - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs);
>> + ret = drm_framebuffer_init(dev, fb, &intel_fb_funcs);
>> if (ret) {
>> DRM_ERROR("framebuffer init failed %d\n", ret);
>> return ret;
>> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_
>> pm.c
>> index 249623d45be0..25782cd174c0 100644
>> --- a/drivers/gpu/drm/i915/intel_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_pm.c
>> @@ -62,6 +62,20 @@ static void gen9_init_clock_gating(struct
>> drm_i915_private *dev_priv)
>> I915_WRITE(CHICKEN_PAR1_1,
>> I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
>>
>> + /*
>> + * Display WA#0390: skl,bxt,kbl,glk
>> + *
>> + * Must match Sampler, Pixel Back End, and Media
>> + * (0xE194 bit 8, 0x7014 bit 13, 0x4DDC bits 27 and 31).
>> + *
>> + * Including bits outside the page in the hash would
>> + * require 2 (or 4?) MiB alignment of resources. Just
>> + * assume the default hashing mode which only uses bits
>> + * within the page.
>> + */
>> + I915_WRITE(CHICKEN_PAR1_1,
>> + I915_READ(CHICKEN_PAR1_1) & ~SKL_RC_HASH_OUTSIDE);
>> +
>> I915_WRITE(GEN8_CONFIG0,
>> I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES);
>>
>> @@ -3314,7 +3328,9 @@ skl_ddb_min_alloc(const struct drm_plane_state
>> *pstate,
>>
>> /* For Non Y-tile return 8-blocks */
>> if (fb->modifier != I915_FORMAT_MOD_Y_TILED &&
>> - fb->modifier != I915_FORMAT_MOD_Yf_TILED)
>> + fb->modifier != I915_FORMAT_MOD_Yf_TILED &&
>> + fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS &&
>> + fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS)
>> return 8;
>>
>> src_w = drm_rect_width(&intel_pstate->base.src) >> 16;
>> @@ -3590,7 +3606,9 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>> }
>>
>> y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED ||
>> - fb->modifier == I915_FORMAT_MOD_Yf_TILED;
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED ||
>> + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS;
>> x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED;
>>
>> /* Display WA #1141: kbl. */
>> @@ -3675,6 +3693,13 @@ static int skl_compute_plane_wm(const struct
>> drm_i915_private *dev_priv,
>> res_lines = DIV_ROUND_UP(selected_result.val,
>> plane_blocks_per_line.val);
>>
>> + /* Display WA #1125: skl,bxt,kbl,glk */
>> + if (level == 0 &&
>> + (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS ||
>> + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS))
>> + res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum);
>> +
>> + /* Display WA #1126: skl,bxt,kbl,glk */
>> if (level >= 1 && level <= 7) {
>> if (y_tiled) {
>> res_blocks += fixed_16_16_to_u32_round_up(y_
>> tile_minimum);
>> diff --git a/drivers/gpu/drm/i915/intel_sprite.c
>> b/drivers/gpu/drm/i915/intel_sprite.c
>> index 7031bc733d97..063a994815d0 100644
>> --- a/drivers/gpu/drm/i915/intel_sprite.c
>> +++ b/drivers/gpu/drm/i915/intel_sprite.c
>> @@ -210,6 +210,7 @@ skl_update_plane(struct drm_plane *drm_plane,
>> u32 surf_addr = plane_state->main.offset;
>> unsigned int rotation = plane_state->base.rotation;
>> u32 stride = skl_plane_stride(fb, 0, rotation);
>> + u32 aux_stride = skl_plane_stride(fb, 1, rotation);
>> int crtc_x = plane_state->base.dst.x1;
>> int crtc_y = plane_state->base.dst.y1;
>> uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
>> @@ -248,6 +249,10 @@ skl_update_plane(struct drm_plane *drm_plane,
>> I915_WRITE(PLANE_OFFSET(pipe, plane_id), (y << 16) | x);
>> I915_WRITE(PLANE_STRIDE(pipe, plane_id), stride);
>> I915_WRITE(PLANE_SIZE(pipe, plane_id), (src_h << 16) | src_w);
>> + I915_WRITE(PLANE_AUX_DIST(pipe, plane_id),
>> + (plane_state->aux.offset - surf_addr) | aux_stride);
>> + I915_WRITE(PLANE_AUX_OFFSET(pipe, plane_id),
>> + (plane_state->aux.y << 16) | plane_state->aux.x);
>>
>> /* program plane scaler */
>> if (plane_state->scaler_id >= 0) {
>> --
>> 2.10.2
>>
>>
More information about the Intel-gfx
mailing list