[PATCH v2 12/16] drm/msm/dpu: add an API to setup the CDM block for writeback

Fri Dec 8 22:48:38 UTC 2023

On 12/8/2023 12:55 PM, Dmitry Baryshkov wrote:
> On Fri, 8 Dec 2023 at 19:28, Abhinav Kumar <quic_abhinavk at quicinc.com> wrote:
>> On 12/8/2023 3:52 AM, Dmitry Baryshkov wrote:
>>> On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk at quicinc.com> wrote:
>>>>
>>>> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
>>>> the writeback encoder to setup the CDM block.
>>>>
>>>> Currently, this is defined and used within the writeback's physical
>>>> encoder layer however, the function can be modified to be used to setup
>>>> the CDM block even for non-writeback interfaces.
>>>>
>>>> Until those modifications are planned and made, keep it local to
>>>> writeback.
>>>>
>>>> changes in v2:
>>>>           - add the RGB2YUV CSC matrix to dpu util as needed by CDM
>>>>           - use dpu_hw_get_csc_cfg() to get and program CSC
>>>>           - drop usage of setup_csc_data() and setup_cdwn() cdm ops
>>>>             as they both have been merged into enable()
>>>>           - drop reduntant hw_cdm and hw_pp checks
>>>>
>>>> Signed-off-by: Abhinav Kumar <quic_abhinavk at quicinc.com>
>>>> ---
>>>>    .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
>>>>    .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
>>>>    4 files changed, 116 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> index 410f6225789c..1d6d1eb642b9 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> @@ -16,6 +16,7 @@
>>>>    #include "dpu_hw_pingpong.h"
>>>>    #include "dpu_hw_ctl.h"
>>>>    #include "dpu_hw_top.h"
>>>> +#include "dpu_hw_cdm.h"
>>>>    #include "dpu_encoder.h"
>>>>    #include "dpu_crtc.h"
>>>>
>>>> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
>>>>     * @wbirq_refcount:     Reference count of writeback interrupt
>>>>     * @wb_done_timeout_cnt: number of wb done irq timeout errors
>>>>     * @wb_cfg:  writeback block config to store fb related details
>>>> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
>>>>     * @wb_conn: backpointer to writeback connector
>>>>     * @wb_job: backpointer to current writeback job
>>>>     * @dest:   dpu buffer layout for current writeback output buffer
>>>> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
>>>>           atomic_t wbirq_refcount;
>>>>           int wb_done_timeout_cnt;
>>>>           struct dpu_hw_wb_cfg wb_cfg;
>>>> +       struct dpu_hw_cdm_cfg cdm_cfg;
>>>>           struct drm_writeback_connector *wb_conn;
>>>>           struct drm_writeback_job *wb_job;
>>>>           struct dpu_hw_fmt_layout dest;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> index 4665367cf14f..85429c62d727 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
>>>>           }
>>>>    }
>>>>
>>>> +/**
>>>> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
>>>> + * @phys_enc:Pointer to physical encoder
>>>> + */
>>>> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
>>>> +{
>>>> +       struct dpu_hw_cdm *hw_cdm;
>>>> +       struct dpu_hw_cdm_cfg *cdm_cfg;
>>>> +       struct dpu_hw_pingpong *hw_pp;
>>>> +       struct dpu_encoder_phys_wb *wb_enc;
>>>> +       const struct msm_format *format;
>>>> +       const struct dpu_format *dpu_fmt;
>>>> +       struct drm_writeback_job *wb_job;
>>>> +       int ret;
>>>> +
>>>> +       if (!phys_enc)
>>>> +               return;
>>>> +
>>>> +       wb_enc = to_dpu_encoder_phys_wb(phys_enc);
>>>> +       cdm_cfg = &wb_enc->cdm_cfg;
>>>> +       hw_pp = phys_enc->hw_pp;
>>>> +       hw_cdm = phys_enc->hw_cdm;
>>>> +       wb_job = wb_enc->wb_job;
>>>> +
>>>> +       format = msm_framebuffer_format(wb_enc->wb_job->fb);
>>>> +       dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
>>>> +
>>>> +       if (!hw_cdm)
>>>> +               return;
>>>> +
>>>> +       if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
>>>> +               DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
>>>> +                         dpu_fmt->base.pixel_format);
>>>> +               if (hw_cdm->ops.disable)
>>>> +                       hw_cdm->ops.disable(hw_cdm);
>>>> +
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
>>>> +
>>>> +       cdm_cfg->output_width = wb_job->fb->width;
>>>> +       cdm_cfg->output_height = wb_job->fb->height;
>>>> +       cdm_cfg->output_fmt = dpu_fmt;
>>>> +       cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
>>>> +       cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
>>>> +                       CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
>>>> +       cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
>>>> +       if (!cdm_cfg->csc_cfg) {
>>>> +               DPU_ERROR("valid csc not found\n");
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       /* enable 10 bit logic */
>>>> +       switch (cdm_cfg->output_fmt->chroma_sample) {
>>>> +       case DPU_CHROMA_RGB:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               break;
>>>> +       case DPU_CHROMA_H2V1:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               break;
>>>> +       case DPU_CHROMA_420:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
>>>> +               break;
>>>> +       case DPU_CHROMA_H1V2:
>>>> +       default:
>>>> +               DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
>>>> +                         DRMID(phys_enc->parent));
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>
>>> If it is unsupported, we should return an error here.
>>>
>>
>> The caller of this API and the caller of the API even before that do not
>> have error checking as they are all void. Disabling CDWN is the
>> appropriate corrective action for this case and should be sufficient.
> 
> Ack. Could you please document that DPU_CHROMA_H1V2 is invalid for this API?
> 
>>
>>>> +               break;
>>>> +       }
>>>> +
>>>> +       DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
>>>> +                 DRMID(phys_enc->parent), cdm_cfg->output_width,
>>>> +                 cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
>>>> +                 cdm_cfg->output_type, cdm_cfg->output_bit_depth,
>>>> +                 cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
>>>> +
>>>> +       if (hw_cdm->ops.enable) {
>>>> +               cdm_cfg->pp_id = hw_pp->idx;
>>>> +               ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
>>>> +               if (ret < 0) {
>>>> +                       DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
>>>> +                                 DRMID(phys_enc->parent), ret);
>>>> +                       return;
>>>> +               }
>>>> +       }
>>>> +}
>>>> +
>>>>    /**
>>>>     * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
>>>>     * @phys_enc:  Pointer to physical encoder
>>>> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
>>>>
>>>>           dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
>>>>
>>>> -       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>> +       dpu_encoder_helper_phys_setup_cdm(phys_enc);
>>>>
>>>> +       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>>    }
>>>>
>>>>    /**
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> index 59a153331194..34143491aba2 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
>>>>    #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
>>>>    #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
>>>>
>>>> +#define TO_S15D16(_x_)((_x_) << 7)
>>>
>>> Huh? I don't understand why it is shifted by 7. If you have data in
>>> S8.9 format, I'd say that it makes things less obvious compared to
>>> S15.16 (where you can perform division on the fly).
>>>
>>
>> I was referring to below comment and also because the values are in
>> S15.16 in
>>
>> https://git.codelinaro.org/clo/la/platform/vendor/opensource/display-drivers/-/blob/clo/main/msm/sde/sde_encoder_phys_wb.c?ref_type=heads#L35
> 
> Yes, I saw that. My first thought was that <<7 looked
> counterintuitive. You can not perform maths easily with S4.9 values.
> At least I can not guess the actual value of 0x083 or 0xff37.
> 
> I thought about writing data in S15.16 directly (e.g. 0x4180 instead
> of 0x083, 0x8100 instead of 0x102). This way it would be easier to
> understand that the first row is 65.5 / 256, 128 / 256, 25 / 256. And
> this is what we had for existing DPU CSC tables.
> 
> But after looking at the dpu_hw_csc_setup() I actually fail to
> understand why we need the S15.16 values. The hardware works with S4.9
> in the end. So it looks pretty strange to convert S4.9 to S15.16 via
> the macro only to convert it back to S4.9 in the CSC setup code.
> 
> Maybe we can go the following route:
> - Merge this series using CSC tables as is (with your TO_S15D16 macro)
> - Drop the <<7 from dpu_hw_csc_setup() (and drop the macro too)
> - Define S4P9(floor, nom, den) and S4P9_NEG(floor, nom, den) macros to
> be used for the matrix values
> - Merge the DPU tables with MDP tables (which use S4.9 already)
> 
> I'd also like to check if we can drop two versions of clamp values
> (for 8bit and for 10bit) and convert those values on the fly.
> 

Yes, I am fine with this approach. That will also allow me to 
investigate the tables not just from WB context but from others as well.

>>
>> 428 struct dpu_csc_cfg {
>> 429     /* matrix coefficients in S15.16 format */
>> 430     uint32_t csc_mv[DPU_CSC_MATRIX_COEFF_SIZE];
>> 431     uint32_t csc_pre_bv[DPU_CSC_BIAS_SIZE];
>> 432     uint32_t csc_post_bv[DPU_CSC_BIAS_SIZE];
>> 433     uint32_t csc_pre_lv[DPU_CSC_CLAMP_SIZE];
>> 434     uint32_t csc_post_lv[DPU_CSC_CLAMP_SIZE];
>> 435 };
>> 436
>>
>>
>>>> +
>>>>    static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
>>>>           {
>>>>                   /* S15.16 format */
>>>> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
>>>>           { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
>>>>    };
>>>>
>>>> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
>>>> +       {
>>>> +               TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
>>>> +               TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
>>>> +               TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
>>>> +       },
>>>> +       { 0x00, 0x00, 0x00 },
>>>> +       { 0x0040, 0x0200, 0x0200 },
>>>> +       { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
>>>> +       { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
>>>> +};
>>>> +
>>>>    /**
>>>>     * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
>>>>     * @type:              type of the requested CSC matrix from caller
>>>> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
>>>>           case DPU_HW_YUV2RGB_601L_10BIT:
>>>>                   csc_cfg = &dpu_csc10_YUV2RGB_601L;
>>>>                   break;
>>>> +       case DPU_HW_RGB2YUV_601L_10BIT:
>>>> +               csc_cfg = &dpu_csc10_rgb2yuv_601l;
>>>> +               break;
>>>>           default:
>>>>                   DPU_ERROR("unknown csc_cfg type\n");
>>>>                   break;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> index 49f2bcf6de15..ed153d66f660 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> @@ -22,6 +22,7 @@
>>>>    enum dpu_hw_csc_cfg_type {
>>>>           DPU_HW_YUV2RGB_601L,
>>>>           DPU_HW_YUV2RGB_601L_10BIT,
>>>> +       DPU_HW_RGB2YUV_601L_10BIT,
>>>>    };
>>>>
>>>>    /*
>>>> --
>>>> 2.40.1
>>>>
>>>
>>>
> 
> 
> 
> --
> With best wishes
> Dmitry