Hi Nicolas,
Thanks for the review. I have changed the points I don't comment on here according to your suggestions.
+#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/delay.h> +#include <linux/swab.h> +#include <linux/bitfield.h> +#include <drm/drm_debugfs.h> +#include <uapi/linux/videodev2.h> +#include <drm/drm_vblank.h> +#include <dt-bindings/soc/rockchip,vop2.h>
Alphabetically sort these includes? Not sure on what the convention is in the DRM subsystem.
Me neither. The first random file I looked at in drivers/gpu/drm/ has the includes sorted alphabetically, so I'll do the same.
+static void vop2_cfg_done(struct vop2_video_port *vp) +{
- struct vop2 *vop2 = vp->vop2;
- uint32_t val;
- val = vop2_readl(vop2, RK3568_REG_CFG_DONE);
- val &= 0x7;
GENMASK(2, 0) instead of 0x7, should that be a constant somewhere?
- vop2_writel(vop2, RK3568_REG_CFG_DONE,
val | BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
Replaced that with the following which doesn't need a mask:
regmap_set_bits(vop2->map, RK3568_REG_CFG_DONE, BIT(vp->id) | RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
+}
+static void vop2_win_disable(struct vop2_win *win) +{
- vop2_win_write(win, VOP2_WIN_ENABLE, 0);
- if (vop2_cluster_window(win))
vop2_win_write(win, VOP2_WIN_CLUSTER_ENABLE, 0);
+}
+static uint32_t vop2_afbc_transform_offset(struct drm_plane_state *pstate,
bool afbc_half_block_en)
+{
- struct drm_rect *src = &pstate->src;
- struct drm_framebuffer *fb = pstate->fb;
- uint32_t bpp = fb->format->cpp[0] * 8;
- uint32_t vir_width = (fb->pitches[0] << 3) / bpp;
- uint32_t width = drm_rect_width(src) >> 16;
- uint32_t height = drm_rect_height(src) >> 16;
- uint32_t act_xoffset = src->x1 >> 16;
- uint32_t act_yoffset = src->y1 >> 16;
- uint32_t align16_crop = 0;
- uint32_t align64_crop = 0;
- uint32_t height_tmp = 0;
- uint32_t transform_tmp = 0;
- uint8_t transform_xoffset = 0;
- uint8_t transform_yoffset = 0;
- uint8_t top_crop = 0;
- uint8_t top_crop_line_num = 0;
- uint8_t bottom_crop_line_num = 0;
- /* 16 pixel align */
- if (height & 0xf)
align16_crop = 16 - (height & 0xf);
- height_tmp = height + align16_crop;
- /* 64 pixel align */
- if (height_tmp & 0x3f)
align64_crop = 64 - (height_tmp & 0x3f);
- top_crop_line_num = top_crop << 2;
- if (top_crop == 0)
bottom_crop_line_num = align16_crop + align64_crop;
- else if (top_crop == 1)
bottom_crop_line_num = align16_crop + align64_crop + 12;
- else if (top_crop == 2)
bottom_crop_line_num = align16_crop + align64_crop + 8;
top_crop == 0 is always taken from what I can gather, rest is dead code. Is this intentional? It doesn't seem intentional.
It's the same in the downstream driver. I don't know what top_crop is good for. I just removed the dead code for now.
transform_xoffset = transform_tmp & 0xf;
transform_tmp = vir_width - width - act_xoffset;
transform_yoffset = transform_tmp & 0xf;
break;
- case 0:
transform_tmp = act_xoffset;
transform_xoffset = transform_tmp & 0xf;
transform_tmp = top_crop_line_num + act_yoffset;
if (afbc_half_block_en)
transform_yoffset = transform_tmp & 0x7;
else
transform_yoffset = transform_tmp & 0xf;
break;
- }
- return (transform_xoffset & 0xf) | ((transform_yoffset & 0xf) << 16);
+}
0xf, 0x3f, 0x7 might be more clear as GENMASK. if (afbc_half_block_en) branches can be moved out of cases and assign a transform mask variable instead.
Sure. Other than that the function can be simplified a bit more.
+/*
- A Cluster window has 2048 x 16 line buffer, which can
- works at 2048 x 16(Full) or 4096 x 8 (Half) mode.
- for Cluster_lb_mode register:
- 0: half mode, for plane input width range 2048 ~ 4096
- 1: half mode, for cluster work at 2 * 2048 plane mode
- 2: half mode, for rotate_90/270 mode
- */
+static int vop2_get_cluster_lb_mode(struct vop2_win *win, struct drm_plane_state *pstate) +{
- if ((pstate->rotation & DRM_MODE_ROTATE_270) || (pstate->rotation & DRM_MODE_ROTATE_90))
return 2;
- else
return 0;
+}
What about 1?
1 is used in the downstream driver for cluster sub windows. These are currently not supported, I don't know yet where this is leading to.
+/*
- bli_sd_factor = (src - 1) / (dst - 1) << 12;
- avg_sd_factor:
- bli_su_factor:
- bic_su_factor:
- = (src - 1) / (dst - 1) << 16;
- gt2 enable: dst get one line from two line of the src
- gt4 enable: dst get one line from four line of the src.
- */
+static uint16_t vop2_scale_factor(enum scale_mode mode,
int32_t filter_mode,
uint32_t src, uint32_t dst)
+{
- uint32_t fac;
- int i;
- if (mode == SCALE_NONE)
return 0;
- /*
* A workaround to avoid zero div.
*/
- if (dst == 1 || src == 1) {
dst++;
src++;
- }
- if ((mode == SCALE_DOWN) && (filter_mode == VOP2_SCALE_DOWN_BIL)) {
fac = ((src - 1) << 12) / (dst - 1);
for (i = 0; i < 100; i++) {
if (fac * (dst - 1) >> 12 < (src - 1))
break;
fac -= 1;
DRM_DEBUG("down fac cali: src:%d, dst:%d, fac:0x%x\n", src, dst, fac);
}
- } else {
fac = ((src - 1) << 16) / (dst - 1);
for (i = 0; i < 100; i++) {
if (fac * (dst - 1) >> 16 < (src - 1))
break;
fac -= 1;
DRM_DEBUG("up fac cali: src:%d, dst:%d, fac:0x%x\n", src, dst, fac);
}
- }
- return fac;
+}
src = 0 or dst = 0 causes an uint underflow here.
Right. Looking at it these loops are rather unnecessary and duplicating the code also. The whole function goes down to:
static uint16_t vop2_scale_factor(uint32_t src, uint32_t dst) { uint32_t fac; int shift;
if (src == dst) return 0;
if (dst < 2) return U16_MAX;
if (src < 2) return 0;
if (src > dst) shift = 12; else shift = 16;
src--; dst--;
fac = DIV_ROUND_UP(src << shift, dst) - 1;
if (fac > U16_MAX) return U16_MAX;
return fac; }
+static void vop2_enable(struct vop2 *vop2) +{
- int ret;
- uint32_t v;
- ret = pm_runtime_get_sync(vop2->dev);
- if (ret < 0) {
drm_err(vop2->drm, "failed to get pm runtime: %d\n", ret);
return;
- }
- ret = vop2_core_clks_prepare_enable(vop2);
- if (ret) {
pm_runtime_put_sync(vop2->dev);
return;
- }
- if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
- vop2_writel(vop2, RK3568_REG_CFG_DONE, RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN);
- /*
* Disable auto gating, this is a workaround to
* avoid display image shift when a window enabled.
*/
- v = vop2_readl(vop2, RK3568_SYS_AUTO_GATING_CTRL);
- v &= ~RK3568_SYS_AUTO_GATING_CTRL__AUTO_GATING_EN;
- vop2_writel(vop2, RK3568_SYS_AUTO_GATING_CTRL, v);
- vop2_writel(vop2, RK3568_SYS0_INT_CLR,
VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
- vop2_writel(vop2, RK3568_SYS0_INT_EN,
VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
- vop2_writel(vop2, RK3568_SYS1_INT_CLR,
VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
- vop2_writel(vop2, RK3568_SYS1_INT_EN,
VOP2_INT_BUS_ERRPR << 16 | VOP2_INT_BUS_ERRPR);
+}
Does reg writes but doesn't have the spinlock.
I didn't care about it at all when working on this driver, so we can be sure it's at the wrong places right now.
I think we can just drop the register spinlock. The driver uses regmap now, so read-modify-write accesses are covered by the regmap internal locking as long as we use regmap_update_bits(). The remaining accesses are either orthogonal anyway or are covered by the vop2_lock mutex.
- vop2_lock(vop2);
- ret = clk_prepare_enable(vp->dclk);
- if (ret < 0) {
drm_err(vop2->drm, "failed to enable dclk for video port%d - %d\n",
vp->id, ret);
return;
vop2_lock is never unlocked in this branch
I'll move the call to vop2_lock() below the clk_prepare_enable().
- }
- pm_runtime_put(vop2->dev);
- return ret;
+}
Does reg writes, does the ISR need the reg spinlock here? I'm not sure.
In general there appear to be a lot of issues with the register lock, so either I'm misunderstanding what it's supposed to protect or the code is very thread unsafe.
Yeah, the original driver had the spinlock and as said I kept it but didn't maintain it. Let's remove it for now.
Sascha