[PATCH v2 2/5] gpu: ipu-v3: Add mem2mem image conversion support to IC
Jean-Michel Hautbois
jean-michel.hautbois at veo-labs.com
Wed May 27 11:42:02 PDT 2015
Hi Philipp, Lucas and Sascha,
Thanks for that patch series.
2015-03-18 11:22 GMT+01:00 Philipp Zabel <p.zabel at pengutronix.de>:
>
> This patch adds support for mem2mem scaling and colorspace conversion
> using the IC module's post-processing task.
>
> Scaling images larger than 1024x1024 is supported by tiling over multiple
> IC scaling runs. Since the IDMAC and IC units have interesting and different
> alignment limitations for buffer base addresses (left edges) and burst size
> (row lengths), depending on input and output pixel formats, the tile rectangles
> and scaling coefficients are chosen to minimize distortion. Due to possible
> overlap, the tiles have to be rendered right to left and bottom to top.
> Up to 7 pixels (depending on frame sizes and scaling factor) have to be
> available after the end of the frame if the width is not burst size aligned.
> The tiling code has a parameter to optionally round frame sizes up or down
> and avoid overdraw in compositing scenarios.
Can you detail what you call "compositing scenarios" ?
>
> Signed-off-by: Sascha Hauer <s.hauer at pengutronix.de>
> Signed-off-by: Lucas Stach <l.stach at pengutronix.de>
> Signed-off-by: Philipp Zabel <p.zabel at pengutronix.de>
> ---
> Changes since v1:
> - Removed deinterlacer support left-overs
> ---
> drivers/gpu/ipu-v3/ipu-ic.c | 787 +++++++++++++++++++++++++++++++++++++++++++-
> include/video/imx-ipu-v3.h | 34 +-
> 2 files changed, 804 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c
> index ad75588..984f68f 100644
> --- a/drivers/gpu/ipu-v3/ipu-ic.c
> +++ b/drivers/gpu/ipu-v3/ipu-ic.c
> @@ -15,6 +15,7 @@
> #include <linux/errno.h>
> #include <linux/spinlock.h>
> #include <linux/bitrev.h>
> +#include <linux/interrupt.h>
> #include <linux/io.h>
> #include <linux/err.h>
> #include "ipu-prv.h"
> @@ -96,6 +97,15 @@ struct ic_task_bitfields {
> u32 ic_cmb_galpha_bit;
> };
>
> +struct ic_task_channels {
> + u8 in;
> + u8 out;
> + u8 rot_in;
> + u8 rot_out;
> + u8 in_prev;
> + u8 in_next;
> +};
> +
> static const struct ic_task_regoffs ic_task_reg[IC_NUM_TASKS] = {
> [IC_TASK_ENCODER] = {
> .rsc = IC_PRP_ENC_RSC,
> @@ -138,12 +148,53 @@ static const struct ic_task_bitfields ic_task_bit[IC_NUM_TASKS] = {
> },
> };
>
> +static const struct ic_task_channels ic_task_ch[IC_NUM_TASKS] = {
> + [IC_TASK_ENCODER] = {
> + .in = IPUV3_CHANNEL_MEM_IC_PRP_VF,
> + .out = IPUV3_CHANNEL_IC_PRP_ENC_MEM,
> + .rot_in = IPUV3_CHANNEL_MEM_ROT_ENC,
> + .rot_out = IPUV3_CHANNEL_ROT_ENC_MEM,
> + },
> + [IC_TASK_VIEWFINDER] = {
> + .in = IPUV3_CHANNEL_MEM_VDI_CUR,
> + .out = IPUV3_CHANNEL_IC_PRP_VF_MEM,
> + .rot_in = IPUV3_CHANNEL_MEM_ROT_VF,
> + .rot_out = IPUV3_CHANNEL_ROT_VF_MEM,
> + .in_prev = IPUV3_CHANNEL_MEM_VDI_PREV,
> + .in_next = IPUV3_CHANNEL_MEM_VDI_NEXT,
> + },
> + [IC_TASK_POST_PROCESSOR] = {
> + .in = IPUV3_CHANNEL_MEM_IC_PP,
> + .out = IPUV3_CHANNEL_IC_PP_MEM,
> + .rot_in = IPUV3_CHANNEL_MEM_ROT_PP,
> + .rot_out = IPUV3_CHANNEL_ROT_PP_MEM,
> + },
> +};
> +
> +struct image_convert_ctx {
> + void (*complete)(void *ctx, int err);
> + void *complete_context;
> +
> + struct list_head list;
> + struct ipu_image in;
> + struct ipu_image in_n;
> + struct ipu_image in_p;
> + struct ipu_image out;
> +
> + void *freep;
> +
> + bool rotate:1;
> +
> + u32 rsc;
> +};
> +
> struct ipu_ic_priv;
>
> struct ipu_ic {
> enum ipu_ic_task task;
> const struct ic_task_regoffs *reg;
> const struct ic_task_bitfields *bit;
> + const struct ic_task_channels *ch;
>
> enum ipu_color_space in_cs, g_in_cs;
> enum ipu_color_space out_cs;
> @@ -152,6 +203,19 @@ struct ipu_ic {
> bool in_use;
>
> struct ipu_ic_priv *priv;
> +
> + struct ipuv3_channel *input_channel_p;
> + struct ipuv3_channel *input_channel;
> + struct ipuv3_channel *input_channel_n;
> + struct ipuv3_channel *output_channel;
> + struct ipuv3_channel *rotation_input_channel;
> + struct ipuv3_channel *rotation_output_channel;
> +
> + struct list_head image_list;
> +
> + struct workqueue_struct *workqueue;
> + struct work_struct work;
> + struct completion complete;
> };
As this is a workqueue, it can sleep, and you don't know when it is
called exactly.
Can we be sure that it is "real-time" compatible ? If you have this
scaler after a capture source, and before the coda driver, you can be
starved of buffers ?
And you can even have multiple instances of the scaler, so you
probably can get into troubles if there is not enough buffers on the
capture and output queues, right ?
I have played with it a bit and have been successful having two
instances on IPU1 and two other on IPU2.
But I don't know if there can be side effects...
JM
>
> struct ipu_ic_priv {
> @@ -168,7 +232,8 @@ static inline u32 ipu_ic_read(struct ipu_ic *ic, unsigned offset)
> return readl(ic->priv->base + offset);
> }
>
> -static inline void ipu_ic_write(struct ipu_ic *ic, u32 value, unsigned offset)
> +static inline void ipu_ic_write(struct ipu_ic *ic, u32 value,
> + unsigned offset)
> {
> writel(value, ic->priv->base + offset);
> }
> @@ -446,32 +511,35 @@ int ipu_ic_task_init(struct ipu_ic *ic,
> int in_width, int in_height,
> int out_width, int out_height,
> enum ipu_color_space in_cs,
> - enum ipu_color_space out_cs)
> + enum ipu_color_space out_cs,
> + u32 rsc)
> {
> struct ipu_ic_priv *priv = ic->priv;
> - u32 reg, downsize_coeff, resize_coeff;
> + u32 downsize_coeff, resize_coeff;
> unsigned long flags;
> int ret = 0;
>
> - /* Setup vertical resizing */
> - ret = calc_resize_coeffs(ic, in_height, out_height,
> - &resize_coeff, &downsize_coeff);
> - if (ret)
> - return ret;
> + if (!rsc) {
> + /* Setup vertical resizing */
> + ret = calc_resize_coeffs(ic, in_height, out_height,
> + &resize_coeff, &downsize_coeff);
> + if (ret)
> + return ret;
>
> - reg = (downsize_coeff << 30) | (resize_coeff << 16);
> + rsc = (downsize_coeff << 30) | (resize_coeff << 16);
>
> - /* Setup horizontal resizing */
> - ret = calc_resize_coeffs(ic, in_width, out_width,
> - &resize_coeff, &downsize_coeff);
> - if (ret)
> - return ret;
> + /* Setup horizontal resizing */
> + ret = calc_resize_coeffs(ic, in_width, out_width,
> + &resize_coeff, &downsize_coeff);
> + if (ret)
> + return ret;
>
> - reg |= (downsize_coeff << 14) | resize_coeff;
> + rsc |= (downsize_coeff << 14) | resize_coeff;
> + }
>
> spin_lock_irqsave(&priv->lock, flags);
>
> - ipu_ic_write(ic, reg, ic->reg->rsc);
> + ipu_ic_write(ic, rsc, ic->reg->rsc);
>
> /* Setup color space conversion */
> ic->in_cs = in_cs;
> @@ -629,6 +697,675 @@ unlock:
> }
> EXPORT_SYMBOL_GPL(ipu_ic_task_idma_init);
>
> +static struct image_convert_ctx *ipu_image_convert_next(struct ipu_ic *ic)
> +{
> + struct ipu_ic_priv *priv = ic->priv;
> + struct ipuv3_channel *ch_in = ic->input_channel;
> + struct ipuv3_channel *ch_out = ic->output_channel;
> + struct image_convert_ctx *ctx;
> + struct ipu_image *in_p, *in, *in_n;
> + struct ipu_image *out;
> + int ret;
> + unsigned long flags;
> + unsigned int inburst, outburst;
> + unsigned int in_height;
> +
> + spin_lock_irqsave(&priv->lock, flags);
> +
> + if (list_empty(&ic->image_list)) {
> + spin_unlock_irqrestore(&priv->lock, flags);
> + return NULL;
> + }
> +
> + ctx = list_first_entry(&ic->image_list, struct image_convert_ctx, list);
> +
> + list_del(&ctx->list);
> +
> + spin_unlock_irqrestore(&priv->lock, flags);
> +
> + in_p = &ctx->in_p;
> + in = &ctx->in;
> + in_n = &ctx->in_n;
> + out = &ctx->out;
> +
> + ipu_cpmem_zero(ch_in);
> + ipu_cpmem_zero(ch_out);
> +
> + inburst = in->rect.width & 0xf ? 8 : 16;
> + outburst = out->rect.width & 0xf ? 8 : 16;
> +
> + ipu_ic_enable(ic);
> +
> + ipu_ic_task_idma_init(ic, ic->input_channel, in->rect.width,
> + in->rect.height, inburst, IPU_ROTATE_NONE);
> + ipu_ic_task_idma_init(ic, ic->output_channel, out->rect.width,
> + out->rect.height, outburst, IPU_ROTATE_NONE);
> +
> + ipu_cpmem_set_image(ch_in, &ctx->in);
> + ipu_cpmem_set_image(ch_out, &ctx->out);
> +
> + ipu_cpmem_set_burstsize(ch_in, inburst);
> + ipu_cpmem_set_burstsize(ch_out, outburst);
> +
> + in_height = in->rect.height;
> +
> + dev_dbg(priv->ipu->dev, "%s: %dx%d(%dx%d@%d,%d) -> %dx%d(%dx%d@%d,%d)\n",
> + __func__, in->pix.width, in->pix.height,
> + in->rect.width, in->rect.height, in->rect.left, in->rect.top,
> + out->pix.width, out->pix.height,
> + out->rect.width, out->rect.height,
> + out->rect.left, out->rect.top);
> +
> + dev_dbg(priv->ipu->dev,
> + "%s: hscale: >>%d, *8192/%d vscale: >>%d, *8192/%d\n",
> + __func__, (ctx->rsc >> 14) & 0x3, (ctx->rsc & 0x3fff),
> + ctx->rsc >> 30, (ctx->rsc >> 16) & 0x3fff);
> +
> + ret = ipu_ic_task_init(ic, in->rect.width, in_height,
> + out->rect.width, out->rect.height,
> + ipu_pixelformat_to_colorspace(in->pix.pixelformat),
> + ipu_pixelformat_to_colorspace(out->pix.pixelformat),
> + ctx->rsc);
> + if (ret) {
> + ipu_ic_disable(ic);
> + return ERR_PTR(ret);
> + }
> +
> + ipu_idmac_enable_channel(ic->input_channel);
> + ipu_idmac_enable_channel(ic->output_channel);
> +
> + ipu_ic_task_enable(ic);
> +
> + ipu_idmac_select_buffer(ic->input_channel, 0);
> + ipu_idmac_select_buffer(ic->output_channel, 0);
> +
> + return ctx;
> +}
> +
> +static void ipu_image_convert_work(struct work_struct *work)
> +{
> + struct ipu_ic *ic = container_of(work, struct ipu_ic, work);
> + struct image_convert_ctx *ctx;
> + int ret;
> +
> + while (1) {
> + int task_error = 0;
> +
> + ctx = ipu_image_convert_next(ic);
> + if (!ctx)
> + return;
> +
> + if (IS_ERR(ctx)) {
> + task_error = PTR_ERR(ctx);
> + } else {
> + ret = wait_for_completion_interruptible_timeout(
> + &ic->complete, 100 * HZ);
> + if (!ret)
> + task_error = -ETIMEDOUT;
> + }
> +
> + ipu_ic_task_disable(ic);
> + ipu_ic_disable(ic);
> +
> + if (ctx->complete)
> + ctx->complete(ctx->complete_context, task_error);
> + kfree(ctx->freep);
> + }
> +}
> +
> +static irqreturn_t ipu_image_convert_handler(int irq, void *context)
> +{
> + struct ipu_ic *ic = context;
> +
> + complete(&ic->complete);
> +
> + return IRQ_HANDLED;
> +}
> +
> +
> +/*
> + * IDMAC base addresses are 8-byte aligned
> + */
> +static int ipu_image_halign(u32 pixfmt)
> +{
> + switch (pixfmt) {
> + /* 2 RGB32 pixels correspond to 8 bytes */
> + case V4L2_PIX_FMT_RGB32:
> + case V4L2_PIX_FMT_BGR32:
> + return 2;
> + /* 4 RGB565 or YUYV pixels correspond to 8 bytes */
> + case V4L2_PIX_FMT_RGB565:
> + case V4L2_PIX_FMT_UYVY:
> + case V4L2_PIX_FMT_YUYV:
> + return 4;
> + /*
> + * 8 RGB24 pixels correspond to 24 bytes,
> + * 8 NV12 pixels correspond to 8 bytes, both in luma and chroma
> + */
> + case V4L2_PIX_FMT_RGB24:
> + case V4L2_PIX_FMT_BGR24:
> + case V4L2_PIX_FMT_NV12:
> + return 8;
> + /* 16 YUV420 pixels correspond to 16 bytes in luma, 8 bytes in chroma */
> + case V4L2_PIX_FMT_YUV420:
> + case V4L2_PIX_FMT_YVU420:
> + case V4L2_PIX_FMT_YUV422P:
> + return 16;
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +/*
> + * Vertically chroma-subsampled formats are limited to even heights and vertical
> + * positions
> + */
> +static int ipu_image_valign(u32 pixfmt)
> +{
> + switch (pixfmt) {
> + case V4L2_PIX_FMT_RGB24:
> + case V4L2_PIX_FMT_BGR24:
> + case V4L2_PIX_FMT_RGB32:
> + case V4L2_PIX_FMT_BGR32:
> + case V4L2_PIX_FMT_RGB565:
> + case V4L2_PIX_FMT_UYVY:
> + case V4L2_PIX_FMT_YUYV:
> + case V4L2_PIX_FMT_YUV422P:
> + return 1;
> + case V4L2_PIX_FMT_NV12:
> + case V4L2_PIX_FMT_YUV420:
> + case V4L2_PIX_FMT_YVU420:
> + return 2;
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +#define round_closest(x, y) round_down((x) + (y)/2, (y))
> +
> +struct image_convert_ctx *ipu_image_convert_prepare(struct ipu_soc *ipu,
> + struct ipu_image *in, struct ipu_image *out,
> + enum ipu_image_scale_ctrl ctrl, int *num_tiles)
> +{
> + struct image_convert_ctx *ctx, *c;
> + int htiles, vtiles;
> + int in_valign, in_halign, in_burst, out_valign, out_halign, out_burst;
> + int left, top;
> + int x, y;
> + int h_resize_opt, v_resize_opt;
> + u32 v_downsize_coeff = 0, h_downsize_coeff = 0;
> + u32 v_resize_coeff, h_resize_coeff;
> +
> + /* validate input */
> + if (in->rect.width < 16 || out->rect.width < 16 ||
> + (in->rect.width / 8) > out->rect.width)
> + return ERR_PTR(-EINVAL);
> +
> + /* tile setup */
> + htiles = DIV_ROUND_UP(out->rect.width, 1024);
> + vtiles = DIV_ROUND_UP(out->rect.height, 1024);
> +
> + in_valign = ipu_image_valign(in->pix.pixelformat);
> + in_halign = ipu_image_halign(in->pix.pixelformat);
> + out_valign = ipu_image_valign(out->pix.pixelformat);
> + out_halign = ipu_image_halign(out->pix.pixelformat);
> +
> + /* IC bursts are limited to either 8 or 16 pixels */
> + in_burst = 8;
> + out_burst = 8;
> +
> + if (in_valign < 0 || in_halign < 0 ||
> + out_valign < 0 || out_halign < 0) {
> + dev_err(ipu->dev, "unsupported in/out format\n");
> + return ERR_PTR(-EINVAL);
> + }
> +
> + /* compute static decimator coefficients */
> + while ((in->rect.width >> h_downsize_coeff) > out->rect.width)
> + h_downsize_coeff++;
> + while ((in->rect.height >> v_downsize_coeff) > out->rect.height)
> + v_downsize_coeff++;
> +
> + /* move and crop the output image according to IDMAC limitations */
> + switch (ctrl) {
> + case IPU_IMAGE_SCALE_ROUND_DOWN:
> + left = round_up(in->rect.left, in_halign);
> + top = round_up(in->rect.top, in_valign);
> + in->rect.width = in->rect.width - (left - in->rect.left);
> + in->rect.height = in->rect.height - (top - in->rect.top);
> + in->rect.left = left;
> + in->rect.top = top;
> + left = round_up(out->rect.left, out_halign);
> + top = round_up(out->rect.top, out_valign);
> + out->rect.width = round_down(out->rect.width - (left -
> + out->rect.left), out_burst);
> + out->rect.height = round_down(out->rect.height - (top -
> + out->rect.top), out_valign);
> + break;
> + case IPU_IMAGE_SCALE_ROUND_UP:
> + left = round_down(in->rect.left, in_halign);
> + top = round_down(in->rect.top, in_valign);
> + in->rect.width = in->rect.width + in->rect.left - left;
> + in->rect.height = in->rect.height + in->rect.top - top;
> + in->rect.left = left;
> + in->rect.top = top;
> + left = round_down(out->rect.left, out_halign);
> + top = round_down(out->rect.top, out_valign);
> + out->rect.width = round_up(out->rect.width + out->rect.left -
> + left, out_burst);
> + out->rect.height = round_up(out->rect.height + out->rect.top -
> + top, out_valign);
> + break;
> + case IPU_IMAGE_SCALE_PIXELPERFECT:
> + left = round_down(in->rect.left, in_halign);
> + top = round_down(in->rect.top, in_valign);
> + in->rect.width = in->rect.width + in->rect.left - left;
> + in->rect.height = in->rect.height + in->rect.top - top;
> + in->rect.left = left;
> + in->rect.top = top;
> + left = round_down(out->rect.left + out_halign / 2, out_halign);
> + top = round_down(out->rect.top + out_valign / 2, out_valign);
> + /*
> + * don't round width and height to burst size / pixel format
> + * limitations yet, we do it after determining the scaling
> + * coefficients
> + */
> + out->rect.width = out->rect.width + out->rect.left - left;
> + out->rect.height = out->rect.height + out->rect.top - top;
> + break;
> + default:
> + return ERR_PTR(-EINVAL);
> + }
> + out->rect.left = left;
> + out->rect.top = top;
> +
> + /* Round input width and height according to decimation */
> + in->rect.width = round_down(in->rect.width, 1 << h_downsize_coeff);
> + in->rect.height = round_down(in->rect.height, 1 << v_downsize_coeff);
> +
> + dev_dbg(ipu->dev,
> + "%s: in: %dx%d(%dx%d@%d,%d) -> out: %dx%d(%dx%d@%d,%d)\n",
> + __func__, in->pix.width, in->pix.height, in->rect.width,
> + in->rect.height, in->rect.left, in->rect.top, out->pix.width,
> + out->pix.height, out->rect.width, out->rect.height,
> + out->rect.left, out->rect.top);
> +
> + /*
> + * Compute the bilinear resizing coefficients that can/could be used if
> + * scaling using a single tile. The bottom right pixel should sample the
> + * input as close as possible to but not beyond the bottom right input
> + * pixel out of the decimator:
> + *
> + * (out->rect.width - 1) * h_resize / 8192.0 <= (in->rect.width >>
> + * h_downsize_coeff) - 1
> + * (out->rect.height - 1) * v_resize / 8192.0 <= (in->rect.height >>
> + * v_downsize_coeff) - 1
> + */
> + h_resize_opt = 8192 * ((in->rect.width >> h_downsize_coeff) - 1) /
> + (out->rect.width - 1);
> + v_resize_opt = 8192 * ((in->rect.height >> v_downsize_coeff) - 1) /
> + (out->rect.height - 1);
> +
> + dev_dbg(ipu->dev,
> + "%s: hscale: >>%d, *8192/%d vscale: >>%d, *8192/%d, %dx%d tiles\n",
> + __func__, h_downsize_coeff, h_resize_opt, v_downsize_coeff,
> + v_resize_opt, htiles, vtiles);
> +
> + ctx = kcalloc(htiles * vtiles, sizeof(*ctx), GFP_KERNEL);
> + if (!ctx)
> + return ERR_PTR(-ENOMEM);
> +
> + c = ctx;
> +
> + for (x = htiles - 1; x >= 0; x--) {
> + int in_right, out_right;
> +
> + /*
> + * Since we render tiles right to left, the right edge
> + * is already known. Depending on tile position and
> + * scaling mode, we may overshoot it.
> + */
> + if (x == htiles - 1) {
> + out_right = out->rect.left + out->rect.width;
> + in_right = in->rect.left + in->rect.width;
> + } else {
> + struct image_convert_ctx *c_right = c - vtiles;
> +
> + out_right = c_right->out.rect.left;
> + in_right = c_right->in.rect.left;
> + }
> +
> + /* Now determine the left edge of this tile column */
> + if (x == 0) {
> + /* For the leftmost column this is trivial */
> + c->out.rect.left = out->rect.left;
> + c->in.rect.left = in->rect.left;
> + } else {
> + int best_left, best_in_left;
> + int min_left, max_left;
> + int min_diff = INT_MAX;
> +
> + /*
> + * Find the best possible left edge. It must be adjusted
> + * according to IDMAC limitations, and should be
> + * chosen so that
> + * (in->rect.left + (c->out.rect.left - out->rect.left)
> + * * h_resize_opt / (8192 >> h_downsize_coeff))
> + * is as close as possible to a valid left edge in the
> + * input.
> + */
> + min_left = max(0,
> + round_up(out_right - 1024, out_halign));
> + max_left = min(round_down(out_right, out_halign),
> + x * 1024);
> + best_left = min_left;
> + best_in_left = (best_left - out->rect.left) *
> + h_resize_opt;
> + for (left = min_left; left < max_left;
> + left += out_halign) {
> + int diff, in_left;
> +
> + /*
> + * In ROUND_UP and ROUND_DOWN modes, for the
> + * rightmost column, only consider left edges
> + * that are a multiple of the burst size away
> + * from the right edge.
> + */
> + if ((ctrl != IPU_IMAGE_SCALE_PIXELPERFECT) &&
> + (x == htiles - 1) &&
> + ((out_right - left) % out_burst))
> + continue;
> + in_left = in->rect.left +
> + (((left - out->rect.left) *
> + h_resize_opt) << h_downsize_coeff);
> + diff = abs(in_left -
> + round_closest(in_left,
> + 8192 * in_halign));
> +
> + if (diff < min_diff) {
> + min_diff = diff;
> + best_left = left;
> + best_in_left = in_left;
> + }
> + }
> +
> + c->out.rect.left = best_left;
> + c->in.rect.left = DIV_ROUND_CLOSEST(best_in_left, 8192);
> +
> + dev_dbg(ipu->dev,
> + "%s: tile(%d,y):\tleft: %d -> %d (instead of %d.%04d -> %d)",
> + __func__, x, c->in.rect.left,
> + c->out.rect.left, best_in_left / 8192,
> + (best_in_left % 8192) * 10000 / 8192,
> + out->rect.left +
> + DIV_ROUND_CLOSEST((c->in.rect.left -
> + in->rect.left) *
> + (8192 >> h_downsize_coeff),
> + h_resize_opt));
> + }
> +
> + /* Determine tile width from left and right edges */
> + c->out.rect.width = out_right - c->out.rect.left;
> + c->in.rect.width = in_right - c->in.rect.left;
> +
> + /* Now we can determine the actual per-tile scaling factor */
> + if (x == htiles - 1) {
> + /*
> + * Round down for the right column, since we
> + * don't want to read beyond the right edge.
> + */
> + h_resize_coeff = 8192 * ((c->in.rect.width >>
> + h_downsize_coeff) - 1) /
> + (c->out.rect.width - 1);
> + } else {
> + /*
> + * Round to closest for seams between tiles for
> + * minimal distortion.
> + */
> + h_resize_coeff = DIV_ROUND_CLOSEST(8192 *
> + (c->in.rect.width >>
> + h_downsize_coeff),
> + c->out.rect.width);
> + }
> +
> + /*
> + * With the scaling factor known, round up output width
> + * to burst size. In ROUND_UP and ROUND_DOWN scaling mode
> + * this is a no-op for the right column.
> + */
> + c->out.rect.width = round_up(c->out.rect.width, out_burst);
> +
> + /*
> + * Calculate input width from the last accessed input pixel
> + * given output width and scaling coefficients. Round to
> + * burst size.
> + */
> + c->in.rect.width = (DIV_ROUND_UP((c->out.rect.width - 1) *
> + h_resize_coeff, 8192) + 1)
> + << h_downsize_coeff;
> + c->in.rect.width = round_up(c->in.rect.width, in_burst);
> +
> + for (y = vtiles - 1; y >= 0; y--) {
> + int in_bottom, out_bottom;
> +
> + memcpy(&c->in.pix, &in->pix,
> + sizeof(struct v4l2_pix_format));
> +
> + if (y == vtiles - 1) {
> + out_bottom = out->rect.top + out->rect.height;
> + in_bottom = in->rect.top + in->rect.height;
> + } else {
> + struct image_convert_ctx *c_below = c - 1;
> +
> + out_bottom = c_below->out.rect.top;
> + in_bottom = c_below->in.rect.top;
> +
> + /*
> + * Copy horizontal parameters from the tile
> + * below
> + */
> + c->out.rect.left = c_below->out.rect.left;
> + c->out.rect.width = c_below->out.rect.width;
> + c->in.rect.left = c_below->in.rect.left;
> + c->in.rect.width = c_below->in.rect.width;
> + }
> +
> + if (y == 0) {
> + c->out.rect.top = out->rect.top;
> + c->in.rect.top = in->rect.top;
> + } else {
> + int best_top, best_in_top;
> + int min_top, max_top;
> + int min_diff = INT_MAX;
> +
> + /*
> + * Find the best possible top edge. It must be
> + * adjusted according to IDMAC limitations, and
> + * should be chosen so that
> + * (in->rect.top + (c->out.rect.top -
> + * out->rect.top) * v_resize_opt /
> + * (8192 >> v_downsize_coeff))
> + * is as close as possible to a valid top edge
> + * in the input.
> + */
> + min_top = max(0,
> + round_up(out_bottom - 1024,
> + out_valign));
> + max_top = min(round_down(out_bottom,
> + out_halign), y * 1024);
> + best_top = min_top;
> + best_in_top = (best_top - out->rect.top) *
> + v_resize_opt;
> + for (top = min_top; top < max_top;
> + top += out_valign) {
> + int diff, in_top;
> +
> + in_top = in->rect.top +
> + (((top - out->rect.top) *
> + v_resize_opt) <<
> + v_downsize_coeff);
> + diff = abs(in_top -
> + round_closest(in_top, 8192 *
> + in_valign));
> +
> + if (diff < min_diff) {
> + min_diff = diff;
> + best_top = top;
> + best_in_top = in_top;
> + }
> + }
> +
> + c->out.rect.top = best_top;
> + c->in.rect.top = DIV_ROUND_CLOSEST(best_in_top,
> + 8192);
> +
> + dev_dbg(ipu->dev,
> + "%s: tile(%d,%d):\ttop: %d -> %d (instead of %d.%04d -> %d)",
> + __func__, x, y, c->in.rect.top,
> + c->out.rect.top, best_in_top / 8192,
> + (best_in_top % 8192) * 10000 / 8192,
> + out->rect.top +
> + DIV_ROUND_CLOSEST((c->in.rect.top -
> + in->rect.top) * (8192
> + >> v_downsize_coeff),
> + v_resize_opt));
> + }
> +
> + /* Determine tile height from top and bottom edges */
> + c->out.rect.height = out_bottom - c->out.rect.top;
> + c->in.rect.height = in_bottom - c->in.rect.top;
> +
> + /*
> + * Now we can determine the actual vertical per-tile
> + * scaling factor
> + */
> + if (y == vtiles - 1) {
> + /*
> + * Round down for the bottom row, since we
> + * don't want to read beyond the lower border.
> + */
> + v_resize_coeff = 8192 * ((c->in.rect.height >>
> + v_downsize_coeff) - 1)
> + / (c->out.rect.height - 1);
> + } else {
> + /*
> + * Round to closest for seams between tiles for
> + * minimal distortion.
> + */
> + v_resize_coeff = DIV_ROUND_CLOSEST(8192 *
> + (c->in.rect.height >>
> + v_downsize_coeff),
> + c->out.rect.height);
> + }
> +
> + /*
> + * With the scaling factor known, round up output height
> + * to IDMAC limitations
> + */
> + c->out.rect.height = round_up(c->out.rect.height,
> + out_valign);
> +
> + /*
> + * Calculate input height from the last accessed input
> + * line given output height and scaling coefficients.
> + */
> + c->in.rect.height = (DIV_ROUND_UP(
> + (c->out.rect.height - 1) *
> + v_resize_coeff, 8192) + 1)
> + << v_downsize_coeff;
> +
> + /* align height according to IDMAC restrictions */
> + c->in.rect.height = round_up(c->in.rect.height,
> + in_valign);
> +
> + memcpy(&c->out.pix, &out->pix,
> + sizeof(struct v4l2_pix_format));
> +
> + dev_dbg(ipu->dev,
> + "%s: tile(%d,%d): %dx%d(%dx%d@%d,%d) -> %dx%d(%dx%d@%d,%d), resize: %dx%d\n",
> + __func__, x, y,
> + c->in.pix.width, c->in.pix.height,
> + c->in.rect.width, c->in.rect.height,
> + c->in.rect.left, c->in.rect.top,
> + c->out.pix.width, c->out.pix.height,
> + c->out.rect.width, c->out.rect.height,
> + c->out.rect.left, c->out.rect.top,
> + h_resize_coeff, v_resize_coeff);
> +
> + c->rsc = (v_downsize_coeff << 30) |
> + (v_resize_coeff << 16) |
> + (h_downsize_coeff << 14) |
> + h_resize_coeff;
> +
> + c++;
> + }
> + }
> +
> + *num_tiles = htiles * vtiles;
> +
> + return ctx;
> +}
> +EXPORT_SYMBOL_GPL(ipu_image_convert_prepare);
> +
> +int ipu_image_convert_run(struct ipu_soc *ipu, struct ipu_image *in,
> + struct ipu_image *out, struct image_convert_ctx *ctx,
> + int num_tiles, void (*complete)(void *ctx, int err),
> + void *complete_context, bool free_ctx)
> +{
> + struct ipu_ic_priv *priv = ipu->ic_priv;
> + struct ipu_ic *ic = &priv->task[IC_TASK_POST_PROCESSOR];
> + unsigned long flags;
> + int i;
> +
> + for (i = 0; i < num_tiles; i++) {
> + ctx[i].in.phys0 = in->phys0;
> + ctx[i].out.phys0 = out->phys0;
> + }
> + ctx[num_tiles - 1].complete = complete;
> + ctx[num_tiles - 1].complete_context = complete_context;
> + if (free_ctx)
> + ctx[num_tiles - 1].freep = ctx;
> +
> + spin_lock_irqsave(&priv->lock, flags);
> +
> + for (i = 0; i < num_tiles; i++)
> + list_add_tail(&ctx[i].list, &ic->image_list);
> +
> + queue_work(ic->workqueue, &ic->work);
> +
> + spin_unlock_irqrestore(&priv->lock, flags);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(ipu_image_convert_run);
> +
> +static int ipu_image_convert_init(struct device *dev, struct ipu_soc *ipu,
> + struct ipu_ic_priv *priv)
> +{
> + int ret;
> + struct ipu_ic *ic = ipu_ic_get(ipu, IC_TASK_POST_PROCESSOR);
> + int irq = ipu_idmac_channel_irq(ipu, ic->output_channel,
> + IPU_IRQ_EOF);
> +
> + ic->workqueue = create_singlethread_workqueue(dev_name(ipu->dev));
> + if (!ic->workqueue)
> + return -ENOMEM;
> +
> + INIT_WORK(&ic->work, ipu_image_convert_work);
> + init_completion(&ic->complete);
> +
> + ret = devm_request_threaded_irq(dev, irq, NULL,
> + ipu_image_convert_handler,
> + IRQF_ONESHOT, "IC PP", ic);
> + if (ret)
> + goto err;
> +
> + return 0;
> +err:
> + destroy_workqueue(ic->workqueue);
> + return ret;
> +}
> +
> int ipu_ic_enable(struct ipu_ic *ic)
> {
> struct ipu_ic_priv *priv = ic->priv;
> @@ -736,12 +1473,30 @@ int ipu_ic_init(struct ipu_soc *ipu, struct device *dev,
> priv->ipu = ipu;
>
> for (i = 0; i < IC_NUM_TASKS; i++) {
> + INIT_LIST_HEAD(&priv->task[i].image_list);
> priv->task[i].task = i;
> priv->task[i].priv = priv;
> priv->task[i].reg = &ic_task_reg[i];
> priv->task[i].bit = &ic_task_bit[i];
> +
> + priv->task[i].input_channel = ipu_idmac_get(ipu,
> + ic_task_ch[i].in);
> + priv->task[i].output_channel = ipu_idmac_get(ipu,
> + ic_task_ch[i].out);
> + priv->task[i].rotation_input_channel = ipu_idmac_get(ipu,
> + ic_task_ch[i].rot_in);
> + priv->task[i].rotation_output_channel = ipu_idmac_get(ipu,
> + ic_task_ch[i].rot_out);
> + if (ic_task_ch[i].in_prev) {
> + priv->task[i].input_channel_p = ipu_idmac_get(ipu,
> + ic_task_ch[i].in_prev);
> + priv->task[i].input_channel_n = ipu_idmac_get(ipu,
> + ic_task_ch[i].in_next);
> + }
> }
>
> + ipu_image_convert_init(dev, ipu, priv);
> +
> return 0;
> }
>
> diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
> index 459508e..6d98a38 100644
> --- a/include/video/imx-ipu-v3.h
> +++ b/include/video/imx-ipu-v3.h
> @@ -316,7 +316,8 @@ int ipu_ic_task_init(struct ipu_ic *ic,
> int in_width, int in_height,
> int out_width, int out_height,
> enum ipu_color_space in_cs,
> - enum ipu_color_space out_cs);
> + enum ipu_color_space out_cs,
> + u32 rsc);
> int ipu_ic_task_graphics_init(struct ipu_ic *ic,
> enum ipu_color_space in_g_cs,
> bool galpha_en, u32 galpha,
> @@ -362,4 +363,35 @@ struct ipu_client_platformdata {
> int dma[2];
> };
>
> +enum ipu_image_scale_ctrl {
> + IPU_IMAGE_SCALE_ROUND_DOWN,
> + IPU_IMAGE_SCALE_PIXELPERFECT,
> + IPU_IMAGE_SCALE_ROUND_UP,
> +};
> +
> +struct image_convert_ctx;
> +
> +struct image_convert_ctx *ipu_image_convert_prepare(struct ipu_soc *ipu,
> + struct ipu_image *in, struct ipu_image *out,
> + enum ipu_image_scale_ctrl ctrl, int *num_tiles);
> +int ipu_image_convert_run(struct ipu_soc *ipu, struct ipu_image *in,
> + struct ipu_image *out, struct image_convert_ctx *ctx,
> + int num_tiles, void (*complete)(void *ctx, int err),
> + void *complete_context, bool free_ctx);
> +
> +static inline int ipu_image_convert(struct ipu_soc *ipu, struct ipu_image *in,
> + struct ipu_image *out, void (*complete)(void *ctx, int err),
> + void *complete_context, enum ipu_image_scale_ctrl ctrl)
> +{
> + struct image_convert_ctx *ctx;
> + int num_tiles;
> +
> + ctx = ipu_image_convert_prepare(ipu, in, out, ctrl, &num_tiles);
> + if (IS_ERR(ctx))
> + return PTR_ERR(ctx);
> +
> + return ipu_image_convert_run(ipu, in, out, ctx, num_tiles, complete,
> + complete_context, true);
> +}
> +
> #endif /* __DRM_IPU_H__ */
> --
> 2.1.4
>
More information about the dri-devel
mailing list