[Libva] [PATCH intel-driver 8/8] vpp: enable advanced video scaling in VPP pipelines too.

Mon Oct 13 23:46:42 PDT 2014

2014-10-13 19:27 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> Honour advanced video scaling, i.e. propagate vaPutSurface() scaling
> flags, and also the VPP filter flags. Also enable the sharp 8x8 filter
> for high-quality scaling options, while adaptive video scaling is
> disabled (bypassed) for now.
>
> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> ---
>  src/gen75_vpp_vebox.c      |  3 ++-
>  src/gen8_post_processing.c |  5 +++--
>  src/i965_post_processing.c | 28 +++++++++++++++++++++-------
>  src/i965_post_processing.h |  9 +++++++++
>  4 files changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c
> index b7c2ea0..68f3071 100644
> --- a/src/gen75_vpp_vebox.c
> +++ b/src/gen75_vpp_vebox.c
> @@ -1261,12 +1261,13 @@ int hsw_veb_post_format_convert(VADriverContextP ctx,
>          vpp_surface_convert(ctx,proc_ctx->surface_output_object, obj_surface);
>
>      } else if(proc_ctx->format_convert_flags & POST_SCALING_CONVERT) {
> +        VAProcPipelineParameterBuffer * const pipe = proc_ctx->pipeline_param;
>         /* scaling, convert and copy NV12 to YV12/IMC3/IMC2/RGBA output*/
>          assert(obj_surface->fourcc == VA_FOURCC_NV12);
>
>          /* first step :surface scaling */
>          vpp_surface_scaling(ctx, proc_ctx->surface_output_scaled_object,
> -            obj_surface, 0);
> +            obj_surface, pipe->filter_flags);
>
>          /* second step: color format convert and copy to output */
>          obj_surface = proc_ctx->surface_output_object;
> diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
> index bd798f3..eb17a86 100644
> --- a/src/gen8_post_processing.c
> +++ b/src/gen8_post_processing.c
> @@ -895,7 +895,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
>
>      assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
> @@ -962,7 +962,8 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>              intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
>      }
>
> -    sampler_8x8->dw152.default_sharpness_level = 0;
> +    sampler_8x8->dw152.default_sharpness_level =
> +        -avs_is_needed(pp_context->filter_flags);
>      sampler_8x8->dw153.adaptive_filter_for_all_channel = 1;
>      sampler_8x8->dw153.bypass_y_adaptive_filtering = 1;
>      sampler_8x8->dw153.bypass_x_adaptive_filtering = 1;
> diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
> index 6642bbd..ea80854 100755
> --- a/src/i965_post_processing.c
> +++ b/src/i965_post_processing.c
> @@ -2484,7 +2484,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
>
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
>
>      assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
> @@ -2546,7 +2546,8 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
>      /* Adaptive filter for all channels (DW4.15) */
>      sampler_8x8_state->coefficients[0].dw4.table_1x_filter_c1 = 1U << 7;
>
> -    sampler_8x8_state->dw136.default_sharpness_level = 0;
> +    sampler_8x8_state->dw136.default_sharpness_level =
> +        -avs_is_needed(pp_context->filter_flags);
>      sampler_8x8_state->dw137.ilk.bypass_y_adaptive_filtering = 1;
>      sampler_8x8_state->dw137.ilk.bypass_x_adaptive_filtering = 1;
>      dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
> @@ -2839,7 +2840,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, pp_context->filter_flags);
>
>      assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
> @@ -2898,7 +2899,8 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>              intel_format_convert(coeffs->uv_k_v[3], 1, 6, 1);
>      }
>
> -    sampler_8x8_state->dw136.default_sharpness_level = 0;
> +    sampler_8x8_state->dw136.default_sharpness_level =
> +        -avs_is_needed(pp_context->filter_flags);
>      if (IS_HASWELL(i965->intel.device_info)) {
>          sampler_8x8_state->dw137.hsw.adaptive_filter_for_all_channel = 1;
>          sampler_8x8_state->dw137.hsw.bypass_y_adaptive_filtering = 1;

Enabling adaptive filtering would be a better option, but I was not
really inspired yesterday. So, for now, only the sharp 8x8 filter is
used for HQ scaling, and it looks good enough to me at the moment.

> @@ -4810,6 +4812,8 @@ i965_scaling_processing(
>      if (HAS_VPP(i965)) {
>          struct i965_surface src_surface;
>          struct i965_surface dst_surface;
> +        struct i965_post_processing_context *pp_context;
> +        unsigned int filter_flags;
>
>           _i965LockMutex(&i965->pp_mutex);
>
> @@ -4820,10 +4824,16 @@ i965_scaling_processing(
>           dst_surface.type = I965_SURFACE_TYPE_SURFACE;
>           dst_surface.flags = I965_SURFACE_FLAG_FRAME;
>
> -         va_status = i965_post_processing_internal(ctx, i965->pp_context,
> +         pp_context = i965->pp_context;
> +         filter_flags = pp_context->filter_flags;
> +         pp_context->filter_flags = va_flags;
> +
> +         va_status = i965_post_processing_internal(ctx, pp_context,
>               &src_surface, src_rect, &dst_surface, dst_rect,
>               avs_is_needed(va_flags) ? PP_NV12_AVS : PP_NV12_SCALING, NULL);
>
> +         pp_context->filter_flags = filter_flags;
> +
>           _i965UnlockMutex(&i965->pp_mutex);
>      }
>
> @@ -4850,6 +4860,7 @@ i965_post_processing(
>          VAStatus status;
>          struct i965_surface src_surface;
>          struct i965_surface dst_surface;
> +        struct i965_post_processing_context *pp_context;
>
>          /* Currently only support post processing for NV12 surface */
>          if (obj_surface->fourcc != VA_FOURCC_NV12)
> @@ -4857,6 +4868,8 @@ i965_post_processing(
>
>          _i965LockMutex(&i965->pp_mutex);
>
> +        pp_context = i965->pp_context;
> +        pp_context->filter_flags = va_flags;
>          if (avs_is_needed(va_flags)) {
>              struct i965_render_state *render_state = &i965->render_state;
>              struct intel_region *dest_region = render_state->draw_region;
> @@ -4878,13 +4891,13 @@ i965_post_processing(
>              obj_surface = SURFACE(out_surface_id);
>              assert(obj_surface);
>              i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
> -            i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0);
> +            i965_vpp_clear_surface(ctx, pp_context, obj_surface, 0);
>
>              dst_surface.base = (struct object_base *)obj_surface;
>              dst_surface.type = I965_SURFACE_TYPE_SURFACE;
>              dst_surface.flags = I965_SURFACE_FLAG_FRAME;
>
> -            i965_post_processing_internal(ctx, i965->pp_context,
> +            i965_post_processing_internal(ctx, pp_context,
>                                            &src_surface,
>                                            src_rect,
>                                            &dst_surface,
> @@ -5653,6 +5666,7 @@ i965_proc_picture(VADriverContextP ctx,
>                                        NULL);
>      } else {
>
> +        proc_context->pp_context.filter_flags = pipeline_param->filter_flags;
>          i965_post_processing_internal(ctx, &proc_context->pp_context,
>                                        &src_surface,
>                                        &src_rect,
> diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
> index a567527..271941e 100755
> --- a/src/i965_post_processing.h
> +++ b/src/i965_post_processing.h
> @@ -491,6 +491,15 @@ struct i965_post_processing_context
>      struct pp_dn_context pp_dn_context;
>      void *private_context; /* pointer to the current private context */
>      void *pipeline_param;  /* pointer to the pipeline parameter */
> +    /**
> +     * \ref Extra filter flags used as a fast path.
> +     *
> +     * This corresponds to vaPutSurface() flags, for direct rendering,
> +     * or to VAProcPipelineParameterBuffer.filter_flags when the VPP
> +     * interfaces are used. In the latter case, this is just a copy of
> +     * that field.
> +     */
> +    unsigned int filter_flags;
>
>      int (*pp_x_steps)(void *private_context);
>      int (*pp_y_steps)(void *private_context);
> --
> 1.9.1
>

-- 
Gwenole Beauchesne
Intel Corporation SAS / 2 rue de Paris, 92196 Meudon Cedex, France
Registration Number (RCS): Nanterre B 302 456 199