[Libva] [PATCH intel-driver 6/8] vpp: cache calculation of AVS coefficients.

Gwenole Beauchesne gb.devel at gmail.com
Mon Oct 13 23:43:31 PDT 2014


2014-10-13 19:27 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> If scaling parameters don't change, i.e. if the same scaling algorithm
> and factors are used, there is no point in calculating the filter
> coefficients again. So, just cache them into the existing AVS context.
>
> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> ---
>  src/gen8_post_processing.c | 13 ++++++-------
>  src/i965_post_processing.c | 27 +++++++++++++--------------
>  src/i965_post_processing.h |  3 +++
>  src/i965_vpp_avs.c         | 11 +++++++++++
>  src/i965_vpp_avs.h         |  6 ++++++
>  5 files changed, 39 insertions(+), 21 deletions(-)
>
> diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
> index 5632ca8..bd798f3 100644
> --- a/src/gen8_post_processing.c
> +++ b/src/gen8_post_processing.c
> @@ -39,7 +39,6 @@
>  #include "i965_drv_video.h"
>  #include "i965_post_processing.h"
>  #include "i965_render.h"
> -#include "i965_vpp_avs.h"
>  #include "intel_media.h"
>
>  #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
> @@ -779,7 +778,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>      int width[3], height[3], pitch[3], offset[3];
>      int src_width, src_height;
>      unsigned char *cc_ptr;
> -    AVSState avs;
> +    AVSState * const avs = &pp_avs_context->state;
>      float sx, sy;
>
>      memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
> @@ -894,17 +893,15 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>      sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
>      sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
>
> -    avs_init_state(&avs, &gen8_avs_config);
> -
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(&avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, 0);
>
> -    assert(avs.config->num_phases == 16);
> +    assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
>          struct gen8_sampler_8x8_avs_coefficients * const sampler_8x8_state =
>              &sampler_8x8->coefficients[i];
> -        const AVSCoeffs * const coeffs = &avs.coeffs[i];
> +        const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
>          sampler_8x8_state->dw0.table_0x_filter_c0 =
>              intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -1506,4 +1503,6 @@ gen8_post_processing_context_init(VADriverContextP ctx,
>
>      pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data);
>      pp_context->curbe_size = 256;
> +
> +    avs_init_state(&pp_context->pp_avs_context.state, &gen8_avs_config);
>  }
> diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
> index 3e479a8..1e932fd 100755
> --- a/src/i965_post_processing.c
> +++ b/src/i965_post_processing.c
> @@ -38,7 +38,6 @@
>  #include "i965_drv_video.h"
>  #include "i965_post_processing.h"
>  #include "i965_render.h"
> -#include "i965_vpp_avs.h"
>  #include "intel_media.h"
>
>  extern VAStatus
> @@ -2430,7 +2429,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
>      int in_w, in_h, in_wpitch, in_hpitch;
>      int out_w, out_h, out_wpitch, out_hpitch;
>      int i;
> -    AVSState avs;
> +    AVSState * const avs = &pp_avs_context->state;
>      float sx, sy;
>
>      /* surface */
> @@ -2483,16 +2482,13 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
>      sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
>      memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
>
> -    avs_init_state(&avs, IS_GEN6(i965->intel.device_info) ? &gen6_avs_config :
> -        &gen5_avs_config);
> -
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(&avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, 0);
>
> -    assert(avs.config->num_phases == 16);
> +    assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
> -        const AVSCoeffs * const coeffs = &avs.coeffs[i];
> +        const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
>          sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
>              intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -2819,7 +2815,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>      int index, i;
>      int width[3], height[3], pitch[3], offset[3];
>      int src_width, src_height;
> -    AVSState avs;
> +    AVSState * const avs = &pp_avs_context->state;
>      float sx, sy;
>
>      /* source surface */
> @@ -2841,15 +2837,13 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
>      sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
>      memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
>
> -    avs_init_state(&avs, &gen6_avs_config);
> -
>      sx = (float)dst_rect->width / src_rect->width;
>      sy = (float)dst_rect->height / src_rect->height;
> -    avs_update_coefficients(&avs, sx, sy, 0);
> +    avs_update_coefficients(avs, sx, sy, 0);
>
> -    assert(avs.config->num_phases == 16);
> +    assert(avs->config->num_phases == 16);
>      for (i = 0; i <= 16; i++) {
> -        const AVSCoeffs * const coeffs = &avs.coeffs[i];
> +        const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
>          sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
>              intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -5376,6 +5370,7 @@ i965_post_processing_context_init(VADriverContextP ctx,
>      struct i965_driver_data *i965 = i965_driver_data(ctx);
>      int i;
>      struct i965_post_processing_context *pp_context = data;
> +    const AVSConfig *avs_config;
>
>      if (IS_IRONLAKE(i965->intel.device_info)) {
>         pp_context->urb.size = i965->intel.device_info->urb_size;
> @@ -5442,6 +5437,10 @@ i965_post_processing_context_init(VADriverContextP ctx,
>      pp_context->pp_dndi_context.current_out_obj_surface = NULL;
>      pp_context->pp_dndi_context.frame_order = -1;
>      pp_context->batch = batch;
> +
> +    avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
> +        &gen6_avs_config;
> +    avs_init_state(&pp_context->pp_avs_context.state, avs_config);
>  }
>
>  bool
> diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
> index f0a277e..1350401 100755
> --- a/src/i965_post_processing.h
> +++ b/src/i965_post_processing.h
> @@ -29,6 +29,8 @@
>  #ifndef __I965_POST_PROCESSING_H__
>  #define __I965_POST_PROCESSING_H__
>
> +#include "i965_vpp_avs.h"
> +
>  #define MAX_PP_SURFACES                 48
>
>  #define I965_PP_FLAG_TOP_FIELD          1
> @@ -79,6 +81,7 @@ struct pp_scaling_context
>
>  struct pp_avs_context
>  {
> +    AVSState state;
>      int dest_x; /* in pixel */
>      int dest_y; /* in pixel */
>      int dest_w;
> diff --git a/src/i965_vpp_avs.c b/src/i965_vpp_avs.c
> index f5a0ba9..43abc25 100644
> --- a/src/i965_vpp_avs.c
> +++ b/src/i965_vpp_avs.c
> @@ -208,6 +208,9 @@ void
>  avs_init_state(AVSState *avs, const AVSConfig *config)
>  {
>      avs->config = config;
> +    avs->flags = 0;
> +    avs->scale_x = 0.0f;
> +    avs->scale_y = 0.0f;
>  }
>
>  /* Updates AVS coefficients for the supplied factors and quality level */
> @@ -216,6 +219,10 @@ avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags)
>  {
>      AVSGenCoeffsFunc gen_coeffs;
>
> +    flags &= VA_FILTER_SCALING_MASK;
> +    if (flags == avs->flags && sx == avs->scale_x && sy == avs->scale_y)
> +        return true;
> +
>      switch (flags & VA_FILTER_SCALING_MASK) {
>      case VA_FILTER_SCALING_HQ:
>          gen_coeffs = avs_gen_coeffs_lanczos;

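Some fine-tuning I just thought about: the cache check above only needs
to compare the scale factors when HQ (or higher quality) scaling is
requested. The coefficients for bilinear (default) filtering are not
going to change with the scale factors, so for that mode comparing the
flags alone is enough.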

> @@ -228,5 +235,9 @@ avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags)
>          assert(0 && "invalid set of coefficients generated");
>          return false;
>      }
> +
> +    avs->flags = flags;
> +    avs->scale_x = sx;
> +    avs->scale_y = sy;
>      return true;
>  }
> diff --git a/src/i965_vpp_avs.h b/src/i965_vpp_avs.h
> index b209468..ead4261 100644
> --- a/src/i965_vpp_avs.h
> +++ b/src/i965_vpp_avs.h
> @@ -81,6 +81,12 @@ struct avs_config {
>  struct avs_state {
>      /** Per-generation configuration parameters */
>      const AVSConfig *config;
> +    /** Scaling flags */
> +    uint32_t flags;
> +    /** Scaling factor on the X-axis (horizontal) */
> +    float scale_x;
> +    /** Scaling factor on the Y-axis (vertical) */
> +    float scale_y;
>      /** Coefficients for the polyphase scaler */
>      AVSCoeffs coeffs[AVS_MAX_PHASES + 1];
>  };
> --
> 1.9.1
>


More information about the Libva mailing list