[Libva] [PATCH intel-driver 6/8] vpp: cache calculation of AVS coefficients.
Gwenole Beauchesne
gb.devel at gmail.com
Mon Oct 13 23:43:31 PDT 2014
2014-10-13 19:27 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> If scaling parameters don't change, i.e. if the same scaling algorithm
> and factors are used, there is no point in calculating the filter
> coefficients again. So, just cache them into the existing AVS context.
>
> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> ---
> src/gen8_post_processing.c | 13 ++++++-------
> src/i965_post_processing.c | 27 +++++++++++++--------------
> src/i965_post_processing.h | 3 +++
> src/i965_vpp_avs.c | 11 +++++++++++
> src/i965_vpp_avs.h | 6 ++++++
> 5 files changed, 39 insertions(+), 21 deletions(-)
>
> diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
> index 5632ca8..bd798f3 100644
> --- a/src/gen8_post_processing.c
> +++ b/src/gen8_post_processing.c
> @@ -39,7 +39,6 @@
> #include "i965_drv_video.h"
> #include "i965_post_processing.h"
> #include "i965_render.h"
> -#include "i965_vpp_avs.h"
> #include "intel_media.h"
>
> #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
> @@ -779,7 +778,7 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
> int width[3], height[3], pitch[3], offset[3];
> int src_width, src_height;
> unsigned char *cc_ptr;
> - AVSState avs;
> + AVSState * const avs = &pp_avs_context->state;
> float sx, sy;
>
> memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
> @@ -894,17 +893,15 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
> sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
> sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
>
> - avs_init_state(&avs, &gen8_avs_config);
> -
> sx = (float)dst_rect->width / src_rect->width;
> sy = (float)dst_rect->height / src_rect->height;
> - avs_update_coefficients(&avs, sx, sy, 0);
> + avs_update_coefficients(avs, sx, sy, 0);
>
> - assert(avs.config->num_phases == 16);
> + assert(avs->config->num_phases == 16);
> for (i = 0; i <= 16; i++) {
> struct gen8_sampler_8x8_avs_coefficients * const sampler_8x8_state =
> &sampler_8x8->coefficients[i];
> - const AVSCoeffs * const coeffs = &avs.coeffs[i];
> + const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
> sampler_8x8_state->dw0.table_0x_filter_c0 =
> intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -1506,4 +1503,6 @@ gen8_post_processing_context_init(VADriverContextP ctx,
>
> pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data);
> pp_context->curbe_size = 256;
> +
> + avs_init_state(&pp_context->pp_avs_context.state, &gen8_avs_config);
> }
> diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
> index 3e479a8..1e932fd 100755
> --- a/src/i965_post_processing.c
> +++ b/src/i965_post_processing.c
> @@ -38,7 +38,6 @@
> #include "i965_drv_video.h"
> #include "i965_post_processing.h"
> #include "i965_render.h"
> -#include "i965_vpp_avs.h"
> #include "intel_media.h"
>
> extern VAStatus
> @@ -2430,7 +2429,7 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
> int in_w, in_h, in_wpitch, in_hpitch;
> int out_w, out_h, out_wpitch, out_hpitch;
> int i;
> - AVSState avs;
> + AVSState * const avs = &pp_avs_context->state;
> float sx, sy;
>
> /* surface */
> @@ -2483,16 +2482,13 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
> sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
> memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
>
> - avs_init_state(&avs, IS_GEN6(i965->intel.device_info) ? &gen6_avs_config :
> - &gen5_avs_config);
> -
> sx = (float)dst_rect->width / src_rect->width;
> sy = (float)dst_rect->height / src_rect->height;
> - avs_update_coefficients(&avs, sx, sy, 0);
> + avs_update_coefficients(avs, sx, sy, 0);
>
> - assert(avs.config->num_phases == 16);
> + assert(avs->config->num_phases == 16);
> for (i = 0; i <= 16; i++) {
> - const AVSCoeffs * const coeffs = &avs.coeffs[i];
> + const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
> sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
> intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -2819,7 +2815,7 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
> int index, i;
> int width[3], height[3], pitch[3], offset[3];
> int src_width, src_height;
> - AVSState avs;
> + AVSState * const avs = &pp_avs_context->state;
> float sx, sy;
>
> /* source surface */
> @@ -2841,15 +2837,13 @@ gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
> sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
> memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
>
> - avs_init_state(&avs, &gen6_avs_config);
> -
> sx = (float)dst_rect->width / src_rect->width;
> sy = (float)dst_rect->height / src_rect->height;
> - avs_update_coefficients(&avs, sx, sy, 0);
> + avs_update_coefficients(avs, sx, sy, 0);
>
> - assert(avs.config->num_phases == 16);
> + assert(avs->config->num_phases == 16);
> for (i = 0; i <= 16; i++) {
> - const AVSCoeffs * const coeffs = &avs.coeffs[i];
> + const AVSCoeffs * const coeffs = &avs->coeffs[i];
>
> sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 =
> intel_format_convert(coeffs->y_k_h[0], 1, 6, 1);
> @@ -5376,6 +5370,7 @@ i965_post_processing_context_init(VADriverContextP ctx,
> struct i965_driver_data *i965 = i965_driver_data(ctx);
> int i;
> struct i965_post_processing_context *pp_context = data;
> + const AVSConfig *avs_config;
>
> if (IS_IRONLAKE(i965->intel.device_info)) {
> pp_context->urb.size = i965->intel.device_info->urb_size;
> @@ -5442,6 +5437,10 @@ i965_post_processing_context_init(VADriverContextP ctx,
> pp_context->pp_dndi_context.current_out_obj_surface = NULL;
> pp_context->pp_dndi_context.frame_order = -1;
> pp_context->batch = batch;
> +
> + avs_config = IS_IRONLAKE(i965->intel.device_info) ? &gen5_avs_config :
> + &gen6_avs_config;
> + avs_init_state(&pp_context->pp_avs_context.state, avs_config);
> }
>
> bool
> diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
> index f0a277e..1350401 100755
> --- a/src/i965_post_processing.h
> +++ b/src/i965_post_processing.h
> @@ -29,6 +29,8 @@
> #ifndef __I965_POST_PROCESSING_H__
> #define __I965_POST_PROCESSING_H__
>
> +#include "i965_vpp_avs.h"
> +
> #define MAX_PP_SURFACES 48
>
> #define I965_PP_FLAG_TOP_FIELD 1
> @@ -79,6 +81,7 @@ struct pp_scaling_context
>
> struct pp_avs_context
> {
> + AVSState state;
> int dest_x; /* in pixel */
> int dest_y; /* in pixel */
> int dest_w;
> diff --git a/src/i965_vpp_avs.c b/src/i965_vpp_avs.c
> index f5a0ba9..43abc25 100644
> --- a/src/i965_vpp_avs.c
> +++ b/src/i965_vpp_avs.c
> @@ -208,6 +208,9 @@ void
> avs_init_state(AVSState *avs, const AVSConfig *config)
> {
> avs->config = config;
> + avs->flags = 0;
> + avs->scale_x = 0.0f;
> + avs->scale_y = 0.0f;
> }
>
> /* Updates AVS coefficients for the supplied factors and quality level */
> @@ -216,6 +219,10 @@ avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags)
> {
> AVSGenCoeffsFunc gen_coeffs;
>
> + flags &= VA_FILTER_SCALING_MASK;
> + if (flags == avs->flags && sx == avs->scale_x && sy == avs->scale_y)
> + return true;
> +
> switch (flags & VA_FILTER_SCALING_MASK) {
> case VA_FILTER_SCALING_HQ:
> gen_coeffs = avs_gen_coeffs_lanczos;
Some fine-tuning I just thought about: the scale factors only need to be
compared for HQ scaling and above. The coefficients for bilinear (default)
filtering are not going to change.
> @@ -228,5 +235,9 @@ avs_update_coefficients(AVSState *avs, float sx, float sy, uint32_t flags)
> assert(0 && "invalid set of coefficients generated");
> return false;
> }
> +
> + avs->flags = flags;
> + avs->scale_x = sx;
> + avs->scale_y = sy;
> return true;
> }
> diff --git a/src/i965_vpp_avs.h b/src/i965_vpp_avs.h
> index b209468..ead4261 100644
> --- a/src/i965_vpp_avs.h
> +++ b/src/i965_vpp_avs.h
> @@ -81,6 +81,12 @@ struct avs_config {
> struct avs_state {
> /** Per-generation configuration parameters */
> const AVSConfig *config;
> + /** Scaling flags */
> + uint32_t flags;
> + /** Scaling factor on the X-axis (horizontal) */
> + float scale_x;
> + /** Scaling factor on the Y-axis (vertical) */
> + float scale_y;
> /** Coefficients for the polyphase scaler */
> AVSCoeffs coeffs[AVS_MAX_PHASES + 1];
> };
> --
> 1.9.1
>
More information about the Libva mailing list