[Libva] [PATCH 7/8] Execute the 10-bit scaling for P010 surface on Gen9+

Fri Nov 11 07:21:07 UTC 2016

On 11/11/2016 03:05 PM, Xiang, Haihao wrote:
> On Wed, 2016-11-09 at 14:39 -0500, Zhao Yakui wrote:
>> Now the 10-bit scaling based on GPU shader is supported on Gen9+. In
>> such
>> case it will use the 10-bit scaling based on GPU shader instead of
>> three-steps
>> by using VEBOX(VEBOX->NV12->Scale NV12->VEBOX).
>
> The 2nd step (NV12 to scale NV12) doesn't use VEBOX.

Yes. That description is only meant to indicate that the three-step path
needs the intermediate buffer for the corresponding conversion.

I will update the change log in the next version.

>
>>   Of course when the size is
>> not changed, it still falls back to VEBOX.
>>
>>
>> Signed-off-by: Zhao Yakui<yakui.zhao at intel.com>
>> ---
>>   src/gen75_picture_process.c | 50
>> +++++++++++++++++++++++++++++++++++++++------
>>   1 file changed, 44 insertions(+), 6 deletions(-)
>>
>> diff --git a/src/gen75_picture_process.c
>> b/src/gen75_picture_process.c
>> index 069088a..4f7f794 100644
>> --- a/src/gen75_picture_process.c
>> +++ b/src/gen75_picture_process.c
>> @@ -38,6 +38,7 @@
>>   #include "i965_drv_video.h"
>>   #include "i965_post_processing.h"
>>   #include "gen75_picture_process.h"
>> +#include "gen8_post_processing.h"
>>
>>   extern struct hw_context *
>>   i965_proc_context_init(VADriverContextP ctx,
>> @@ -91,6 +92,21 @@ gen75_vpp_vebox(VADriverContextP ctx,
>>        return va_status;
>>   }
>>
>> +static int intel_gpe_support_10bit_scaling(struct
>> intel_video_process_context *proc_ctx)
>> +{
>> +    struct i965_proc_context *gpe_proc_ctx;
>> +
>> +    if (!proc_ctx || !proc_ctx->vpp_fmt_cvt_ctx)
>> +        return 0;
>> +
>> +    gpe_proc_ctx = (struct i965_proc_context *)proc_ctx-
>>> vpp_fmt_cvt_ctx;
>> +
>> +    if (gpe_proc_ctx->pp_context.scaling_context_initialized)
>> +        return 1;
>> +    else
>> +        return 0;
>> +}
>> +
>>   VAStatus
>>   gen75_proc_picture(VADriverContextP ctx,
>>                      VAProfile profile,
>> @@ -165,12 +181,6 @@ gen75_proc_picture(VADriverContextP ctx,
>>           i965_check_alloc_surface_bo(ctx, obj_dst_surf, is_tiled,
>> fourcc, sampling);
>>       }
>>
>> -    proc_ctx->surface_render_output_object = obj_dst_surf;
>> -    proc_ctx->surface_pipeline_input_object = obj_src_surf;
>> -    assert(pipeline_param->num_filters<= 4);
>> -
>> -    int vpp_stage1 = 0, vpp_stage2 = 1, vpp_stage3 = 0;
>> -
>>       if (pipeline_param->surface_region) {
>>           src_rect.x = pipeline_param->surface_region->x;
>>           src_rect.y = pipeline_param->surface_region->y;
>> @@ -195,6 +205,34 @@ gen75_proc_picture(VADriverContextP ctx,
>>           dst_rect.height = obj_dst_surf->orig_height;
>>       }
>>
>> +    if (pipeline_param->num_filters == 0 || pipeline_param->filters
>> == NULL ){
>> +        if ((obj_src_surf->fourcc == VA_FOURCC_P010)&&
>> +            (obj_dst_surf->fourcc == VA_FOURCC_P010)&&
>> +            (src_rect.width != dst_rect.width ||
>> +                 src_rect.height != dst_rect.height)&&
>> +            intel_gpe_support_10bit_scaling(proc_ctx)) {
>> +            struct i965_proc_context *gpe_proc_ctx;
>> +            struct i965_surface src_surface, dst_surface;
>> +
>> +            src_surface.base = (struct object_base *)obj_src_surf;
>> +            src_surface.type = I965_SURFACE_TYPE_SURFACE;
>> +            dst_surface.base = (struct object_base *)obj_dst_surf;
>> +            dst_surface.type = I965_SURFACE_TYPE_SURFACE;
>> +            gpe_proc_ctx = (struct i965_proc_context *)proc_ctx-
>>> vpp_fmt_cvt_ctx;
>> +
>> +            return gen9_p010_scaling_post_processing(ctx,
>> &gpe_proc_ctx->pp_context,
>> +&src_surface,
>> &src_rect,
>> +&dst_surface,
>> &dst_rect);
>> +        }
>> +    }
>> +
>> +    proc_ctx->surface_render_output_object = obj_dst_surf;
>> +    proc_ctx->surface_pipeline_input_object = obj_src_surf;
>> +    assert(pipeline_param->num_filters<= 4);
>> +
>> +    int vpp_stage1 = 0, vpp_stage2 = 1, vpp_stage3 = 0;
>> +
>> +
>>       if(obj_src_surf->fourcc == VA_FOURCC_P010) {
>>           vpp_stage1 = 1;
>>           vpp_stage2 = 0;