[Libva] [PATCH 4/8] work around hw limitation(dword alignment) of horizontal offset
Zhao Halley
halley.zhao at intel.com
Sun Aug 12 23:50:11 PDT 2012
on dst surface left edge (nv12 avs)
---
src/i965_post_processing.c | 78 ++++++++++++++++++++++++++++++--------------
src/i965_post_processing.h | 21 +++++++-----
2 files changed, 66 insertions(+), 33 deletions(-)
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
index a0b6c13..e583317 100755
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -55,6 +55,10 @@
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
+#define GPU_ASM_BLOCK_WIDTH 16
+#define GPU_ASM_BLOCK_HEIGHT 8
+#define GPU_ASM_X_OFFSET_ALIGNMENT 4
+
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};
@@ -1687,23 +1691,36 @@ pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context
return 0;
}
-static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, int width, int height)
+static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
{
- #define BLOCK_WIDTH 16
- #define BLOCK_HEIGHT 8
- int i = 0;
- if (width%BLOCK_WIDTH){
- pp_context->block_horizontal_mask = (1 << (width%BLOCK_WIDTH)) - 1;
+ int i;
+ /* The x offset of the dest surface must be dword aligned,
+ * so we extend the dst surface on its left edge and mask out the pixels we are not interested in.
+ */
+ if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
+ pp_context->block_horizontal_mask_left = 0;
+ for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
+ {
+ pp_context->block_horizontal_mask_left |= 1<<i;
+ }
+ }
+ else {
+ pp_context->block_horizontal_mask_left = 0xffff;
+ }
+
+ int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+ if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
+ pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
}
else {
- pp_context->block_horizontal_mask = 0xffff;
+ pp_context->block_horizontal_mask_right = 0xffff;
}
- if (height%BLOCK_HEIGHT){
- pp_context->block_vertical_mask = (1 << (height%BLOCK_HEIGHT)) - 1;
+ if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
+ pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
}
else {
- pp_context->block_vertical_mask = 0xff;
+ pp_context->block_vertical_mask_bottom = 0xff;
}
}
@@ -2251,19 +2268,21 @@ pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context
pp_context->pp_y_steps = pp_avs_y_steps;
pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
- pp_avs_context->dest_x = dst_rect->x;
+ int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
+ float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
+ pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
pp_avs_context->dest_y = dst_rect->y;
- pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
+ pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
- pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
+ pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
- pp_avs_context->src_w = src_rect->width;
+ pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
pp_avs_context->src_h = src_rect->height;
pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
- pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
+ pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
pp_inline_parameter->grf5.block_count_x = 1; /* M x 1 */
pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
pp_inline_parameter->grf6.video_step_delta = 0.0;
@@ -3485,8 +3504,6 @@ ironlake_pp_initialize(
else
va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
- calculate_boundary_block_mask(pp_context, dst_rect->width, dst_rect->height);
-
return va_status;
}
@@ -3623,6 +3640,8 @@ gen6_pp_initialize(
else
va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
+ calculate_boundary_block_mask(pp_context, dst_rect);
+
return va_status;
}
@@ -3799,27 +3818,36 @@ static void update_block_mask_parameter(struct i965_post_processing_context *pp_
struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
pp_inline_parameter->grf5.block_vertical_mask = 0xff;
- pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
- pp_inline_parameter->grf6.block_vertical_mask = pp_context->block_vertical_mask;
- pp_inline_parameter->grf6.block_horizontal_mask = pp_context->block_horizontal_mask;
+ pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
+ // The first block is always on the left edge; the second block reloads horizontal_mask from grf6.block_horizontal_mask_middle
+ pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
+ pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
+ pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
/* 1 x N */
if (x_steps == 1) {
if (y == y_steps-1) {
- pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask;
+ pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
}
else {
- pp_inline_parameter->grf6.block_vertical_mask = 0xff;
+ pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
}
}
/* M x 1 */
if (y_steps == 1) {
- if (x == x_steps-1) {
- pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask;
+ if (x == 0) { // all blocks in this group are on the left edge
+ pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
+ pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left;
+ }
+ else if (x == x_steps-1) {
+ pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
+ pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
}
else {
- pp_inline_parameter->grf6.block_horizontal_mask = 0xffff;
+ pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
+ pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
+ pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
}
}
diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
index 50ba88d..463712c 100755
--- a/src/i965_post_processing.h
+++ b/src/i965_post_processing.h
@@ -313,13 +313,17 @@ struct pp_inline_parameter
/* AVS r6.0 */
float video_step_delta;
- /* r6.1 */
- unsigned int block_horizontal_mask:16;
- unsigned int block_vertical_mask:8;
- unsigned int pad:8;
+ /* r6.1 */ // sizeof(int) == 4?
+ unsigned int block_horizontal_mask_right:16;
+ unsigned int block_vertical_mask_bottom:8;
+ unsigned int pad1:8;
- /* r6.2-r6.7 */
- unsigned int padx[6];
+ /* r6.2 */
+ unsigned int block_horizontal_mask_middle:16;
+ unsigned int pad2:16;
+
+ /* r6.3-r6.7 */
+ unsigned int padx[5];
} grf6;
};
@@ -459,8 +463,9 @@ struct i965_post_processing_context
struct intel_batchbuffer *batch;
- unsigned int block_horizontal_mask:16;
- unsigned int block_vertical_mask:8;
+ unsigned int block_horizontal_mask_left:16;
+ unsigned int block_horizontal_mask_right:16;
+ unsigned int block_vertical_mask_bottom:8;
};
struct i965_proc_context
--
1.7.9.5
More information about the Libva
mailing list