[igt-dev] [PATCH i-g-t 2/4] lib/DG2: create flat ccs framebuffers with 4-tile
Jeevan B
jeevan.b at intel.com
Thu Apr 7 11:19:22 UTC 2022
From: Juha-Pekka Heikkilä <juha-pekka.heikkila at intel.com>
Add support for DG2 flat ccs framebuffers with tile-4.
Signed-off-by: Juha-Pekka Heikkilä <juha-pekka.heikkila at intel.com>
Signed-off-by: Jeevan B <jeevan.b at intel.com>
---
lib/gen9_render.h | 37 ++++++++++---
lib/igt_fb.c | 18 +++---
lib/intel_aux_pgtable.c | 6 +-
lib/intel_batchbuffer.c | 2 +-
lib/intel_bufops.c | 116 +++++++++++++++++++++++++++++++++++----
lib/intel_chipset.h | 3 +-
lib/rendercopy_gen9.c | 119 +++++++++++++++++++++++++++-------------
lib/veboxcopy_gen12.c | 110 +++++++++++++++++++++++++++----------
8 files changed, 314 insertions(+), 97 deletions(-)
diff --git a/lib/gen9_render.h b/lib/gen9_render.h
index 06d9718c..8c0b3b4b 100644
--- a/lib/gen9_render.h
+++ b/lib/gen9_render.h
@@ -59,9 +59,15 @@ struct gen9_surface_state {
uint32_t depth:11;
} ss3;
- struct {
- uint32_t minimum_array_element:27;
- uint32_t pad0:5;
+ union {
+ struct {
+ uint32_t minimum_array_element:27;
+ uint32_t pad0:5;
+ } skl;
+ struct {
+ uint32_t decompress_in_l3:1;
+ uint32_t pad0:31;
+ } dg2;
} ss4;
struct {
@@ -116,6 +122,15 @@ struct gen9_surface_state {
uint32_t media_compression:1;
uint32_t pad2:1;
} tgl;
+
+ struct {
+ uint32_t pad0:14;
+ uint32_t disable_support_for_multi_gpu_partial_writes:1;
+ uint32_t disable_support_for_multi_gpu_atomics:1;
+ uint32_t pad1:14;
+ uint32_t memory_compression_enable:1;
+ uint32_t memory_compression_type:1;
+ } dg2;
} ss7;
struct {
@@ -138,15 +153,21 @@ struct gen9_surface_state {
uint32_t aux_base_addr_hi;
} ss11;
- /* register can be used for either
- * clear value or depth clear value
- */
struct {
- uint32_t clear_address;
+ /*
+ * compression_format is used only from dg2 onward.
+ * Prior to dg2 the full ss12 is used for the address,
+ * but due to alignment bits 0..5 will be zero, and
+ * the code asserts that this is so.
+ */
+ uint32_t compression_format:5;
+ uint32_t pad0:1;
+ uint32_t clear_address:26;
} ss12;
struct {
- uint32_t clear_address_hi;
+ uint32_t clear_address_hi:16;
+ uint32_t pad0:16;
} ss13;
struct {
diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index eafbe7fd..55762426 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -584,7 +584,7 @@ static bool is_ccs_modifier(uint64_t modifier)
static bool is_ccs_plane(const struct igt_fb *fb, int plane)
{
- if (!is_ccs_modifier(fb->modifier))
+ if (!is_ccs_modifier(fb->modifier) || HAS_FLATCCS(intel_get_drm_devid(fb->fd)))
return false;
return plane >= fb->num_planes / 2;
@@ -686,7 +686,7 @@ static int fb_num_planes(const struct igt_fb *fb)
{
int num_planes = lookup_drm_format(fb->drm_format)->num_planes;
- if (is_ccs_modifier(fb->modifier))
+ if (is_ccs_modifier(fb->modifier) && !HAS_FLATCCS(intel_get_drm_devid(fb->fd)))
num_planes *= 2;
if (fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC)
@@ -763,7 +763,7 @@ static uint32_t calc_plane_stride(struct igt_fb *fb, int plane)
return ALIGN(min_stride, tile_width);
} else if (is_gen12_ccs_cc_plane(fb, plane)) {
/* clear color always fixed to 64 bytes */
- return 64;
+ return HAS_FLATCCS(intel_get_drm_devid(fb->fd)) ? 512 : 64;
} else if (is_gen12_ccs_plane(fb, plane)) {
/*
* The CCS surface stride is
@@ -2504,9 +2504,10 @@ igt_fb_create_intel_buf(int fd, struct buf_ops *bops,
if (is_ccs_modifier(fb->modifier)) {
igt_assert_eq(fb->strides[0] & 127, 0);
- if (is_gen12_ccs_modifier(fb->modifier))
- igt_assert_eq(fb->strides[1] & 63, 0);
- else
+ if (is_gen12_ccs_modifier(fb->modifier)) {
+ if (!HAS_FLATCCS(intel_get_drm_devid(fb->fd)))
+ igt_assert_eq(fb->strides[1] & 63, 0);
+ } else
igt_assert_eq(fb->strides[1] & 127, 0);
if (is_gen12_mc_ccs_modifier(fb->modifier))
@@ -2539,7 +2540,7 @@ igt_fb_create_intel_buf(int fd, struct buf_ops *bops,
buf->yuv_semiplanar_bpp = yuv_semiplanar_bpp(fb->drm_format);
if (is_ccs_modifier(fb->modifier)) {
- num_surfaces = fb->num_planes / 2;
+ num_surfaces = fb->num_planes / (HAS_FLATCCS(intel_get_drm_devid(fb->fd)) ? 1 : 2);
for (i = 0; i < num_surfaces; i++)
init_buf_ccs(buf, i,
fb->offsets[num_surfaces + i],
@@ -2560,6 +2561,9 @@ igt_fb_create_intel_buf(int fd, struct buf_ops *bops,
if (fb->modifier == I915_FORMAT_MOD_Y_TILED_GEN12_RC_CCS_CC)
buf->cc.offset = fb->offsets[2];
+ if (fb->modifier == I915_FORMAT_MOD_4_TILED_DG2_RC_CCS)
+ buf->cc.offset = fb->offsets[1];
+
return buf;
}
diff --git a/lib/intel_aux_pgtable.c b/lib/intel_aux_pgtable.c
index f5796fdf..e31a6c34 100644
--- a/lib/intel_aux_pgtable.c
+++ b/lib/intel_aux_pgtable.c
@@ -263,7 +263,8 @@ static uint64_t pgt_get_l1_flags(const struct intel_buf *buf, int surface_idx)
} entry = {
.e = {
.valid = 1,
- .tile_mode = buf->tiling == I915_TILING_Y ? 1 : 0,
+ .tile_mode = buf->tiling == I915_TILING_Y ? 1 :
+ (buf->tiling == I915_TILING_4 ? 2 : 0),
}
};
@@ -274,7 +275,8 @@ static uint64_t pgt_get_l1_flags(const struct intel_buf *buf, int surface_idx)
*/
igt_assert(buf->tiling == I915_TILING_Y ||
buf->tiling == I915_TILING_Yf ||
- buf->tiling == I915_TILING_Ys);
+ buf->tiling == I915_TILING_Ys ||
+ buf->tiling == I915_TILING_4);
entry.e.ycr = surface_idx > 0;
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index ebf3c598..f657cc41 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -1146,7 +1146,7 @@ igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
copy = gen9_render_copyfunc;
else if (IS_GEN11(devid))
copy = gen11_render_copyfunc;
- else if (IS_DG2(devid))
+ else if (HAS_FLATCCS(devid))
copy = gen12p71_render_copyfunc;
else if (IS_GEN12(devid))
copy = gen12_render_copyfunc;
diff --git a/lib/intel_bufops.c b/lib/intel_bufops.c
index f13063fa..6ab665fa 100644
--- a/lib/intel_bufops.c
+++ b/lib/intel_bufops.c
@@ -89,6 +89,7 @@
#define TILE_Y TILE_DEF(I915_TILING_Y)
#define TILE_Yf TILE_DEF(I915_TILING_Yf)
#define TILE_Ys TILE_DEF(I915_TILING_Ys)
+#define TILE_4 TILE_DEF(I915_TILING_4)
#define CCS_OFFSET(buf) (buf->ccs[0].offset)
#define CCS_SIZE(gen, buf) \
@@ -105,16 +106,19 @@ struct buf_ops {
uint32_t supported_hw_tiles;
uint32_t swizzle_x;
uint32_t swizzle_y;
+ uint32_t swizzle_tile4;
bo_copy linear_to;
bo_copy linear_to_x;
bo_copy linear_to_y;
bo_copy linear_to_yf;
bo_copy linear_to_ys;
+ bo_copy linear_to_tile4;
bo_copy to_linear;
bo_copy x_to_linear;
bo_copy y_to_linear;
bo_copy yf_to_linear;
bo_copy ys_to_linear;
+ bo_copy tile4_to_linear;
};
static const char *tiling_str(uint32_t tiling)
@@ -125,6 +129,7 @@ static const char *tiling_str(uint32_t tiling)
case I915_TILING_Y: return "Y";
case I915_TILING_Yf: return "Yf";
case I915_TILING_Ys: return "Ys";
+ case I915_TILING_4: return "4";
default: return "UNKNOWN";
}
}
@@ -222,7 +227,8 @@ static void set_hw_tiled(struct buf_ops *bops, struct intel_buf *buf)
{
uint32_t ret_tiling, ret_swizzle;
- if (buf->tiling != I915_TILING_X && buf->tiling != I915_TILING_Y)
+ if (buf->tiling != I915_TILING_X && buf->tiling != I915_TILING_Y &&
+ buf->tiling != I915_TILING_4)
return;
if (!buf_ops_has_hw_fence(bops, buf->tiling)) {
@@ -320,6 +326,49 @@ static void *y_ptr(void *ptr,
return ptr + pos;
}
+/*
+ * Return the address of pixel (x, y) in a 4-tiled surface mapped at ptr.
+ * stride is the pitch in bytes; cpp is bytes per pixel and has to be a
+ * power of two since it is applied as a shift. Divisions/multiplications
+ * are written as shifts and masks in the hope of keeping this fast.
+ */
+static void *tile4_ptr(void *ptr,
+ unsigned int x, unsigned int y,
+ unsigned int stride, unsigned int cpp)
+{
+ const int tile_width = 128;
+ const int tile_height = 32;
+ const int subtile_size = 64;
+ const int owords = 16;
+ int base, _x, _y, subtile, tile_x, tile_y;
+ int x_loc = x << __builtin_ctz(cpp);
+ int pos;
+
+ /* Pixel in tile via masks */
+ tile_x = x_loc & (tile_width - 1);
+ tile_y = y & (tile_height - 1);
+
+ /* subtile in 4k tile */
+ _x = tile_x >> __builtin_ctz(owords);
+ _y = tile_y >> 2;
+
+ /* 4-tile swizzle */
+ subtile = ((_y >> 1) << 4) + ((_y & 1) << 2) + (_x & 3) + ((_x & 4) << 1);
+
+ /* memory location */
+ base = (y >> __builtin_ctz(tile_height)) *
+ (stride << __builtin_ctz(tile_height)) +
+ (((x_loc >> __builtin_ctz(tile_width)) << __builtin_ctz(4096)));
+
+ pos = base + (subtile << __builtin_ctz(subtile_size)) +
+ ((tile_y & 3) << __builtin_ctz(owords)) +
+ (tile_x & (owords - 1));
+ /* pos is a byte offset and ptr is void *, so it must not be scaled by cpp */
+ igt_assert((pos & (cpp - 1)) == 0);
+
+ return ptr + pos;
+}
+
static void *yf_ptr(void *ptr,
unsigned int x, unsigned int y,
unsigned int stride, unsigned int cpp)
@@ -365,6 +414,9 @@ static tile_fn __get_tile_fn_ptr(int tiling)
case I915_TILING_Yf:
fn = yf_ptr;
break;
+ case I915_TILING_4:
+ fn = tile4_ptr;
+ break;
case I915_TILING_Ys:
/* To be implemented */
break;
@@ -391,7 +443,7 @@ static void __copy_ccs(struct buf_ops *bops, struct intel_buf *buf,
void *map;
int gen;
- if (!buf->compression)
+ if (!buf->compression || HAS_FLATCCS(intel_get_drm_devid(bops->fd)))
return;
gen = bops->intel_gen;
@@ -551,6 +603,13 @@ static void copy_linear_to_ys(struct buf_ops *bops, struct intel_buf *buf,
__copy_linear_to(bops->fd, buf, linear, I915_TILING_Ys, 0);
}
+/* Copy linear data into buf as 4-tiled, applying the swizzle stored in bops. */
+static void copy_linear_to_tile4(struct buf_ops *bops, struct intel_buf *buf,
+ uint32_t *linear)
+{
+ const uint32_t swizzle = bops->swizzle_tile4;
+
+ DEBUGFN();
+ __copy_linear_to(bops->fd, buf, linear, I915_TILING_4, swizzle);
+}
+
static void __copy_to_linear(int fd, struct intel_buf *buf,
uint32_t *linear, int tiling, uint32_t swizzle)
{
@@ -601,6 +660,13 @@ static void copy_ys_to_linear(struct buf_ops *bops, struct intel_buf *buf,
__copy_to_linear(bops->fd, buf, linear, I915_TILING_Ys, 0);
}
+/*
+ * Copy a 4-tiled buf back into linear. Uses the same bops->swizzle_tile4
+ * as copy_linear_to_tile4() so that a write/read round trip is symmetric.
+ */
+static void copy_tile4_to_linear(struct buf_ops *bops, struct intel_buf *buf,
+ uint32_t *linear)
+{
+ DEBUGFN();
+ __copy_to_linear(bops->fd, buf, linear, I915_TILING_4, bops->swizzle_tile4);
+}
+
static void copy_linear_to_gtt(struct buf_ops *bops, struct intel_buf *buf,
uint32_t *linear)
{
@@ -756,7 +822,8 @@ static void __intel_buf_init(struct buf_ops *bops,
igt_require(bops->intel_gen >= 9);
igt_assert(req_tiling == I915_TILING_Y ||
- req_tiling == I915_TILING_Yf);
+ req_tiling == I915_TILING_Yf ||
+ req_tiling == I915_TILING_4);
/*
* On GEN12+ we align the main surface to 4 * 4 main surface
* tiles, which is 64kB. These 16 tiles are mapped by 4 AUX
@@ -778,13 +845,18 @@ static void __intel_buf_init(struct buf_ops *bops,
buf->bpp = bpp;
buf->compression = compression;
- aux_width = intel_buf_ccs_width(bops->intel_gen, buf);
- aux_height = intel_buf_ccs_height(bops->intel_gen, buf);
+ if (!HAS_FLATCCS(intel_get_drm_devid(bops->fd))) {
+ int aux_width, aux_height;
- buf->ccs[0].offset = buf->surface[0].stride * ALIGN(height, 32);
- buf->ccs[0].stride = aux_width;
+ aux_width = intel_buf_ccs_width(bops->intel_gen, buf);
+ aux_height = intel_buf_ccs_height(bops->intel_gen, buf);
- size = buf->ccs[0].offset + aux_width * aux_height;
+ buf->ccs[0].offset = buf->surface[0].stride * ALIGN(height, 32);
+ buf->ccs[0].stride = aux_width;
+ size = buf->ccs[0].offset + aux_width * aux_height;
+ } else {
+ size = buf->ccs[0].offset;
+ }
} else {
if (tiling) {
devid = intel_get_drm_devid(bops->fd);
@@ -1176,17 +1248,19 @@ void intel_buf_write_aux_to_png(struct intel_buf *buf, const char *filename)
#define DEFAULT_BUFOPS(__gen_start, __gen_end) \
.gen_start = __gen_start, \
.gen_end = __gen_end, \
- .supported_hw_tiles = TILE_X | TILE_Y, \
+ .supported_hw_tiles = TILE_X | TILE_Y | TILE_4, \
.linear_to = copy_linear_to_wc, \
.linear_to_x = copy_linear_to_gtt, \
.linear_to_y = copy_linear_to_gtt, \
.linear_to_yf = copy_linear_to_yf, \
.linear_to_ys = copy_linear_to_ys, \
+ .linear_to_tile4 = copy_linear_to_tile4, \
.to_linear = copy_wc_to_linear, \
.x_to_linear = copy_gtt_to_linear, \
.y_to_linear = copy_gtt_to_linear, \
.yf_to_linear = copy_yf_to_linear, \
- .ys_to_linear = copy_ys_to_linear
+ .ys_to_linear = copy_ys_to_linear, \
+ .tile4_to_linear = copy_tile4_to_linear
struct buf_ops buf_ops_arr[] = {
{
@@ -1201,7 +1275,7 @@ struct buf_ops buf_ops_arr[] = {
{
DEFAULT_BUFOPS(12, 12),
- .supported_tiles = TILE_NONE | TILE_X | TILE_Y | TILE_Yf | TILE_Ys,
+ .supported_tiles = TILE_NONE | TILE_X | TILE_Y | TILE_Yf | TILE_Ys | TILE_4,
},
};
@@ -1230,6 +1304,8 @@ static bool probe_hw_tiling(struct buf_ops *bops, uint32_t tiling,
bops->swizzle_x = buf_swizzle;
else if (tiling == I915_TILING_Y)
bops->swizzle_y = buf_swizzle;
+ else if (tiling == I915_TILING_4)
+ bops->swizzle_tile4 = buf_swizzle;
*swizzling_supported = buf_swizzle == phys_swizzle;
}
@@ -1390,6 +1466,24 @@ static struct buf_ops *__buf_ops_create(int fd, bool check_idempotency)
}
}
+ if (is_hw_tiling_supported(bops, I915_TILING_4)) {
+ bool swizzling_supported;
+ bool supported = probe_hw_tiling(bops, I915_TILING_4,
+ &swizzling_supported);
+
+ if (!swizzling_supported) {
+ igt_debug("Swizzling for 4 is not supported\n");
+ bops->supported_tiles &= ~TILE_4;
+ }
+
+ igt_debug("4 fence support: %s\n", bool_str(supported));
+ if (!supported) {
+ bops->supported_hw_tiles &= ~TILE_4;
+ bops->linear_to_tile4 = copy_linear_to_tile4;
+ bops->tile4_to_linear = copy_tile4_to_linear;
+ }
+ }
+
/* Disable other tiling format functions if not supported */
if (!is_tiling_supported(bops, I915_TILING_Yf)) {
igt_debug("Yf format not supported\n");
diff --git a/lib/intel_chipset.h b/lib/intel_chipset.h
index db75a829..4d9f4623 100644
--- a/lib/intel_chipset.h
+++ b/lib/intel_chipset.h
@@ -219,6 +219,7 @@ void intel_check_pch(void);
#define HAS_4TILE(devid) (intel_get_device_info(devid)->has_4tile)
-#define HAS_FLATCCS(devid) (intel_get_device_info(devid)->has_flatccs)
+/* use HAS_4TILE here as all devices with 4-tile have flat ccs. */
+#define HAS_FLATCCS(devid) HAS_4TILE(devid)
#endif /* _INTEL_CHIPSET_H */
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 6c45efb4..b0e775f6 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -165,7 +165,8 @@ intel_get_uc_mocs(int fd) {
/* Mostly copy+paste from gen6, except height, width, pitch moved */
static uint32_t
-gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
+gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst,
+ bool fast_clear) {
struct gen9_surface_state *ss;
uint32_t write_domain, read_domain;
uint64_t address;
@@ -192,15 +193,26 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
case 64: ss->ss0.surface_format = SURFACEFORMAT_R16G16B16A16_FLOAT; break;
default: igt_assert(0);
}
- ss->ss0.render_cache_read_write = 1;
ss->ss0.vertical_alignment = 1; /* align 4 */
- ss->ss0.horizontal_alignment = 1; /* align 4 */
+ ss->ss0.horizontal_alignment = 1; /* align 4 or HALIGN_32 on display ver >= 13*/
+
+ if (HAS_FLATCCS(ibb->devid)) {
+ /*
+ * mocs table version 1 index 3 group wb use l3
+ */
+ ss->ss1.memory_object_control = 3 << 1;
+ ss->ss5.mip_tail_start_lod = 0;
+ } else {
+ ss->ss0.render_cache_read_write = 1;
+ ss->ss1.memory_object_control = intel_get_uc_mocs(i915);
+ ss->ss5.mip_tail_start_lod = 1; /* needed with trmode */
+ }
+
if (buf->tiling == I915_TILING_X)
ss->ss0.tiled_mode = 2;
else if (buf->tiling != I915_TILING_NONE)
ss->ss0.tiled_mode = 3;
- ss->ss1.memory_object_control = intel_get_uc_mocs(i915);
if (intel_buf_pxp(buf))
ss->ss1.memory_object_control |= 1;
@@ -208,7 +220,6 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
ss->ss5.trmode = 1;
else if (buf->tiling == I915_TILING_Ys)
ss->ss5.trmode = 2;
- ss->ss5.mip_tail_start_lod = 1; /* needed with trmode */
address = intel_bb_offset_reloc(ibb, buf->handle,
read_domain, write_domain,
@@ -229,20 +240,21 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
if (buf->compression == I915_COMPRESSION_MEDIA)
ss->ss7.tgl.media_compression = 1;
else if (buf->compression == I915_COMPRESSION_RENDER) {
- igt_assert(buf->ccs[0].stride);
ss->ss6.aux_mode = 0x5; /* AUX_CCS_E */
- ss->ss6.aux_pitch = (buf->ccs[0].stride / 128) - 1;
+ if (buf->ccs[0].stride) {
+ ss->ss6.aux_pitch = (buf->ccs[0].stride / 128) - 1;
- address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
- read_domain, write_domain,
- (buf->cc.offset ? (1 << 10) : 0) | buf->ccs[0].offset,
- intel_bb_offset(ibb) + 4 * 10,
- buf->addr.offset);
- ss->ss10.aux_base_addr = (address + buf->ccs[0].offset) >> 12;
- ss->ss11.aux_base_addr_hi = (address + buf->ccs[0].offset) >> 32;
+ address = intel_bb_offset_reloc_with_delta(ibb, buf->handle,
+ read_domain, write_domain,
+ (buf->cc.offset ? (1 << 10) : 0) | buf->ccs[0].offset,
+ intel_bb_offset(ibb) + 4 * 10,
+ buf->addr.offset);
+ ss->ss10.aux_base_addr = (address + buf->ccs[0].offset) >> 12;
+ ss->ss11.aux_base_addr_hi = (address + buf->ccs[0].offset) >> 32;
+ }
- if (buf->cc.offset) {
+ if (fast_clear || (buf->cc.offset && !HAS_FLATCCS(ibb->devid))) {
igt_assert(buf->compression == I915_COMPRESSION_RENDER);
ss->ss10.clearvalue_addr_enable = 1;
@@ -252,8 +264,28 @@ gen8_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst) {
buf->cc.offset,
intel_bb_offset(ibb) + 4 * 12,
buf->addr.offset);
- ss->ss12.clear_address = address + buf->cc.offset;
+ /*
+ * If the assert below doesn't hold, the clear address
+ * will be written wrong.
+ */
+ igt_assert(__builtin_ctzl(address + buf->cc.offset) >= 6 &&
+ (__builtin_clzl(address + buf->cc.offset) >= 16));
+
+ ss->ss12.clear_address = (address + buf->cc.offset) >> 6;
ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
+ } else if (HAS_FLATCCS(ibb->devid)) {
+ ss->ss7.dg2.memory_compression_type = 0;
+ ss->ss7.dg2.memory_compression_enable = 0;
+ ss->ss7.dg2.disable_support_for_multi_gpu_partial_writes = 1;
+ ss->ss7.dg2.disable_support_for_multi_gpu_atomics = 1;
+
+ /*
+ * For now only a 32bpp rgb format arrives here,
+ * which is marked below as B8G8R8X8_UNORM = '8'.
+ * If other formats ever arrive here, the code below
+ * needs to be fixed to take that into account.
+ */
+ ss->ss12.compression_format = 8;
}
}
@@ -266,14 +298,15 @@ gen8_bind_surfaces(struct intel_bb *ibb,
const struct intel_buf *dst)
{
uint32_t *binding_table, binding_table_offset;
+ bool fast_clear = !src;
binding_table = intel_bb_ptr_align(ibb, 32);
binding_table_offset = intel_bb_ptr_add_return_prev_offset(ibb, 32);
- binding_table[0] = gen8_bind_buf(ibb, dst, 1);
+ binding_table[0] = gen8_bind_buf(ibb, dst, 1, fast_clear);
if (src != NULL)
- binding_table[1] = gen8_bind_buf(ibb, src, 0);
+ binding_table[1] = gen8_bind_buf(ibb, src, 0, false);
return binding_table_offset;
}
@@ -856,12 +889,14 @@ gen8_emit_ps(struct intel_bb *ibb, uint32_t kernel, bool fast_clear) {
static void
gen9_emit_depth(struct intel_bb *ibb)
{
+ bool need_10dw = HAS_FLATCCS(ibb->devid);
+
intel_bb_out(ibb, GEN8_3DSTATE_WM_DEPTH_STENCIL | (4 - 2));
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
- intel_bb_out(ibb, GEN7_3DSTATE_DEPTH_BUFFER | (8-2));
+ intel_bb_out(ibb, GEN7_3DSTATE_DEPTH_BUFFER | (need_10dw ? (10-2) : (8-2)));
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
@@ -869,6 +904,10 @@ gen9_emit_depth(struct intel_bb *ibb)
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
intel_bb_out(ibb, 0);
+ if (need_10dw) {
+ intel_bb_out(ibb, 0);
+ intel_bb_out(ibb, 0);
+ }
intel_bb_out(ibb, GEN8_3DSTATE_HIER_DEPTH_BUFFER | (5-2));
intel_bb_out(ibb, 0);
@@ -1080,7 +1119,7 @@ void _gen9_render_op(struct intel_bb *ibb,
gen9_emit_state_base_address(ibb);
- if (IS_DG2(ibb->devid) || intel_gen(ibb->devid) > 12) {
+ if (HAS_FLATCCS(ibb->devid) || intel_gen(ibb->devid) > 12) {
intel_bb_out(ibb, GEN4_3DSTATE_BINDING_TABLE_POOL_ALLOC | 2);
intel_bb_emit_reloc(ibb, ibb->handle,
I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
@@ -1197,18 +1236,12 @@ void gen12p71_render_copyfunc(struct intel_bb *ibb,
struct intel_buf *dst,
unsigned int dst_x, unsigned int dst_y)
{
- struct aux_pgtable_info pgtable_info = { };
-
- gen12_aux_pgtable_init(&pgtable_info, ibb, src, dst);
-
_gen9_render_op(ibb, src, src_x, src_y,
width, height, dst, dst_x, dst_y,
- pgtable_info.pgtable_buf,
+ NULL,
NULL,
gen12p71_render_copy,
sizeof(gen12p71_render_copy));
-
- gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
}
void gen12_render_clearfunc(struct intel_bb *ibb,
@@ -1217,16 +1250,24 @@ void gen12_render_clearfunc(struct intel_bb *ibb,
unsigned int width, unsigned int height,
const float clear_color[4])
{
- struct aux_pgtable_info pgtable_info = { };
-
- gen12_aux_pgtable_init(&pgtable_info, ibb, NULL, dst);
-
- _gen9_render_op(ibb, NULL, 0, 0,
- width, height, dst, dst_x, dst_y,
- pgtable_info.pgtable_buf,
- clear_color,
- gen12_render_copy,
- sizeof(gen12_render_copy));
-
- gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
+ if (!HAS_FLATCCS(ibb->devid)) {
+ struct aux_pgtable_info pgtable_info = { };
+ gen12_aux_pgtable_init(&pgtable_info, ibb, NULL, dst);
+
+ _gen9_render_op(ibb, NULL, 0, 0,
+ width, height, dst, dst_x, dst_y,
+ pgtable_info.pgtable_buf,
+ clear_color,
+ gen12_render_copy,
+ sizeof(gen12_render_copy));
+
+ gen12_aux_pgtable_cleanup(ibb, &pgtable_info);
+ } else {
+ _gen9_render_op(ibb, NULL, 0, 0,
+ width, height, dst, dst_x, dst_y,
+ NULL,
+ clear_color,
+ gen12p71_render_copy,
+ sizeof(gen12p71_render_copy));
+ }
}
diff --git a/lib/veboxcopy_gen12.c b/lib/veboxcopy_gen12.c
index 17564493..e6a40f51 100644
--- a/lib/veboxcopy_gen12.c
+++ b/lib/veboxcopy_gen12.c
@@ -53,19 +53,25 @@ struct vebox_surface_state {
uint32_t width:14;
uint32_t height:14;
} ss2;
- struct {
+ union {
+ struct {
#define VEBOX_TILE_WALK_XMAJOR 0
#define VEBOX_TILE_WALK_YMAJOR 1
- uint32_t tile_walk:1;
- uint32_t tiled_surface:1;
- uint32_t chroma_half_pitch:1;
- uint32_t surface_pitch:17;
- uint32_t chroma_interleave:1;
- uint32_t lsb_packed_enable:1;
- uint32_t bayer_input_alignment:2;
- uint32_t bayer_pattern_format:1;
- uint32_t bayer_pattern_offset:2;
- uint32_t surface_format:5;
+ uint32_t tile_walk:1;
+ uint32_t tiled_surface:1;
+ uint32_t chroma_half_pitch:1;
+ uint32_t surface_pitch:17;
+ uint32_t chroma_interleave:1;
+ uint32_t lsb_packed_enable:1;
+ uint32_t bayer_input_alignment:2;
+ uint32_t bayer_pattern_format:1;
+ uint32_t bayer_pattern_offset:2;
+ uint32_t surface_format:5;
+ }tgl;
+ struct {
+ uint32_t tile_mode:2;
+ uint32_t pad0:30;
+ } dg2;
} ss3;
struct {
uint32_t u_y_offset:15;
@@ -82,9 +88,15 @@ struct vebox_surface_state {
uint32_t frame_x_offset:15;
uint32_t pad:2;
} ss6;
- struct {
- uint32_t derived_surface_pitch:17;
- uint32_t pad:15;
+ union {
+ struct {
+ uint32_t derived_surface_pitch:17;
+ uint32_t pad:15;
+ } skl;
+ struct {
+ uint32_t pad:27;
+ uint32_t compression_format:5;
+ } dg2;
} ss7;
struct {
uint32_t skin_score_output_surface_pitch:17;
@@ -166,17 +178,46 @@ static void emit_surface_state_cmd(struct intel_bb *ibb,
ss->ss2.height = height - 1;
ss->ss2.width = width - 1;
- ss->ss3.surface_format = format;
+ ss->ss3.tgl.surface_format = format;
if (format_is_interleaved_yuv(format))
- ss->ss3.chroma_interleave = 1;
- ss->ss3.surface_pitch = pitch - 1;
- ss->ss3.tile_walk = (tiling == I915_TILING_Y) ||
- (tiling == I915_TILING_Yf);
- ss->ss3.tiled_surface = tiling != I915_TILING_NONE;
+ ss->ss3.tgl.chroma_interleave = 1;
+ ss->ss3.tgl.surface_pitch = pitch - 1;
ss->ss4.u_y_offset = uv_offset / pitch;
- ss->ss7.derived_surface_pitch = pitch - 1;
+ if (HAS_FLATCCS(ibb->devid)) {
+ /*
+ * 4-tile = 3 (Tile 4)
+ */
+ ss->ss3.dg2.tile_mode = (tiling != I915_TILING_NONE) ? 3 : 0;
+
+ switch (format) {
+ case R8G8B8A8_UNORM:
+ ss->ss7.dg2.compression_format = 0xa;
+ break;
+ case PLANAR_420_8:
+ ss->ss7.dg2.compression_format = 0xf;
+ break;
+ case PLANAR_420_16:
+ ss->ss7.dg2.compression_format = 8;
+ break;
+ case YCRCB_NORMAL:
+ ss->ss7.dg2.compression_format = 3;
+ break;
+ case PACKED_444A_8:
+ ss->ss7.dg2.compression_format = 0x9;
+ break;
+ default:
+ igt_assert(0);
+ }
+ } else {
+ ss->ss3.tgl.tile_walk = (tiling == I915_TILING_Y) ||
+ (tiling == I915_TILING_Yf) ||
+ (tiling == I915_TILING_4);
+ ss->ss3.tgl.tiled_surface = tiling != I915_TILING_NONE;
+ }
+
+ ss->ss7.skl.derived_surface_pitch = pitch - 1;
intel_bb_ptr_add(ibb, sizeof(*ss));
}
@@ -203,7 +244,11 @@ static void emit_tiling_convert_cmd(struct intel_bb *ibb,
tc->tc1_2.input_compression_type =
src->compression == I915_COMPRESSION_RENDER;
}
- tc->tc1_2.input_tiled_resource_mode = src->tiling == I915_TILING_Yf;
+
+ if (HAS_FLATCCS(ibb->devid))
+ tc->tc1_2.input_mocs_idx = 3;
+ else
+ tc->tc1_2.input_tiled_resource_mode = src->tiling == I915_TILING_Yf;
reloc_delta = tc->tc1_2_l;
igt_assert(src->addr.offset == ALIGN(src->addr.offset, 0x1000));
@@ -220,7 +265,12 @@ static void emit_tiling_convert_cmd(struct intel_bb *ibb,
tc->tc3_4.output_compression_type =
dst->compression == I915_COMPRESSION_RENDER;
}
- tc->tc3_4.output_tiled_resource_mode = dst->tiling == I915_TILING_Yf;
+
+ if (HAS_FLATCCS(ibb->devid))
+ tc->tc3_4.output_mocs_idx = 3;
+ else
+ tc->tc3_4.output_tiled_resource_mode = dst->tiling == I915_TILING_Yf;
+
reloc_delta = tc->tc3_4_l;
igt_assert(dst->addr.offset == ALIGN(dst->addr.offset, 0x1000));
@@ -255,10 +305,13 @@ void gen12_vebox_copyfunc(struct intel_bb *ibb,
intel_bb_add_intel_buf(ibb, dst, true);
intel_bb_add_intel_buf(ibb, src, false);
- intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
- gen12_aux_pgtable_init(&aux_pgtable_info, ibb, src, dst);
- aux_pgtable_state = gen12_create_aux_pgtable_state(ibb,
- aux_pgtable_info.pgtable_buf);
+ if (!HAS_FLATCCS(ibb->devid)) {
+ intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
+ gen12_aux_pgtable_init(&aux_pgtable_info, ibb, src, dst);
+ aux_pgtable_state = gen12_create_aux_pgtable_state(ibb,
+ aux_pgtable_info.pgtable_buf);
+
+ }
intel_bb_ptr_set(ibb, 0);
gen12_emit_aux_pgtable_state(ibb, aux_pgtable_state, false);
@@ -311,5 +364,6 @@ void gen12_vebox_copyfunc(struct intel_bb *ibb,
intel_bb_reset(ibb, false);
- gen12_aux_pgtable_cleanup(ibb, &aux_pgtable_info);
+ if (!HAS_FLATCCS(ibb->devid))
+ gen12_aux_pgtable_cleanup(ibb, &aux_pgtable_info);
}
--
2.35.1
More information about the igt-dev
mailing list