Mesa (master): turnip: implement VK_EXT_sample_locations

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 22 19:01:25 UTC 2020


Module: Mesa
Branch: master
Commit: a92d2e11095d9f1f8bc1188fd3d2b8391acc4591
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a92d2e11095d9f1f8bc1188fd3d2b8391acc4591

Author: Jonathan Marek <jonathan at marek.ca>
Date:   Tue Apr 21 12:14:23 2020 -0400

turnip: implement VK_EXT_sample_locations

Passes tests in:

dEQP-VK.pipeline.multisample.sample_locations_ext.*

Note that these tests fail because of gl_PrimitiveID not working correctly:

dEQP-VK.pipeline.multisample.sample_locations_ext.verify_location.*

Signed-off-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4665>

---

 src/freedreno/registers/a6xx.xml              | 39 ++++++++++++-------
 src/freedreno/vulkan/tu_cmd_buffer.c          | 25 +++++--------
 src/freedreno/vulkan/tu_device.c              | 29 ++++++++++++++
 src/freedreno/vulkan/tu_extensions.py         |  1 +
 src/freedreno/vulkan/tu_pipeline.c            | 54 +++++++++++++++++++++++++++
 src/freedreno/vulkan/tu_private.h             |  6 ++-
 src/freedreno/vulkan/tu_shader.c              | 10 ++++-
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 10 ++---
 src/gallium/drivers/freedreno/a6xx/fd6_gmem.c |  9 -----
 9 files changed, 135 insertions(+), 48 deletions(-)

diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml
index a4a3af50ece..a2bda8aee10 100644
--- a/src/freedreno/registers/a6xx.xml
+++ b/src/freedreno/registers/a6xx.xml
@@ -1933,12 +1933,25 @@ to upconvert to 32b float internally?
 		<bitfield name="MSAA_DISABLE" pos="2" type="boolean"/>
 	</reg32>
 
-	<!-- always 0x0 -->
-	<reg32 offset="0x80a4" name="GRAS_UNKNOWN_80A4"/>
-	<!-- always 0x0 -->
-	<reg32 offset="0x80a5" name="GRAS_UNKNOWN_80A5"/>
-	<!-- always 0x0 -->
-	<reg32 offset="0x80a6" name="GRAS_UNKNOWN_80A6"/>
+	<bitset name="a6xx_sample_config" inline="yes">
+		<bitfield name="LOCATION_ENABLE" pos="1" type="boolean"/>
+	</bitset>
+
+	<bitset name="a6xx_sample_locations" inline="yes">
+		<bitfield name="SAMPLE_0_X" low="0" high="3" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_0_Y" low="4" high="7" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_1_X" low="8" high="11" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_1_Y" low="12" high="15" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_2_X" low="16" high="19" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_2_Y" low="20" high="23" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_3_X" low="24" high="27" radix="4" type="fixed"/>
+		<bitfield name="SAMPLE_3_Y" low="28" high="31" radix="4" type="fixed"/>
+	</bitset>
+
+	<reg32 offset="0x80a4" name="GRAS_SAMPLE_CONFIG" type="a6xx_sample_config"/>
+	<reg32 offset="0x80a5" name="GRAS_SAMPLE_LOCATION_0" type="a6xx_sample_locations"/>
+	<reg32 offset="0x80a6" name="GRAS_SAMPLE_LOCATION_1" type="a6xx_sample_locations"/>
+
 	<!-- always 0x0 -->
 	<reg32 offset="0x80af" name="GRAS_UNKNOWN_80AF"/>
 
@@ -2058,12 +2071,9 @@ to upconvert to 32b float internally?
 		<bitfield name="MSAA_DISABLE" pos="2" type="boolean"/>
 	</reg32>
 
-	<!-- always 0x0 ? -->
-	<reg32 offset="0x8804" name="RB_UNKNOWN_8804"/>
-	<!-- always 0x0 ? -->
-	<reg32 offset="0x8805" name="RB_UNKNOWN_8805"/>
-	<!-- always 0x0 ? -->
-	<reg32 offset="0x8806" name="RB_UNKNOWN_8806"/>
+	<reg32 offset="0x8804" name="RB_SAMPLE_CONFIG" type="a6xx_sample_config"/>
+	<reg32 offset="0x8805" name="RB_SAMPLE_LOCATION_0" type="a6xx_sample_locations"/>
+	<reg32 offset="0x8806" name="RB_SAMPLE_LOCATION_1" type="a6xx_sample_locations"/>
 
 	<!--
 	note: maybe not actually called RB_RENDER_CONTROLn (since RB_RENDER_CNTL
@@ -3132,8 +3142,9 @@ to upconvert to 32b float internally?
 	<reg64 offset="0xb302" name="SP_TP_BORDER_COLOR_BASE_ADDR" type="address"/>
 	<reg32 offset="0xb302" name="SP_TP_BORDER_COLOR_BASE_ADDR_LO"/>
 	<reg32 offset="0xb303" name="SP_TP_BORDER_COLOR_BASE_ADDR_HI"/>
-	<!-- always 0x0 ? -->
-	<reg32 offset="0xb304" name="SP_TP_UNKNOWN_B304"/>
+	<reg32 offset="0xb304" name="SP_TP_SAMPLE_CONFIG" type="a6xx_sample_config"/>
+	<reg32 offset="0xb305" name="SP_TP_SAMPLE_LOCATION_0" type="a6xx_sample_locations"/>
+	<reg32 offset="0xb306" name="SP_TP_SAMPLE_LOCATION_1" type="a6xx_sample_locations"/>
 
 	<reg32 offset="0xb309" name="SP_TP_UNKNOWN_B309"/>
 
diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index 6cf74b3f0c4..f830f07e2ef 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -692,15 +692,6 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
 
       tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
       tu_cs_emit(cs, 0x0);
-
-      tu_cs_emit_regs(cs,
-                      A6XX_RB_UNKNOWN_8804(0));
-
-      tu_cs_emit_regs(cs,
-                      A6XX_SP_TP_UNKNOWN_B304(0));
-
-      tu_cs_emit_regs(cs,
-                      A6XX_GRAS_UNKNOWN_80A4(0));
    } else {
       tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
       tu_cs_emit(cs, 0x1);
@@ -852,14 +843,7 @@ tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
    tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3);
    tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3);
-   tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2);
-   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0);
-   tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0);
    tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
@@ -2152,6 +2136,15 @@ tu_CmdSetStencilReference(VkCommandBuffer commandBuffer,
    cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
 }
 
+void
+tu_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
+                            const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
+{
+   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
+
+   tu6_emit_sample_locations(&cmd->draw_cs, pSampleLocationsInfo);
+}
+
 void
 tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 006f7d4e73b..73c264417eb 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -928,6 +928,22 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
          properties->transformFeedbackDraw = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+         properties->sampleLocationSampleCounts = 0;
+         if (pdevice->supported_extensions.EXT_sample_locations) {
+            properties->sampleLocationSampleCounts =
+               VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
+         }
+         properties->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
+         properties->sampleLocationCoordinateRange[0] = 0.0f;
+         properties->sampleLocationCoordinateRange[1] = 0.9375f;
+         properties->sampleLocationSubPixelBits = 4;
+         properties->variableSampleLocations = true;
+         break;
+      }
+
       default:
          break;
       }
@@ -2319,3 +2335,16 @@ tu_GetDeviceGroupPeerMemoryFeatures(
                           VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
                           VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+void tu_GetPhysicalDeviceMultisamplePropertiesEXT(
+   VkPhysicalDevice                            physicalDevice,
+   VkSampleCountFlagBits                       samples,
+   VkMultisamplePropertiesEXT*                 pMultisampleProperties)
+{
+   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
+
+   if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->supported_extensions.EXT_sample_locations)
+      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
+   else
+      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
+}
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index 574e9411c08..498b38613b6 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -77,6 +77,7 @@ EXTENSIONS = [
     Extension('VK_KHR_external_memory_fd',                1, True),
     Extension('VK_EXT_external_memory_dma_buf',           1, True),
     Extension('VK_EXT_image_drm_format_modifier',         1, False),
+    Extension('VK_EXT_sample_locations',                  1, 'device->gpu_id == 650'),
     Extension('VK_EXT_transform_feedback',                1, True),
     Extension('VK_ANDROID_native_buffer',                 1, True),
     Extension('VK_KHR_external_semaphore_fd',             1, True),
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index c3a467ebbfc..d9ffa2411fe 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -325,6 +325,8 @@ tu_dynamic_state_bit(VkDynamicState state)
       return TU_DYNAMIC_STENCIL_WRITE_MASK;
    case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
       return TU_DYNAMIC_STENCIL_REFERENCE;
+   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+      return TU_DYNAMIC_SAMPLE_LOCATIONS;
    default:
       unreachable("invalid dynamic state");
       return 0;
@@ -1733,6 +1735,47 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
                      A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
 }
 
+void
+tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc)
+{
+   if (!samp_loc) {
+      tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+
+      tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1);
+      tu_cs_emit(cs, 0);
+      return;
+   }
+
+   assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount);
+   assert(samp_loc->sampleLocationGridSize.width == 1);
+   assert(samp_loc->sampleLocationGridSize.height == 1);
+
+   uint32_t sample_config =
+      A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE;
+   uint32_t sample_locations = 0;
+   for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) {
+      sample_locations |=
+         (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) |
+          A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8;
+   }
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2);
+   tu_cs_emit(cs, sample_config);
+   tu_cs_emit(cs, sample_locations);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2);
+   tu_cs_emit(cs, sample_config);
+   tu_cs_emit(cs, sample_locations);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2);
+   tu_cs_emit(cs, sample_config);
+   tu_cs_emit(cs, sample_locations);
+}
+
 static void
 tu6_emit_gras_unknowns(struct tu_cs *cs)
 {
@@ -2415,6 +2458,17 @@ tu_pipeline_builder_parse_multisample_and_color_blend(
    if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS))
       tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants);
 
+   if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) {
+      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
+         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+      const VkSampleLocationsInfoEXT *samp_loc = NULL;
+
+      if (sample_locations && sample_locations->sampleLocationsEnable)
+         samp_loc = &sample_locations->sampleLocationsInfo;
+
+      tu6_emit_sample_locations(&blend_cs, samp_loc);
+   }
+
    tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info);
 
    pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index eb026998293..dfd9d5101b5 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -722,7 +722,8 @@ enum tu_dynamic_state_bits
    TU_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7,
    TU_DYNAMIC_STENCIL_REFERENCE = 1 << 8,
    TU_DYNAMIC_DISCARD_RECTANGLE = 1 << 9,
-   TU_DYNAMIC_ALL = (1 << 10) - 1,
+   TU_DYNAMIC_SAMPLE_LOCATIONS = 1 << 10,
+   TU_DYNAMIC_ALL = (1 << 11) - 1,
 };
 
 struct tu_vertex_binding
@@ -1265,6 +1266,9 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport);
 void
 tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor);
 
+void
+tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);
+
 void
 tu6_emit_gras_su_cntl(struct tu_cs *cs,
                       uint32_t gras_su_cntl,
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index aa07f5df066..d58209c404c 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -26,6 +26,7 @@
 #include "spirv/nir_spirv.h"
 #include "util/mesa-sha1.h"
 #include "nir/nir_xfb_info.h"
+#include "vk_util.h"
 
 #include "ir3/ir3_nir.h"
 
@@ -605,9 +606,16 @@ tu_shader_compile_options_init(
          }
       }
 
+      const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
+      const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
+         vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
       if (!pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
-          pipeline_info->pMultisampleState->rasterizationSamples > 1)
+          (msaa_info->rasterizationSamples > 1 ||
+          /* also set msaa key when sample location is not the default
+           * since this affects varying interpolation */
+           (sample_locations && sample_locations->sampleLocationsEnable))) {
          msaa = true;
+      }
    }
 
    *options = (struct tu_shader_compile_options) {
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index 8fcb603bfa1..024b139d130 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -1322,17 +1322,13 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
 	WRITE(REG_A6XX_PC_UNKNOWN_9981, 0x3);
 	WRITE(REG_A6XX_PC_UNKNOWN_9E72, 0);
 	WRITE(REG_A6XX_VPC_UNKNOWN_9108, 0x3);
-	WRITE(REG_A6XX_SP_TP_UNKNOWN_B304, 0);
+	WRITE(REG_A6XX_SP_TP_SAMPLE_CONFIG, 0);
 	/* NOTE blob seems to (mostly?) use 0xb2 for SP_TP_UNKNOWN_B309
 	 * but this seems to kill texture gather offsets.
 	 */
 	WRITE(REG_A6XX_SP_TP_UNKNOWN_B309, 0xa2);
-	WRITE(REG_A6XX_RB_UNKNOWN_8804, 0);
-	WRITE(REG_A6XX_GRAS_UNKNOWN_80A4, 0);
-	WRITE(REG_A6XX_GRAS_UNKNOWN_80A5, 0);
-	WRITE(REG_A6XX_GRAS_UNKNOWN_80A6, 0);
-	WRITE(REG_A6XX_RB_UNKNOWN_8805, 0);
-	WRITE(REG_A6XX_RB_UNKNOWN_8806, 0);
+	WRITE(REG_A6XX_RB_SAMPLE_CONFIG, 0);
+	WRITE(REG_A6XX_GRAS_SAMPLE_CONFIG, 0);
 	WRITE(REG_A6XX_RB_UNKNOWN_8878, 0);
 	WRITE(REG_A6XX_RB_UNKNOWN_8879, 0);
 	WRITE(REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index f56b3422d55..b42287486a8 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -889,15 +889,6 @@ fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
 
 		OUT_PKT7(ring, CP_SET_MODE, 1);
 		OUT_RING(ring, 0x0);
-
-		OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8804, 1);
-		OUT_RING(ring, 0x0);
-
-		OUT_PKT4(ring, REG_A6XX_SP_TP_UNKNOWN_B304, 1);
-		OUT_RING(ring, 0x0);
-
-		OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_80A4, 1);
-		OUT_RING(ring, 0x0);
 	} else {
 		set_window_offset(ring, x1, y1);
 



More information about the mesa-commit mailing list