[Mesa-dev] [PATCH] radv: Support multiple devices.
Andres Rodriguez
andresx7 at gmail.com
Mon Jan 16 20:41:09 UTC 2017
On 2017-01-16 03:34 PM, Bas Nieuwenhuizen wrote:
> On Mon, Jan 16, 2017 at 9:30 PM, Andres Rodriguez <andresx7 at gmail.com> wrote:
>> Small comments inline for a bit of extra error handling. Still digesting
>> radv and Vulkan, so the feedback might be a bit incorrect.
>>
>>
>>
>> On 2017-01-16 02:59 PM, Bas Nieuwenhuizen wrote:
>>> Pretty straightforward. Also deleted the big comment block as it
>>> is a pretty standard pattern for filling in arrays.
>>>
>>> Also removed the error message on non-existent devices, as getting
>>> 7 errors printed to the console each time you enumerate the
>>> devices is pretty confusing.
>>>
>>> Signed-off-by: Bas Nieuwenhuizen <basni at google.com>
>>> ---
>>>
>>> Tested with 1 GPU using radeon and 1 using amdgpu, but the radeon winsys
>>> is
>>> not ready yet.
>>>
>>> src/amd/vulkan/radv_cmd_buffer.c | 18 ++++-----
>>> src/amd/vulkan/radv_device.c | 73
>>> ++++++++++++------------------------
>>> src/amd/vulkan/radv_image.c | 18 ++++-----
>>> src/amd/vulkan/radv_pipeline.c | 10 ++---
>>> src/amd/vulkan/radv_pipeline_cache.c | 10 ++---
>>> src/amd/vulkan/radv_private.h | 4 +-
>>> src/amd/vulkan/radv_query.c | 6 +--
>>> src/amd/vulkan/radv_wsi.c | 4 +-
>>> src/amd/vulkan/si_cmd_buffer.c | 16 ++++----
>>> 9 files changed, 67 insertions(+), 92 deletions(-)
>>>
>>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c
>>> b/src/amd/vulkan/radv_cmd_buffer.c
>>> index 651b1dd452e..01e77f8a7a9 100644
>>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>>> @@ -117,7 +117,7 @@ radv_dynamic_state_copy(struct radv_dynamic_state
>>> *dest,
>>> bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
>>> {
>>> return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
>>> -
>>> cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK;
>>> + cmd_buffer->device->physical_device->rad_info.chip_class >=
>>> CIK;
>>> }
>>> enum ring_type radv_queue_family_to_ring(int f) {
>>> @@ -645,7 +645,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer
>>> *cmd_buffer,
>>> int index,
>>> struct radv_color_buffer_info *cb)
>>> {
>>> - bool is_vi =
>>> cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI;
>>> + bool is_vi =
>>> cmd_buffer->device->physical_device->rad_info.chip_class >= VI;
>>> radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE
>>> + index * 0x3c, 11);
>>> radeon_emit(cmd_buffer->cs, cb->cb_color_base);
>>> radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
>>> @@ -911,13 +911,13 @@ void radv_set_db_count_control(struct
>>> radv_cmd_buffer *cmd_buffer)
>>> uint32_t db_count_control;
>>> if(!cmd_buffer->state.active_occlusion_queries) {
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if
>>> (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
>>> db_count_control = 0;
>>> } else {
>>> db_count_control =
>>> S_028004_ZPASS_INCREMENT_DISABLE(1);
>>> }
>>> } else {
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if
>>> (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
>>> db_count_control =
>>> S_028004_PERFECT_ZPASS_COUNTS(1) |
>>> S_028004_SAMPLE_RATE(0) | /* TODO: set
>>> this to the number of samples of the current framebuffer */
>>> S_028004_ZPASS_ENABLE(1) |
>>> @@ -1129,7 +1129,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer
>>> *cmd_buffer)
>>> va += offset + buffer->offset;
>>> desc[0] = va;
>>> desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
>>> S_008F04_STRIDE(stride);
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class <= CIK &&
>>> stride)
>>> + if
>>> (cmd_buffer->device->physical_device->rad_info.chip_class <= CIK && stride)
>>> desc[2] = (buffer->size - offset -
>>> cmd_buffer->state.pipeline->va_format_size[i]) / stride + 1;
>>> else
>>> desc[2] = buffer->size - offset;
>>> @@ -1161,7 +1161,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer
>>> *cmd_buffer)
>>> radeon_set_context_reg(cmd_buffer->cs,
>>> R_028B54_VGT_SHADER_STAGES_EN, 0);
>>> ia_multi_vgt_param =
>>> si_get_ia_multi_vgt_param(cmd_buffer);
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if
>>> (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
>>> radeon_set_context_reg_idx(cmd_buffer->cs,
>>> R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
>>> radeon_set_context_reg_idx(cmd_buffer->cs,
>>> R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
>>> radeon_set_uconfig_reg_idx(cmd_buffer->cs,
>>> R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
>>> @@ -1433,7 +1433,7 @@ VkResult radv_BeginCommandBuffer(
>>> RADV_CMD_FLAG_INV_SMEM_L1 |
>>> RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
>>> RADV_CMD_FLAG_INV_GLOBAL_L2;
>>> -
>>> si_init_config(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
>>> +
>>> si_init_config(cmd_buffer->device->physical_device, cmd_buffer);
>>> radv_set_db_count_control(cmd_buffer);
>>> si_emit_cache_flush(cmd_buffer);
>>> break;
>>> @@ -1443,7 +1443,7 @@ VkResult radv_BeginCommandBuffer(
>>> RADV_CMD_FLAG_INV_VMEM_L1 |
>>> RADV_CMD_FLAG_INV_SMEM_L1 |
>>> RADV_CMD_FLAG_INV_GLOBAL_L2;
>>> -
>>> si_init_compute(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
>>> +
>>> si_init_compute(cmd_buffer->device->physical_device, cmd_buffer);
>>> si_emit_cache_flush(cmd_buffer);
>>> break;
>>> case RADV_QUEUE_TRANSFER:
>>> @@ -2628,7 +2628,7 @@ static void write_event(struct radv_cmd_buffer
>>> *cmd_buffer,
>>> /* TODO: this is overkill. Probably should figure something out
>>> from
>>> * the stage mask. */
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class == CIK) {
>>> + if (cmd_buffer->device->physical_device->rad_info.chip_class ==
>>> CIK) {
>>> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>>> radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
>>> EVENT_INDEX(5));
>>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>>> index 08a1bf301ee..6a692fc1233 100644
>>> --- a/src/amd/vulkan/radv_device.c
>>> +++ b/src/amd/vulkan/radv_device.c
>>> @@ -190,8 +190,7 @@ radv_physical_device_init(struct radv_physical_device
>>> *device,
>>> fd = open(path, O_RDWR | O_CLOEXEC);
>>> if (fd < 0)
>>> - return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
>>> - "failed to open %s: %m", path);
>>> + return VK_ERROR_INCOMPATIBLE_DRIVER;
>>> version = drmGetVersion(fd);
>>> if (!version) {
>>> @@ -365,10 +364,8 @@ void radv_DestroyInstance(
>>> {
>>> RADV_FROM_HANDLE(radv_instance, instance, _instance);
>>> - if (instance->physicalDeviceCount > 0) {
>>> - /* We support at most one physical device. */
>>> - assert(instance->physicalDeviceCount == 1);
>>> - radv_physical_device_finish(&instance->physicalDevice);
>>> + for (int i = 0; i < instance->physicalDeviceCount; ++i) {
>>> + radv_physical_device_finish(instance->physicalDevices +
>>> i);
>> instance->physicalDeviceCount should be within bounds, but a check wouldn't
>> hurt.
>>> }
>>> VG(VALGRIND_DESTROY_MEMPOOL(instance));
>>> @@ -388,52 +385,29 @@ VkResult radv_EnumeratePhysicalDevices(
>>> if (instance->physicalDeviceCount < 0) {
>>> char path[20];
>>> + instance->physicalDeviceCount = 0;
>>> for (unsigned i = 0; i < 8; i++) {
>>> snprintf(path, sizeof(path), "/dev/dri/renderD%d",
>>> 128 + i);
>>> - result =
>>> radv_physical_device_init(&instance->physicalDevice,
>>> - instance,
>>> path);
>>> - if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
>>> - break;
>>> - }
>>> -
>>> - if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
>>> - instance->physicalDeviceCount = 0;
>>> - } else if (result == VK_SUCCESS) {
>>> - instance->physicalDeviceCount = 1;
>>> - } else {
>>> - return result;
>>> + result =
>>> radv_physical_device_init(instance->physicalDevices +
>>> +
>>> instance->physicalDeviceCount,
>> Here we should have a bounds check to make sure physicalDeviceCount is not
>> greater than the array size. It would be an application error to hit that
>> case, but if it happens the call will probably silently succeed even though
>> it is using memory it doesn't own.
> Note that the loop runs only 8 times, and we fill at most one element
> of the array each iteration, so we never run out of the array. I'll
> add the constant though.
That sounds good. FWIW,
Reviewed-by: Andres Rodriguez <andresx7 at gmail.com>
>
>>> + instance,
>>> path);
>>> + if (result == VK_SUCCESS)
>>> + ++instance->physicalDeviceCount;
>>> + else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
>>> + return result;
>>> }
>>> }
>>> - /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is
>>> NULL;
>>> - * otherwise it's an inout parameter.
>>> - *
>>> - * The Vulkan spec (git aaed022) says:
>>> - *
>>> - * pPhysicalDeviceCount is a pointer to an unsigned integer
>>> variable
>>> - * that is initialized with the number of devices the
>>> application is
>>> - * prepared to receive handles to. pname:pPhysicalDevices is
>>> pointer to
>>> - * an array of at least this many VkPhysicalDevice handles
>>> [...].
>>> - *
>>> - * Upon success, if pPhysicalDevices is NULL,
>>> vkEnumeratePhysicalDevices
>>> - * overwrites the contents of the variable pointed to by
>>> - * pPhysicalDeviceCount with the number of physical devices in
>>> in the
>>> - * instance; otherwise, vkEnumeratePhysicalDevices overwrites
>>> - * pPhysicalDeviceCount with the number of physical handles
>>> written to
>>> - * pPhysicalDevices.
>>> - */
>>> if (!pPhysicalDevices) {
>>> *pPhysicalDeviceCount = instance->physicalDeviceCount;
>>> - } else if (*pPhysicalDeviceCount >= 1) {
>>> - pPhysicalDevices[0] =
>>> radv_physical_device_to_handle(&instance->physicalDevice);
>>> - *pPhysicalDeviceCount = 1;
>>> - } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount)
>>> {
>>> - return VK_INCOMPLETE;
>>> } else {
>>> - *pPhysicalDeviceCount = 0;
>>> + *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount,
>>> instance->physicalDeviceCount);
>>> + for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i)
>>> + pPhysicalDevices[i] =
>>> radv_physical_device_to_handle(instance->physicalDevices + i);
>>> }
>>> - return VK_SUCCESS;
>>> + return *pPhysicalDeviceCount < instance->physicalDeviceCount ?
>>> VK_INCOMPLETE
>>> + :
>>> VK_SUCCESS;
>>> }
>>> void radv_GetPhysicalDeviceFeatures(
>>> @@ -775,6 +749,7 @@ VkResult radv_CreateDevice(
>>> device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
>>> device->instance = physical_device->instance;
>>> + device->physical_device = physical_device;
>>> device->debug_flags = device->instance->debug_flags;
>>> @@ -1658,14 +1633,14 @@ radv_initialise_color_surface(struct radv_device
>>> *device,
>>> if (iview->image->fmask.size) {
>>> va = device->ws->buffer_get_va(iview->bo) +
>>> iview->image->offset + iview->image->fmask.offset;
>>> - if (device->instance->physicalDevice.rad_info.chip_class
>>> >= CIK)
>>> + if (device->physical_device->rad_info.chip_class >= CIK)
>>> cb->cb_color_pitch |=
>>> S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
>>> cb->cb_color_attrib |=
>>> S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
>>> cb->cb_color_fmask = va >> 8;
>>> cb->cb_color_fmask_slice =
>>> S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
>>> } else {
>>> /* This must be set for fast clear to work without FMASK.
>>> */
>>> - if (device->instance->physicalDevice.rad_info.chip_class
>>> >= CIK)
>>> + if (device->physical_device->rad_info.chip_class >= CIK)
>>> cb->cb_color_pitch |=
>>> S_028C64_FMASK_TILE_MAX(pitch_tile_max);
>>> cb->cb_color_attrib |=
>>> S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
>>> cb->cb_color_fmask = cb->cb_color_base;
>>> @@ -1725,7 +1700,7 @@ radv_initialise_color_surface(struct radv_device
>>> *device,
>>> if (iview->image->surface.dcc_size && level_info->dcc_enabled)
>>> cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
>>> - if (device->instance->physicalDevice.rad_info.chip_class >= VI) {
>>> + if (device->physical_device->rad_info.chip_class >= VI) {
>>> unsigned max_uncompressed_block_size = 2;
>>> if (iview->image->samples > 1) {
>>> if (iview->image->surface.bpe == 1)
>>> @@ -1740,7 +1715,7 @@ radv_initialise_color_surface(struct radv_device
>>> *device,
>>> /* This must be set for fast clear to work without FMASK. */
>>> if (!iview->image->fmask.size &&
>>> - device->instance->physicalDevice.rad_info.chip_class == SI) {
>>> + device->physical_device->rad_info.chip_class == SI) {
>>> unsigned bankh =
>>> util_logbase2(iview->image->surface.bankh);
>>> cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
>>> }
>>> @@ -1800,8 +1775,8 @@ radv_initialise_ds_surface(struct radv_device
>>> *device,
>>> else
>>> ds->db_stencil_info =
>>> S_028044_FORMAT(V_028044_STENCIL_INVALID);
>>> - if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> - struct radeon_info *info =
>>> &device->instance->physicalDevice.rad_info;
>>> + if (device->physical_device->rad_info.chip_class >= CIK) {
>>> + struct radeon_info *info =
>>> &device->physical_device->rad_info;
>>> unsigned tiling_index =
>>> iview->image->surface.tiling_index[level];
>>> unsigned stencil_index =
>>> iview->image->surface.stencil_tiling_index[level];
>>> unsigned macro_index =
>>> iview->image->surface.macro_tile_index;
>>> @@ -2031,7 +2006,7 @@ radv_init_sampler(struct radv_device *device,
>>> uint32_t max_aniso = pCreateInfo->anisotropyEnable &&
>>> pCreateInfo->maxAnisotropy > 1.0 ?
>>> (uint32_t)
>>> pCreateInfo->maxAnisotropy : 0;
>>> uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
>>> - bool is_vi = (device->instance->physicalDevice.rad_info.chip_class
>>> >= VI);
>>> + bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
>>> sampler->state[0] =
>>> (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
>>>
>>> S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
>>> diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
>>> index 2a41c8e323e..f75f0088495 100644
>>> --- a/src/amd/vulkan/radv_image.c
>>> +++ b/src/amd/vulkan/radv_image.c
>>> @@ -112,7 +112,7 @@ radv_init_surface(struct radv_device *device,
>>> VK_IMAGE_USAGE_STORAGE_BIT)) ||
>>> (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
>>> (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
>>> - device->instance->physicalDevice.rad_info.chip_class < VI ||
>>> + device->physical_device->rad_info.chip_class < VI ||
>>> create_info->scanout || (device->debug_flags &
>>> RADV_DEBUG_NO_DCC) ||
>>> !radv_is_colorbuffer_format_supported(pCreateInfo->format,
>>> &blendable))
>>> surface->flags |= RADEON_SURF_DISABLE_DCC;
>>> @@ -123,7 +123,7 @@ radv_init_surface(struct radv_device *device,
>>> #define ATI_VENDOR_ID 0x1002
>>> static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
>>> {
>>> - return (ATI_VENDOR_ID << 16) |
>>> device->instance->physicalDevice.rad_info.pci_id;
>>> + return (ATI_VENDOR_ID << 16) |
>>> device->physical_device->rad_info.pci_id;
>>> }
>>> static inline unsigned
>>> @@ -326,7 +326,7 @@ si_make_texture_descriptor(struct radv_device *device,
>>> /* The last dword is unused by hw. The shader uses it to
>>> clear
>>> * bits in the first dword of sampler state.
>>> */
>>> - if (device->instance->physicalDevice.rad_info.chip_class
>>> <= CIK && image->samples <= 1) {
>>> + if (device->physical_device->rad_info.chip_class <= CIK &&
>>> image->samples <= 1) {
>>> if (first_level == last_level)
>>> state[7] = C_008F30_MAX_ANISO_RATIO;
>>> else
>>> @@ -517,8 +517,8 @@ radv_image_get_cmask_info(struct radv_device *device,
>>> struct radv_image *image,
>>> struct radv_cmask_info *out)
>>> {
>>> - unsigned pipe_interleave_bytes =
>>> device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
>>> - unsigned num_pipes =
>>> device->instance->physicalDevice.rad_info.num_tile_pipes;
>>> + unsigned pipe_interleave_bytes =
>>> device->physical_device->rad_info.pipe_interleave_bytes;
>>> + unsigned num_pipes =
>>> device->physical_device->rad_info.num_tile_pipes;
>>> unsigned cl_width, cl_height;
>>> switch (num_pipes) {
>>> @@ -589,8 +589,8 @@ radv_image_get_htile_size(struct radv_device *device,
>>> {
>>> unsigned cl_width, cl_height, width, height;
>>> unsigned slice_elements, slice_bytes, base_align;
>>> - unsigned num_pipes =
>>> device->instance->physicalDevice.rad_info.num_tile_pipes;
>>> - unsigned pipe_interleave_bytes =
>>> device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
>>> + unsigned num_pipes =
>>> device->physical_device->rad_info.num_tile_pipes;
>>> + unsigned pipe_interleave_bytes =
>>> device->physical_device->rad_info.pipe_interleave_bytes;
>>> /* Overalign HTILE on P2 configs to work around GPU hangs in
>>> * piglit/depthstencil-render-miplevels 585.
>>> @@ -599,7 +599,7 @@ radv_image_get_htile_size(struct radv_device *device,
>>> * are always reproducible. I think I have seen the test hang
>>> * on Carrizo too, though it was very rare there.
>>> */
>>> - if (device->instance->physicalDevice.rad_info.chip_class >= CIK &&
>>> num_pipes < 4)
>>> + if (device->physical_device->rad_info.chip_class >= CIK &&
>>> num_pipes < 4)
>>> num_pipes = 4;
>>> switch (num_pipes) {
>>> @@ -821,7 +821,7 @@ void radv_image_set_optimal_micro_tile_mode(struct
>>> radv_device *device,
>>> * definitions for them either. They are all 2D_TILED_THIN1 modes
>>> with
>>> * different bpp and micro tile mode.
>>> */
>>> - if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if (device->physical_device->rad_info.chip_class >= CIK) {
>>> switch (micro_tile_mode) {
>>> case 0: /* displayable */
>>> image->surface.tiling_index[0] = 10;
>>> diff --git a/src/amd/vulkan/radv_pipeline.c
>>> b/src/amd/vulkan/radv_pipeline.c
>>> index d1a3efe9c96..360b5196551 100644
>>> --- a/src/amd/vulkan/radv_pipeline.c
>>> +++ b/src/amd/vulkan/radv_pipeline.c
>>> @@ -278,7 +278,7 @@ static const char *radv_get_shader_name(struct
>>> radv_shader_variant *var,
>>> }
>>> static void radv_dump_pipeline_stats(struct radv_device *device, struct
>>> radv_pipeline *pipeline)
>>> {
>>> - unsigned lds_increment =
>>> device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
>>> + unsigned lds_increment =
>>> device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>>> struct radv_shader_variant *var;
>>> struct ac_shader_config *conf;
>>> int i;
>>> @@ -299,7 +299,7 @@ static void radv_dump_pipeline_stats(struct
>>> radv_device *device, struct radv_pip
>>> }
>>> if (conf->num_sgprs) {
>>> - if
>>> (device->instance->physicalDevice.rad_info.chip_class >= VI)
>>> + if (device->physical_device->rad_info.chip_class
>>> >= VI)
>>> max_simd_waves = MIN2(max_simd_waves, 800
>>> / conf->num_sgprs);
>>> else
>>> max_simd_waves = MIN2(max_simd_waves, 512
>>> / conf->num_sgprs);
>>> @@ -409,7 +409,7 @@ static struct radv_shader_variant
>>> *radv_shader_variant_create(struct radv_device
>>> bool dump)
>>> {
>>> struct radv_shader_variant *variant = calloc(1, sizeof(struct
>>> radv_shader_variant));
>>> - enum radeon_family chip_family =
>>> device->instance->physicalDevice.rad_info.family;
>>> + enum radeon_family chip_family =
>>> device->physical_device->rad_info.family;
>>> LLVMTargetMachineRef tm;
>>> if (!variant)
>>> return NULL;
>>> @@ -423,7 +423,7 @@ static struct radv_shader_variant
>>> *radv_shader_variant_create(struct radv_device
>>> options.unsafe_math = !!(device->debug_flags &
>>> RADV_DEBUG_UNSAFE_MATH);
>>> options.family = chip_family;
>>> - options.chip_class =
>>> device->instance->physicalDevice.rad_info.chip_class;
>>> + options.chip_class = device->physical_device->rad_info.chip_class;
>>> tm = ac_create_target_machine(chip_family);
>>> ac_compile_nir_shader(tm, &binary, &variant->config,
>>> &variant->info, shader, &options, dump);
>>> @@ -1034,7 +1034,7 @@ radv_pipeline_init_multisample_state(struct
>>> radv_pipeline *pipeline,
>>> const VkPipelineMultisampleStateCreateInfo *vkms =
>>> pCreateInfo->pMultisampleState;
>>> struct radv_blend_state *blend = &pipeline->graphics.blend;
>>> struct radv_multisample_state *ms = &pipeline->graphics.ms;
>>> - unsigned num_tile_pipes =
>>> pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
>>> + unsigned num_tile_pipes =
>>> pipeline->device->physical_device->rad_info.num_tile_pipes;
>>> int ps_iter_samples = 1;
>>> uint32_t mask = 0xffff;
>>> diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>>> b/src/amd/vulkan/radv_pipeline_cache.c
>>> index 4fd09beb633..2cb1dfb6eb0 100644
>>> --- a/src/amd/vulkan/radv_pipeline_cache.c
>>> +++ b/src/amd/vulkan/radv_pipeline_cache.c
>>> @@ -308,7 +308,6 @@ radv_pipeline_cache_load(struct radv_pipeline_cache
>>> *cache,
>>> const void *data, size_t size)
>>> {
>>> struct radv_device *device = cache->device;
>>> - struct radv_physical_device *pdevice =
>>> &device->instance->physicalDevice;
>>> struct cache_header header;
>>> if (size < sizeof(header))
>>> @@ -320,9 +319,9 @@ radv_pipeline_cache_load(struct radv_pipeline_cache
>>> *cache,
>>> return;
>>> if (header.vendor_id != 0x1002)
>>> return;
>>> - if (header.device_id !=
>>> device->instance->physicalDevice.rad_info.pci_id)
>>> + if (header.device_id != device->physical_device->rad_info.pci_id)
>>> return;
>>> - if (memcmp(header.uuid, pdevice->uuid, VK_UUID_SIZE) != 0)
>>> + if (memcmp(header.uuid, device->physical_device->uuid,
>>> VK_UUID_SIZE) != 0)
>>> return;
>>> char *end = (void *) data + size;
>>> @@ -404,7 +403,6 @@ VkResult radv_GetPipelineCacheData(
>>> {
>>> RADV_FROM_HANDLE(radv_device, device, _device);
>>> RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
>>> - struct radv_physical_device *pdevice =
>>> &device->instance->physicalDevice;
>>> struct cache_header *header;
>>> VkResult result = VK_SUCCESS;
>>> const size_t size = sizeof(*header) + cache->total_size;
>>> @@ -421,8 +419,8 @@ VkResult radv_GetPipelineCacheData(
>>> header->header_size = sizeof(*header);
>>> header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
>>> header->vendor_id = 0x1002;
>>> - header->device_id =
>>> device->instance->physicalDevice.rad_info.pci_id;
>>> - memcpy(header->uuid, pdevice->uuid, VK_UUID_SIZE);
>>> + header->device_id = device->physical_device->rad_info.pci_id;
>>> + memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
>>> p += header->header_size;
>>> struct cache_entry *entry;
>>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
>>> index b095e3f39a6..0627d797178 100644
>>> --- a/src/amd/vulkan/radv_private.h
>>> +++ b/src/amd/vulkan/radv_private.h
>>> @@ -290,7 +290,7 @@ struct radv_instance {
>>> uint32_t apiVersion;
>>> int physicalDeviceCount;
>>> - struct radv_physical_device physicalDevice;
>>> + struct radv_physical_device physicalDevices[8];
>> This could be a macro, RADV_MAX_PHYSICAL_DEVICES or some better name.
>>
>>> uint64_t debug_flags;
>>> };
>>> @@ -497,6 +497,8 @@ struct radv_device {
>>> struct radeon_winsys_bo *trace_bo;
>>> uint32_t *trace_id_ptr;
>>> +
>>> + struct radv_physical_device *physical_device;
>>> };
>>> struct radv_device_memory {
>>> diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
>>> index 06762dee086..a29a05d4b84 100644
>>> --- a/src/amd/vulkan/radv_query.c
>>> +++ b/src/amd/vulkan/radv_query.c
>>> @@ -35,10 +35,10 @@
>>> static unsigned get_max_db(struct radv_device *device)
>>> {
>>> - unsigned num_db =
>>> device->instance->physicalDevice.rad_info.num_render_backends;
>>> - MAYBE_UNUSED unsigned rb_mask =
>>> device->instance->physicalDevice.rad_info.enabled_rb_mask;
>>> + unsigned num_db =
>>> device->physical_device->rad_info.num_render_backends;
>>> + MAYBE_UNUSED unsigned rb_mask =
>>> device->physical_device->rad_info.enabled_rb_mask;
>>> - if (device->instance->physicalDevice.rad_info.chip_class == SI)
>>> + if (device->physical_device->rad_info.chip_class == SI)
>>> num_db = 8;
>>> else
>>> num_db = MAX2(8, num_db);
>>> diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
>>> index 002b3a85014..2f45961cf8c 100644
>>> --- a/src/amd/vulkan/radv_wsi.c
>>> +++ b/src/amd/vulkan/radv_wsi.c
>>> @@ -251,7 +251,7 @@ VkResult radv_CreateSwapchainKHR(
>>> RADV_FROM_HANDLE(radv_device, device, _device);
>>> ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
>>> struct wsi_interface *iface =
>>> -
>>> device->instance->physicalDevice.wsi_device.wsi[surface->platform];
>>> +
>>> device->physical_device->wsi_device.wsi[surface->platform];
>>> struct wsi_swapchain *swapchain;
>>> const VkAllocationCallbacks *alloc;
>>> if (pAllocator)
>>> @@ -259,7 +259,7 @@ VkResult radv_CreateSwapchainKHR(
>>> else
>>> alloc = &device->alloc;
>>> VkResult result = iface->create_swapchain(surface, _device,
>>> -
>>> &device->instance->physicalDevice.wsi_device,
>>> +
>>> &device->physical_device->wsi_device,
>>> pCreateInfo,
>>> alloc,
>>> &radv_wsi_image_fns,
>>> &swapchain);
>>> diff --git a/src/amd/vulkan/si_cmd_buffer.c
>>> b/src/amd/vulkan/si_cmd_buffer.c
>>> index e59d52e82af..e2025b1dd19 100644
>>> --- a/src/amd/vulkan/si_cmd_buffer.c
>>> +++ b/src/amd/vulkan/si_cmd_buffer.c
>>> @@ -511,8 +511,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int
>>> first,
>>> uint32_t
>>> si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
>>> {
>>> - enum chip_class chip_class =
>>> cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
>>> - struct radeon_info *info =
>>> &cmd_buffer->device->instance->physicalDevice.rad_info;
>>> + enum chip_class chip_class =
>>> cmd_buffer->device->physical_device->rad_info.chip_class;
>>> + struct radeon_info *info =
>>> &cmd_buffer->device->physical_device->rad_info;
>>> unsigned prim = cmd_buffer->state.pipeline->graphics.prim;
>>> unsigned primgroup_size = 128; /* recommended without a GS */
>>> unsigned max_primgroup_in_wave = 2;
>>> @@ -599,7 +599,7 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer
>>> *cmd_buffer)
>>> void
>>> si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
>>> {
>>> - enum chip_class chip_class =
>>> cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
>>> + enum chip_class chip_class =
>>> cmd_buffer->device->physical_device->rad_info.chip_class;
>>> unsigned cp_coher_cntl = 0;
>>> bool is_compute = cmd_buffer->queue_family_index ==
>>> RADV_QUEUE_COMPUTE;
>>> @@ -638,7 +638,7 @@ si_emit_cache_flush(struct radv_cmd_buffer
>>> *cmd_buffer)
>>> S_0085F0_CB7_DEST_BASE_ENA(1);
>>> /* Necessary for DCC */
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
>>> + if
>>> (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
>>> radeon_emit(cmd_buffer->cs,
>>> PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>>> radeon_emit(cmd_buffer->cs,
>>> EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
>>> EVENT_INDEX(5));
>>> @@ -756,7 +756,7 @@ static void si_emit_cp_dma_copy_buffer(struct
>>> radv_cmd_buffer *cmd_buffer,
>>> radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if (cmd_buffer->device->physical_device->rad_info.chip_class >=
>>> CIK) {
>>> radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
>>> radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
>>> radeon_emit(cs, src_va); /* SRC_ADDR_LO
>>> [31:0] */
>>> @@ -802,7 +802,7 @@ static void si_emit_cp_dma_clear_buffer(struct
>>> radv_cmd_buffer *cmd_buffer,
>>> radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
>>> - if
>>> (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
>>> + if (cmd_buffer->device->physical_device->rad_info.chip_class >=
>>> CIK) {
>>> radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
>>> radeon_emit(cs, sync_flag | dst_sel |
>>> S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
>>> radeon_emit(cs, clear_value); /* DATA [31:0] */
>>> @@ -875,8 +875,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer
>>> *cmd_buffer,
>>> uint64_t skipped_size = 0, realign_size = 0;
>>> - if (cmd_buffer->device->instance->physicalDevice.rad_info.family
>>> <= CHIP_CARRIZO ||
>>> - cmd_buffer->device->instance->physicalDevice.rad_info.family
>>> == CHIP_STONEY) {
>>> + if (cmd_buffer->device->physical_device->rad_info.family <=
>>> CHIP_CARRIZO ||
>>> + cmd_buffer->device->physical_device->rad_info.family ==
>>> CHIP_STONEY) {
>>> /* If the size is not aligned, we must add a dummy copy at
>>> the end
>>> * just to align the internal counter. Otherwise, the DMA
>>> engine
>>> * would slow down by an order of magnitude for following
>>> copies.
>>
More information about the mesa-dev
mailing list