[PATCH v4] drm/virtio: Add drm_panic support

Tue Nov 19 10:21:54 UTC 2024

On 18/11/2024 17:16, Dmitry Osipenko wrote:
> On 11/13/24 11:44, Ryosuke Yasuoka wrote:
>> From: Jocelyn Falempe <jfalempe at redhat.com>
>>
>> Virtio gpu supports the drm_panic module, which displays a message to
>> the screen when a kernel panic occurs.
>>
>> Signed-off-by: Ryosuke Yasuoka <ryasuoka at redhat.com>
>> Signed-off-by: Jocelyn Falempe <jfalempe at redhat.com>
>> ---
> 
> On a second look, I spotted few problems, see them below:
> 
> ...
>> +/* For drm_panic */
>> +static int virtio_gpu_panic_resource_flush(struct drm_plane *plane,
>> +					   struct virtio_gpu_vbuffer *vbuf,
>> +					   uint32_t x, uint32_t y,
>> +					   uint32_t width, uint32_t height)
>> +{
>> +	int ret;
>> +	struct drm_device *dev = plane->dev;
>> +	struct virtio_gpu_device *vgdev = dev->dev_private;
>> +	struct virtio_gpu_framebuffer *vgfb;
>> +	struct virtio_gpu_object *bo;
>> +
>> +	vgfb = to_virtio_gpu_framebuffer(plane->state->fb);
>> +	bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]);
>> +
>> +	ret = virtio_gpu_panic_cmd_resource_flush(vgdev, vbuf, bo->hw_res_handle, x, y,
>> +						  width, height);
>> +	return ret;
> 
> Nit: return directly directly in such cases, dummy ret variable not needed
> 
>> +}
>> +
>>   static void virtio_gpu_resource_flush(struct drm_plane *plane,
>>   				      uint32_t x, uint32_t y,
>>   				      uint32_t width, uint32_t height)
>> @@ -359,11 +406,128 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane,
>>   	virtio_gpu_cursor_ping(vgdev, output);
>>   }
>>   
>> +static int virtio_drm_get_scanout_buffer(struct drm_plane *plane,
>> +					 struct drm_scanout_buffer *sb)
>> +{
>> +	struct virtio_gpu_object *bo;
>> +
>> +	if (!plane->state || !plane->state->fb || !plane->state->visible)
>> +		return -ENODEV;
>> +
>> +	bo = gem_to_virtio_gpu_obj(plane->state->fb->obj[0]);
> 
> VRAM BOs aren't vmappable and should be rejected.
> 
> In the virtio_panic_flush() below,
> virtio_gpu_panic_cmd_transfer_to_host_2d() is invoked only for dumb BOs.
> Thus, only dumb BO supports panic output and should be accepted by
> get_scanout_buffer(), other should be rejected with ENODEV here, AFAICT.
> Correct?

Yes, it's correct
> 
>> +	/* try to vmap it if possible */
>> +	if (!bo->base.vaddr) {
>> +		int ret;
>> +
>> +		ret = drm_gem_shmem_vmap(&bo->base, &sb->map[0]);
>> +		if (ret)
>> +			return ret;
>> +	} else {
>> +		iosys_map_set_vaddr(&sb->map[0], bo->base.vaddr);
>> +	}
>> +
>> +	sb->format = plane->state->fb->format;
>> +	sb->height = plane->state->fb->height;
>> +	sb->width = plane->state->fb->width;
>> +	sb->pitch[0] = plane->state->fb->pitches[0];
>> +	return 0;
>> +}
>> +
>> +struct virtio_gpu_panic_object_array {
>> +	struct ww_acquire_ctx ticket;
>> +	struct list_head next;
>> +	u32 nents, total;
>> +	struct drm_gem_object *objs;
>> +};
> 
> This virtio_gpu_panic_object_array struct is a hack, use
> virtio_gpu_array_alloc(). Maybe add atomic variant of the array_alloc().

We can't allocate memory in the panic handler, but maybe it can be 
pre-allocated, like the virtio_panic_buffer ?
> 
>> +static void virtio_gpu_panic_put_vbuf(struct virtqueue *vq,
>> +				      struct virtio_gpu_vbuffer *vbuf,
>> +				      struct virtio_gpu_object_array *objs)
>> +{
>> +	unsigned int len;
>> +	int i;
>> +
>> +	/* waiting vbuf to be used */
>> +	for (i = 0; i < 500; i++) {
>> +		if (vbuf == virtqueue_get_buf(vq, &len)) {
> 
> Is it guaranteed that virtio_gpu_dequeue_ctrl_func() never runs in
> parallel here?

Yes, in the panic handler, only one CPU remains active, and no other 
task can be scheduled.
> 
>> +			if (objs != NULL && vbuf->objs)
>> +				drm_gem_object_put(objs->objs[0]);
> 
> This drm_gem_object_put(objs->objs) is difficult to follow. Why
> vbuf->objs can't be used directly?
> 
> Better to remove all error handlings for simplicity. It's not important
> if a bit of memory leaked on panic.

We try to reclaim, to make it easier to test with the debugfs interface.
But this is a bit racy anyway, because it's not the real panic context.
> 
>> +			break;
>> +		}
>> +		udelay(1);
>> +	}
>> +}
>> +
>> +static void virtio_panic_flush(struct drm_plane *plane)
>> +{
>> +	int ret;
>> +	struct virtio_gpu_object *bo;
>> +	struct drm_device *dev = plane->dev;
>> +	struct virtio_gpu_device *vgdev = dev->dev_private;
>> +	struct drm_rect rect;
>> +	void *vp_buf = vgdev->virtio_panic_buffer;
>> +	struct virtio_gpu_vbuffer *vbuf_dumb_bo = vp_buf;
>> +	struct virtio_gpu_vbuffer *vbuf_resource_flush = vp_buf + VBUFFER_SIZE;
>> +
>> +	rect.x1 = 0;
>> +	rect.y1 = 0;
>> +	rect.x2 = plane->state->fb->width;
>> +	rect.y2 = plane->state->fb->height;
>> +
>> +	bo = gem_to_virtio_gpu_obj(plane->state->fb->obj[0]);
>> +
>> +	struct drm_gem_object obj;
>> +	struct virtio_gpu_panic_object_array objs = {
>> +		.next = { NULL, NULL },
>> +		.nents = 0,
>> +		.total = 1,
>> +		.objs = &obj
>> +	};
> 
> This &obj is unitialized stack data. The .objs points to an array of obj
> pointers and you pointing it to object. Like I suggested above, let's
> remove this hack and use proper virtio_gpu_array_alloc().
> 
>> +	if (bo->dumb) {
>> +		ret = virtio_gpu_panic_update_dumb_bo(vgdev,
>> +						      plane->state,
>> +						      &rect,
>> +						      (struct virtio_gpu_object_array *)&objs,
>> +						      vbuf_dumb_bo);
>> +		if (ret) {
>> +			if (vbuf_dumb_bo->objs)
>> +				drm_gem_object_put(&objs.objs[0]);
>> +			return;
>> +		}
>> +	}
>> +
>> +	ret = virtio_gpu_panic_resource_flush(plane, vbuf_resource_flush,
>> +					      plane->state->src_x >> 16,
>> +					      plane->state->src_y >> 16,
>> +					      plane->state->src_w >> 16,
>> +					      plane->state->src_h >> 16);
>> +	if (ret) {
>> +		virtio_gpu_panic_put_vbuf(vgdev->ctrlq.vq,
>> +					  vbuf_dumb_bo,
>> +					  (struct virtio_gpu_object_array *)&objs);
> 
> The virtio_gpu_panic_notify() hasn't been invoked here, thus this
> put_vbuf should always time out because vq hasn't been kicked. Again,
> best to leak resources on error than to have broken/untested error
> handling code paths.

agreed
> 
>> +		return;
>> +	}
>> +
>> +	virtio_gpu_panic_notify(vgdev);
>> +
>> +	virtio_gpu_panic_put_vbuf(vgdev->ctrlq.vq,
>> +				  vbuf_dumb_bo,
>> +				  (struct virtio_gpu_object_array *)&objs);
>> +	virtio_gpu_panic_put_vbuf(vgdev->ctrlq.vq,
>> +				  vbuf_resource_flush,
>> +				  NULL);
>> +}
>> +
>>   static const struct drm_plane_helper_funcs virtio_gpu_primary_helper_funcs = {
>>   	.prepare_fb		= virtio_gpu_plane_prepare_fb,
>>   	.cleanup_fb		= virtio_gpu_plane_cleanup_fb,
>>   	.atomic_check		= virtio_gpu_plane_atomic_check,
>>   	.atomic_update		= virtio_gpu_primary_plane_update,
>> +	.get_scanout_buffer	= virtio_drm_get_scanout_buffer,
>> +	.panic_flush		= virtio_panic_flush,
>>   };
>>   
>>   static const struct drm_plane_helper_funcs virtio_gpu_cursor_helper_funcs = {
>> @@ -383,6 +547,13 @@ struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
>>   	const uint32_t *formats;
>>   	int nformats;
>>   
>> +	/* allocate panic buffers */
>> +	if (index == 0 && type == DRM_PLANE_TYPE_PRIMARY) {
>> +		vgdev->virtio_panic_buffer = drmm_kzalloc(dev, 2 * VBUFFER_SIZE, GFP_KERNEL);
>> +		if (!vgdev->virtio_panic_buffer)
>> +			return ERR_PTR(-ENOMEM);
>> +	}
> 
> If there is more than one virtio-gpu display, then this panic buffer is
> reused by other displays. It seems to work okay, but potential
> implications are unclear. Won't it be more robust to have a panic buffer
> per CRTC?

The drm panic handler call each plane sequentially, so it's not a 
problem. Having one buffer per CRTC would just add more complexity.

> 
> Also, please rename vgdev->virtio_panic_buffer to vgdev->panic_vbuf for
> brevity.
> 

Thanks for the review.

-- 

Jocelyn