[Intel-gfx] [PATCH 2/8] drm/i915: Migrate to safe iterators in error state capture
Tomas Elf
tomas.elf at intel.com
Fri Oct 9 04:40:51 PDT 2015
On 09/10/2015 09:27, Daniel Vetter wrote:
> On Thu, Oct 08, 2015 at 07:31:34PM +0100, Tomas Elf wrote:
>> Using safe list iterators alleviates the problem of unsynchronized driver list
>> manipulations while error state capture is in the process of traversing lists.
>>
>> Signed-off-by: Tomas Elf <tomas.elf at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++------------------
>> 1 file changed, 19 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 2f04e4f..32c1799 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -718,10 +718,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>> static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>> int count, struct list_head *head)
>> {
>> - struct i915_vma *vma;
>> + struct i915_vma *vma, *tmpvma;
>> int i = 0;
>>
>> - list_for_each_entry(vma, head, mm_list) {
>> + list_for_each_entry_safe(vma, tmpvma, head, mm_list) {
>
> _safe isn't safe against anything, it's only safe against removal of the
> current list item done with a list_del/list_move from this thread. I.e.
> the only thing it does is have a temporary pointer to the next list
> element so if you delete the current one it won't fall over.
>
> It doesn't protect against anything else, and especially not against other
> threads totally stomping the list.
Absolutely! But it's good enough to make the test not fall over within
an hour of testing and instead allow it to run for 12+ hours during
continuous long-duration testing, which is what I need for the TDR
validation.
>
> So we need proper protection for these lists, independent of
> dev->struct_mutex. The least invasive way to do that is probably rcu,
> which only requires us that we release the memory for any object sitting
> on these lists with kfree_rcu instead of normal kfree. Another option
> might be a spinlock, but that would be a lot more invasive, and Chris
> won't like the execbuf overhead this causes ;-)
> -Daniel
Awesome! Let's go with that then.
Thanks,
Tomas
>
>> capture_bo(err++, vma);
>> if (++i == count)
>> break;
>> @@ -734,17 +734,17 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
>> int count, struct list_head *head,
>> struct i915_address_space *vm)
>> {
>> - struct drm_i915_gem_object *obj;
>> + struct drm_i915_gem_object *obj, *tmpobj;
>> struct drm_i915_error_buffer * const first = err;
>> struct drm_i915_error_buffer * const last = err + count;
>>
>> - list_for_each_entry(obj, head, global_list) {
>> - struct i915_vma *vma;
>> + list_for_each_entry_safe(obj, tmpobj, head, global_list) {
>> + struct i915_vma *vma, *tmpvma;
>>
>> if (err == last)
>> break;
>>
>> - list_for_each_entry(vma, &obj->vma_list, vma_link)
>> + list_for_each_entry_safe(vma, tmpvma, &obj->vma_list, vma_link)
>> if (vma->vm == vm && vma->pin_count > 0)
>> capture_bo(err++, vma);
>> }
>> @@ -958,13 +958,13 @@ static void i915_gem_record_active_context(struct intel_engine_cs *ring,
>> struct drm_i915_error_ring *ering)
>> {
>> struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> - struct drm_i915_gem_object *obj;
>> + struct drm_i915_gem_object *obj, *tmpobj;
>>
>> /* Currently render ring is the only HW context user */
>> if (ring->id != RCS || !error->ccid)
>> return;
>>
>> - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>> + list_for_each_entry_safe(obj, tmpobj, &dev_priv->mm.bound_list, global_list) {
>> if (!i915_gem_obj_ggtt_bound(obj))
>> continue;
>>
>> @@ -979,7 +979,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>> struct drm_i915_error_state *error)
>> {
>> struct drm_i915_private *dev_priv = dev->dev_private;
>> - struct drm_i915_gem_request *request;
>> + struct drm_i915_gem_request *request, *tmpreq;
>> int i, count;
>>
>> for (i = 0; i < I915_NUM_RINGS; i++) {
>> @@ -1055,7 +1055,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>> i915_gem_record_active_context(ring, error, &error->ring[i]);
>>
>> count = 0;
>> - list_for_each_entry(request, &ring->request_list, list)
>> + list_for_each_entry_safe(request, tmpreq, &ring->request_list, list)
>> count++;
>>
>> error->ring[i].num_requests = count;
>> @@ -1068,7 +1068,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>> }
>>
>> count = 0;
>> - list_for_each_entry(request, &ring->request_list, list) {
>> + list_for_each_entry_safe(request, tmpreq, &ring->request_list, list) {
>> struct drm_i915_error_request *erq;
>>
>> erq = &error->ring[i].requests[count++];
>> @@ -1088,17 +1088,17 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>> const int ndx)
>> {
>> struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
>> - struct drm_i915_gem_object *obj;
>> - struct i915_vma *vma;
>> + struct drm_i915_gem_object *obj, *tmpobj;
>> + struct i915_vma *vma, *tmpvma;
>> int i;
>>
>> i = 0;
>> - list_for_each_entry(vma, &vm->active_list, mm_list)
>> + list_for_each_entry_safe(vma, tmpvma, &vm->active_list, mm_list)
>> i++;
>> error->active_bo_count[ndx] = i;
>>
>> - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>> - list_for_each_entry(vma, &obj->vma_list, vma_link)
>> + list_for_each_entry_safe(obj, tmpobj, &dev_priv->mm.bound_list, global_list) {
>> + list_for_each_entry_safe(vma, tmpvma, &obj->vma_list, vma_link)
>> if (vma->vm == vm && vma->pin_count > 0)
>> i++;
>> }
>> @@ -1128,10 +1128,10 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>> static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>> struct drm_i915_error_state *error)
>> {
>> - struct i915_address_space *vm;
>> + struct i915_address_space *vm, *tmpvm;
>> int cnt = 0, i = 0;
>>
>> - list_for_each_entry(vm, &dev_priv->vm_list, global_link)
>> + list_for_each_entry_safe(vm, tmpvm, &dev_priv->vm_list, global_link)
>> cnt++;
>>
>> error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
>> @@ -1155,7 +1155,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>> error->pinned_bo = NULL;
>> error->pinned_bo_count = NULL;
>> } else {
>> - list_for_each_entry(vm, &dev_priv->vm_list, global_link)
>> + list_for_each_entry_safe(vm, tmpvm, &dev_priv->vm_list, global_link)
>> i915_gem_capture_vm(dev_priv, error, vm, i++);
>>
>> error->vm_count = cnt;
>> --
>> 1.9.1
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
More information about the Intel-gfx
mailing list