etnaviv-gpu 134000.gpu: MMU fault status 0x00000002 on i.MX6 Quad Plus

Luís Mendes luis.p.mendes at gmail.com
Thu Nov 2 22:26:08 UTC 2017


Hi Russell,

Please ignore my previous email...
I tried to include the stack trace of the calling process, but I am missing
something: although I have included -rdynamic in the linker flags, no
stack trace is being generated.
The debug code is like this in case you want to check how the variables are
being obtained:
void etnaviv_accel_CopyNtoN(DrawablePtr pSrc, DrawablePtr pDst,
    GCPtr pGC, BoxPtr pBox, int nBox, int dx, int dy, Bool reverse,
    Bool upsidedown, Pixel bitPlane, void *closure)
{
...
    if (!etnaviv_init_dstsrc_drawable(etnaviv, &op, pDst, pSrc))
        goto fallback;


    syslog(LOG_ERR, "initial op.src.offset.x=%d, op.src.offset.y=%d\n",
            op.src.offset.x, op.src.offset.y);

    /* Include the copy delta on the source */
    op.src.offset.x += dx - op.dst.offset.x;
    op.src.offset.y += dy - op.dst.offset.y;
    op.src_origin_mode = SRC_ORIGIN_RELATIVE;

    /* Calculate the overall extent */
    extent.x1 = max_t(short, pDst->x, pSrc->x - dx);
    extent.y1 = max_t(short, pDst->y, pSrc->y - dy);
    extent.x2 = min_t(short, pDst->x + pDst->width,
                 pSrc->x + pSrc->width - dx);
    extent.y2 = min_t(short, pDst->y + pDst->height,
                 pSrc->y + pSrc->height - dy);

    if (etna_bo_size(op.dst.bo) == 98304) {
        int nptrs;
        syslog(LOG_ERR, "dx: %d, op.dst.offset.x:%d\n", dx,
op.dst.offset.x);
        syslog(LOG_ERR, "dy: %d, op.dst.offset.y:%d\n", dy,
op.dst.offset.y);
        syslog(LOG_ERR, "final op.src.offset.x=%d, op.src.offset.y=%d\n",
            op.src.offset.x, op.src.offset.y);
        syslog(LOG_ERR, "pDst->x=%d, pSrc->x=%d\n", pDst->x, pSrc->x);
        syslog(LOG_ERR, "pDst->y=%d, pSrc->y=%d\n", pDst->y, pSrc->y);
        syslog(LOG_ERR, "pDst->width=%d, pSrc->width=%d\n", pDst->width,
pSrc->width);
        syslog(LOG_ERR, "pDst->height=%d, pSrc->height=%d\n", pDst->height,
pSrc->height);
        syslog(LOG_ERR, "extent(x1=%d, y1=%d, x2=%d, y2=%d)\n", extent.x1,
extent.y1,
            extent.x2, extent.y2);
        if (pGC) {
            syslog(LOG_ERR, "pGC is not NULL\n");
        } else {
            syslog(LOG_ERR, "pGC is NULL\n");
        }
        nptrs = backtrace(buffer, 100);
        strings = backtrace_symbols(buffer, nptrs);
        if (strings == NULL) {
            syslog(LOG_ERR, "Error getting stacktrace\n");
        } else {
            for (j = 0; j < nptrs; j++) {
                syslog(LOG_ERR, "[%d] %s\n", j, strings[j]);
            }
            free(strings);
        }
    }


The MMU fault dump now looks like this:
=== Register dump
0000000c = 000000df
00000000 = 00040900
00000004 = 7ffffff8 Idle: FE- DE- PE- SH+ PA+ SE+ RA+ TX+ VG+ IM+ FP+ TS+
00000008 = 00002200
00000014 = ffffffff
00000018 = 14010000
0000001c = e02c7eca
00000020 = 00000320
00000024 = 00005303
00000028 = 20140510
0000002c = 20353900
00000034 = e9399eff
00000038 = e9399eff
00000070 = 00000000
00000100 = 00140021
00000104 = 00000000
00000108 = 000000fa
0000010c = 00000000
00000400 = 00000000
00000404 = 00000000
00000408 = 00000000
0000040c = 00000000
00000410 = 00000000
00000414 = 3c000000
00000418 = 00000000
0000041c = 00000000
00000420 = 00000000
00000424 = 00000000
00000428 = 00000000
0000042c = 00030000
00000480 = 000000a8
0000065c = 00000001
00000660 = 00000803 Cmd: [load0 DMA: idle Fetch: valid] Req idle Cal idle
00000664 = 00001230 Command DMA address
00000668 = 00000040 FE fetched word 0
0000066c = 00000000 FE fetched word 1
00000670 = 00000000
=== Buffers
 Num Name  IOVA     Size
   0 reg   00000000 00000128      296
   1 mmu   00000000 00401000  4198400
   2 ring  00000000 00001000     4096
*  3 cmd   00001000 00000320      800
   4 cmd   00002000 00000190      400
   5 cmd   00003000 00000320      800
   6 cmd   00004000 00000310      784
   7 bomap 00000000 000018f0     6384
   8 bo    00040000 00300000  3145728
   9 bo    00340000 00001000     4096
  10 bo    00341000 00002000     8192
  11 bo    030b9000 00001000     4096
  12 bo    0311b000 00001000     4096
  13 bo    0311c000 00001000     4096
  14 bo    0311d000 00018000    98304
Checking MMU entries... ok

I have attached cmd-00001000.bin.

The syslog logging follows in attachment too.

Looking at the log, I would say that the culprit copyNtoN is this one, as
it is the one nearest to the first MMU fault, but it does not match the
condition "etna_bo_size(op.dst.bo) == 98304":
Nov  2 22:10:29 picolo xf86_armada[744]: initial op.src.offset.x=0,
op.src.offset.y=0
Nov  2 22:10:29 picolo kernel: [   66.966997] etnaviv-gpu 134000.gpu: MMU
fault status 0x00000002
Nov  2 22:10:29 picolo kernel: [   66.973342] etnaviv-gpu 134000.gpu: MMU 0
fault addr 0x0803ffc0
Nov  2 22:10:29 picolo kernel: [   66.979286] etnaviv-gpu 134000.gpu: MMU 1
fault addr 0x00000000
Nov  2 22:10:29 picolo kernel: [   66.985221] etnaviv-gpu 134000.gpu: MMU 2
fault addr 0x00000000
Nov  2 22:10:29 picolo kernel: [   66.991154] etnaviv-gpu 134000.gpu: MMU 3
fault addr 0x00000000


Luis

On Thu, Nov 2, 2017 at 9:57 PM, Luís Mendes <luis.p.mendes at gmail.com> wrote:

> Hi Russell,
>
> I have the debug log with MMU faults and the logs you requested. I tried
> to obtain the stack trace with backtrace(); I passed the -rdynamic flag to
> the compiler, but had no luck...
> The debug code is like this:
> void etnaviv_accel_CopyNtoN(DrawablePtr pSrc, DrawablePtr pDst,
>     GCPtr pGC, BoxPtr pBox, int nBox, int dx, int dy, Bool reverse,
>     Bool upsidedown, Pixel bitPlane, void *closure)
> {
> ...
>     extent.y2 = min_t(short, pDst->y + pDst->height,
>                  pSrc->y + pSrc->height - dy);
>
>     if (etna_bo_size(op.dst.bo) == 98304) {
>         int nptrs;
>         syslog(LOG_ERR, "dx: %d, op.dst.offset.x:%d\n", dx,
> op.dst.offset.x);
>         syslog(LOG_ERR, "dy: %d, op.dst.offset.y:%d\n", dy,
> op.dst.offset.y);
>         syslog(LOG_ERR, "final op.src.offset.x=%d, op.src.offset.y=%d\n",
>             op.dst.offset.x, op.dst.offset.y);
>         syslog(LOG_ERR, "pDst->x=%d, pSrc->x=%d\n", pDst->x, pSrc->x);
>         syslog(LOG_ERR, "pDst->y=%d, pSrc->y=%d\n", pDst->y, pSrc->y);
>         syslog(LOG_ERR, "pDst->width=%d, pSrc->width=%d\n", pDst->width,
> pSrc->width);
>         syslog(LOG_ERR, "pDst->height=%d, pSrc->height=%d\n",
> pDst->height, pSrc->height);
>         syslog(LOG_ERR, "extent(x1=%d, y1=%d, x2=%d, y2=%d)\n", extent.x1,
> extent.y1,
>             extent.x2, extent.y2);
>         if (pGC) {
>             syslog(LOG_ERR, "pGC is not NULL\n");
>         } else {
>             syslog(LOG_ERR, "pGC is NULL\n");
>         }
>         nptrs = backtrace(buffer, 100);
>         strings = backtrace_symbols(buffer, nptrs);
>         if (strings == NULL) {
>             syslog(LOG_ERR, "Error getting stacktrace\n");
>         } else {
>             for (j = 0; j < nptrs; j++) {
>                 syslog(LOG_ERR, "[%d] %s\n", j, strings[j]);
>             }
>             free(strings);
>         }
>     }
>
>
> On Thu, Nov 2, 2017 at 8:16 PM, Luís Mendes <luis.p.mendes at gmail.com>
> wrote:
>
>>
>> I will do that. The strange thing is that I don't think
>> etnaviv_accel_CopyNtoN(...) is the culprit of the MMU faults, because if I
>> switch from hardware accelerated copyNtoN to software unaccelerated copyNtoN
>>  the MMU faults still occur, if I remember... However the corruption in
>> the menus and dialog windows can be fixed by switching to unaccelerated
>> copyNtoN.
>>
>> I will include the debug log when I have it.
>>
>>
>> On Thu, Nov 2, 2017 at 4:18 PM, Russell King - ARM Linux <
>> linux at armlinux.org.uk> wrote:
>>
>>> On Thu, Nov 02, 2017 at 03:33:19PM +0000, Luís Mendes wrote:
>>> > Hi Russell,
>>> >
>>> > The requested file follows in attachment.
>>>
>>> Thanks - see below.
>>>
>>> > On Thu, Nov 2, 2017 at 3:20 PM, Russell King - ARM Linux <
>>> > linux at armlinux.org.uk> wrote:
>>> >
>>> > > On Thu, Nov 02, 2017 at 03:05:38PM +0000, Luís Mendes wrote:
>>> > > > [   56.173613] etnaviv-gpu 134000.gpu: MMU fault status 0x00000002
>>> > > > [   56.179955] etnaviv-gpu 134000.gpu: MMU 0 fault addr 0x0803ffc0
>>> > > > [   56.185905] etnaviv-gpu 134000.gpu: MMU 1 fault addr 0x00000000
>>> > > > [   56.191843] etnaviv-gpu 134000.gpu: MMU 2 fault addr 0x00000000
>>> > > > [   56.197778] etnaviv-gpu 134000.gpu: MMU 3 fault addr 0x00000000
>>> > > > [   59.258367] etnaviv-gpu 134000.gpu: hangcheck detected gpu
>>> lockup!
>>> > > > [   59.265910] etnaviv-gpu 134000.gpu:      completed fence: 378
>>> > > > [   59.271737] etnaviv-gpu 134000.gpu:      active fence: 383
>>> > > > [   59.277930] etnaviv-gpu 134000.gpu: hangcheck recover!
>>> > > >
>>> > > > === Register dump
>>> > > > 0000000c = 000000df
>>> > > > 00000000 = 00040900
>>> > > > 00000004 = 7ffffff8 Idle: FE- DE- PE- SH+ PA+ SE+ RA+ TX+ VG+ IM+
>>> FP+ TS+
>>> > > > 00000008 = 00002200
>>> > > > 00000014 = ffffffff
>>> > > > 00000018 = 14010000
>>> > > > 0000001c = e02c7eca
>>> > > > 00000020 = 00000320
>>> > > > 00000024 = 00005303
>>> > > > 00000028 = 20140510
>>> > > > 0000002c = 20353900
>>> > > > 00000034 = e9399eff
>>> > > > 00000038 = e9399eff
>>> > > > 00000070 = 00000000
>>> > > > 00000100 = 00140021
>>> > > > 00000104 = 00000000
>>> > > > 00000108 = 000000fa
>>> > > > 0000010c = 00000000
>>> > > > 00000400 = 00000000
>>> > > > 00000404 = 00000000
>>> > > > 00000408 = 00000000
>>> > > > 0000040c = 00000000
>>> > > > 00000410 = 00000000
>>> > > > 00000414 = 3c000000
>>> > > > 00000418 = 00000000
>>> > > > 0000041c = 00000000
>>> > > > 00000420 = 00000000
>>> > > > 00000424 = 00000000
>>> > > > 00000428 = 00000000
>>> > > > 0000042c = 00030000
>>> > > > 00000480 = 000000a8
>>> > > > 0000065c = 00000001
>>> > > > 00000660 = 00000803 Cmd: [load0 DMA: idle Fetch: valid] Req idle
>>> Cal idle
>>> > > > 00000664 = 00001230 Command DMA address
>>> > > > 00000668 = 00000040 FE fetched word 0
>>> > > > 0000066c = 00000000 FE fetched word 1
>>> > > > 00000670 = 00000000
>>> > >
>>> > > Okay, so we stopped at 0x1230.
>>> > >
>>> > > > ===
>>> > > > Buffers
>>> > > >
>>> > > >  Num Name  IOVA     Size
>>> > > >    0 reg   00000000 00000128      296
>>> > > >    1 mmu   00000000 00401000  4198400
>>> > > >    2 ring  00000000 00001000     4096
>>> > > > *  3 cmd   00001000 00000320      800
>>> > > >    4 cmd   00002000 00000190      400
>>> > > >    5 cmd   00003000 00000320      800
>>> > > >    6 cmd   00004000 00000190      400
>>> > > >    7 cmd   00005000 00000188      392
>>> > > >    8 bomap 00000000 000018f0     6384
>>> > > >    9 bo    00040000 00300000  3145728
>>> > > >   10 bo    00340000 00001000     4096
>>> > > >   11 bo    00341000 00002000     8192
>>> > > >   12 bo    02e60000 00001000     4096
>>> > > >   13 bo    030c3000 00001000     4096
>>> > > >   14 bo    030c4000 00001000     4096
>>> > > >   15 bo    030c5000 00018000    98304
>>> > > > Checking MMU entries... ok
>>> > >
>>> > > So, buffer 3 is the command buffer we were processing, it's only 800
>>> > > bytes long.  You should find that along side the log file, called
>>> > > "cmd-00001000.bin".  Please send me this file.  Thanks.
>>>
>>> Here's the decoded command buffer.  My decoding includes the buffer
>>> addresses, and the ranges that the GPU would access based on the draw
>>> commands.  We have from the table above, the addresses and sizes of
>>> the bos currently mapped into the GPU's IOVA space.
>>>
>>> 00000: 08050480 00341000  LDST 0x1200=0x00341000
>>>                 000000a0  LDST 0x1204=0x000000a0
>>>                 00000000  LDST 0x1208=0x00000000
>>>                 06000046  LDST 0x120c=0x06000046
>>>                 fe48fd53  LDST 0x1210=0xfe48fd53
>>> 00018: 0804048a 00040000  LDST 0x1228=0x00040000
>>>                 00001000  LDST 0x122c=0x00001000
>>>                 00000000  LDST 0x1230=0x00000000
>>>                 00002006  LDST 0x1234=0x00002006
>>> 00030: 0801049f 00000000  LDST 0x127c=0x00000000
>>> 00038: 08030497 0030cccc  LDST 0x125c=0x0030cccc
>>>                 01b802ad  LDST 0x1260=0x01b802ad
>>>                 01e002d5  LDST 0x1264=0x01e002d5
>>> 00048: 20000100 00000000
>>>                             01b802ad 01e002d5  0,0,40,40 ->
>>> 685,440,725,480
>>>         Blit: Dst:00040000 Src:00341000 Clip 685,440,725,480
>>>         Src: 0x00341000-0x003429a0 (40,40)
>>>         Dst: 0x00040000-0x00220b54 (725,480)
>>>
>>>   Looks fine - source bo 11, destination bo 9.
>>>
>>> 00058: 08010001 00000000  LDST 0x0004=0x00000000
>>> 00060: 08010001 00000000  LDST 0x0004=0x00000000
>>> 00068: 08010001 00000000  LDST 0x0004=0x00000000
>>>
>>>   This is the GC320 "workaround":
>>>
>>> 00070: 08050480 00340000  LDST 0x1200=0x00340000
>>>                 00000040  LDST 0x1204=0x00000040
>>>                 00000000  LDST 0x1208=0x00000000
>>>                 03000043  LDST 0x120c=0x03000043
>>>                 ffff0000  LDST 0x1210=0xffff0000
>>> 00088: 0804048a 00340000  LDST 0x1228=0x00340000
>>>                 00000040  LDST 0x122c=0x00000040
>>>                 00000000  LDST 0x1230=0x00000000
>>>                 00002003  LDST 0x1234=0x00002003
>>> 000a0: 0801049f 00000000  LDST 0x127c=0x00000000
>>> 000a8: 08030497 0030cccc  LDST 0x125c=0x0030cccc
>>>                 00010000  LDST 0x1260=0x00010000
>>>                 00020001  LDST 0x1264=0x00020001
>>> 000b8: 20000100 00000000
>>>                             00010000 00020001  0,0,1,1 -> 0,1,1,2
>>>         Blit: Dst:00340000 Src:00340000 Clip 0,1,1,2
>>>         Src: 0x00340000-0x00340044 (1,1)
>>>         Dst: 0x00340000-0x00340084 (1,2)
>>>
>>>   Looks fine, source and destination bo 10.
>>>
>>> 000c8: 08010e03 00000008  LDST 0x380c=0x00000008 Flush PE2D
>>> 000d0: 08010e02 00000701  LDST 0x3808=0x00000701 SEM FE -> PE
>>> 000d8: 48000000 00000701  STALL FE -> PE
>>> 000e0: 18000000(00000008)  NOP
>>> 000e8: 18000000(00000701)  NOP
>>> 000f0: 18000000(00000701)  NOP
>>> 000f8: 18000000(00000701)  NOP
>>> 00100: 18000000(000b0022)  NOP
>>> 00108: 18000000(00000000)  NOP
>>> 00110: 18000000(00000000)  NOP
>>> 00118: 18000000(00000000)  NOP
>>> 00120: 18000000(00100020)  NOP
>>> 00128: 18000000(00000000)  NOP
>>> 00130: 18000000(000b002a)  NOP
>>> 00138: 18000000(00000000)  NOP
>>> 00140: 18000000(00000000)  NOP
>>> 00148: 18000000(00000000)  NOP
>>> 00150: 18000000(00000018)  NOP
>>> 00158: 18000000(00000000)  NOP
>>> 00160: 18000000(000b0032)  NOP
>>> 00168: 18000000(00000000)  NOP
>>> 00170: 18000000(00000000)  NOP
>>> 00178: 18000000(00000000)  NOP
>>> 00180: 08050480 00040000  LDST 0x1200=0x00040000
>>>                 00001000  LDST 0x1204=0x00001000
>>>                 00000000  LDST 0x1208=0x00000000
>>>                 06000046  LDST 0x120c=0x06000046
>>>                 ffe80000  LDST 0x1210=0xffe80000
>>> 00198: 0804048a 030c5000  LDST 0x1228=0x030c5000
>>>                 00001000  LDST 0x122c=0x00001000
>>>                 00000000  LDST 0x1230=0x00000000
>>>                 00002006  LDST 0x1234=0x00002006
>>> 001b0: 0802049f 00ff0001  LDST 0x127c=0x00ff0001
>>>                 01000100  LDST 0x1280=0x01000100
>>> 001c0: 080304b2 ff000000  LDST 0x12c8=0xff000000
>>>                 00000000  LDST 0x12cc=0x00000000
>>>                 00000000  LDST 0x12d0=0x00000000
>>> 001d0: 08030497 0030cccc  LDST 0x125c=0x0030cccc
>>>                 00000000  LDST 0x1260=0x00000000
>>>                 00180400  LDST 0x1264=0x00180400
>>> 001e0: 20000100 00000000
>>>                             00000000 00180400  0,-24,1024,0 ->
>>> 0,0,1024,24
>>>         Blit: Dst:030c5000 Src:00040000 Clip 0,0,1024,24
>>>         Src: 0x00040000-0x00041000 (1024,0)
>>>         Dst: 0x030c5000-0x030de000 (1024,24)
>>>
>>>   Source bo 9, which looks fine.
>>>   Destination bo 15, which is:
>>>
>>>     15 bo    030c5000 00018000    98304
>>>
>>>   Its final IOVA is the sum of those two, which is 0x030dd000.
>>>
>>>   The destination stride is from state 0x122c, which is 4096.
>>>   Destination start address was 0x030c5000 in state 0x1228, and the
>>>   bottom y in the draw command is 24.  That gives 98304 bytes, but
>>>   there's also the right x as well, which is another 1024 pixels on
>>>   top, giving an extra 4096 bytes on top of that.
>>>
>>>   So, it looks like this bo was too small for the draw command
>>>   being requested.
>>>
>>>   It's interesting to note that the MMU fault addresses don't seem to
>>>   correspond - my GC320 is a MMUv1 GPU, and doesn't have MMU faults,
>>>   so I don't know how to interpret these.
>>>
>>> 001f0: 08010001 00000000  LDST 0x0004=0x00000000
>>> 001f8: 08010001 00000000  LDST 0x0004=0x00000000
>>> 00200: 08010001 00000000  LDST 0x0004=0x00000000
>>> 00208: 08050480 00340000  LDST 0x1200=0x00340000
>>>                 00000040  LDST 0x1204=0x00000040
>>>                 00000000  LDST 0x1208=0x00000000
>>>                 03000043  LDST 0x120c=0x03000043
>>>                 ffff0000  LDST 0x1210=0xffff0000
>>> 00220: 0804048a 00340000  LDST 0x1228=0x00340000
>>>                 00000040  LDST 0x122c=0x00000040
>>>                 00000000  LDST 0x1230=0x00000000
>>>                 00002003  LDST 0x1234=0x00002003
>>>
>>> <============stalled mid-loading above states=============>
>>>
>>> (rest of dump truncated as not relevant.)
>>>
>>> The obvious question is - why did we end up with a 1024x24 blit copy
>>> into a buffer that was actually too small?  This also happens to be
>>> the hardware clip window size as well (which should be bounded by the
>>> source and destination drawable sizes and the copy position.)
>>>
>>> I suspect if we set extent.y2 one less, we'll get problems with the
>>> last line being corrupted on other copies.
>>>
>>> Any chance of adding some debug to etnaviv_accel_CopyNtoN() to trace
>>> which copy this is?  We know that the destination bo is 98304 in
>>> size, so use that to avoid printing out too much.  The destination
>>> bo should be in op.dst.bo, and its size can be found via
>>> etna_bo_size(op.dst.bo).
>>>
>>> Thanks.
>>>
>>> --
>>> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
>>> FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down
>>> 630kbps up
>>> According to speedtest.net: 8.21Mbps down 510kbps up
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/etnaviv/attachments/20171102/f63625d9/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: cmd-00001000.bin
Type: application/octet-stream
Size: 800 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/etnaviv/attachments/20171102/f63625d9/attachment-0001.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: sylog.tar.xz
Type: application/x-xz
Size: 3680 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/etnaviv/attachments/20171102/f63625d9/attachment-0001.xz>


More information about the etnaviv mailing list