GPU hang trying to run OpenCL kernels on x86_64
Luís Mendes
luis.p.mendes at gmail.com
Fri May 4 23:15:41 UTC 2018
Hi Slava,
The two x86_64 systems I tried are:
- System One
Tyan S7025 with dual Xeon X5675 and 48GB registered ECC memory, with
a NVIDIA GTX 1050Ti 4GB(also used for display) and an AMD RX 550 4GB
Running standard Ubuntu 16.04.4 with kernels
linux-image-4.13.0-38-generic and linux-image-4.4.0-122-generic,
mesa-17.2.8-0ubuntu0, libdrm-2.4.83-1
and amdgpu-pro 17.50/amdgpu-pro 18.10
lsb_release -a
Description: Ubuntu 16.04.4 LTS
BIOS configuration:
ACPI enabled v3.0
ACPI APIC support Enabled
ACPI SRAT table Enabled
SR-IOV Enabled
Intel VT-d Disabled
PCI MMIO 64 Bits support Disabled
- System Two
Tyan S7002 with dual Xeon X5670 and 12GB registered ECC memory, with
an AMD RX 480
Running Ubuntu 18.04 with kernels vanilla 4.16.7 and
linux-image-4.15.0-20-generic, mesa-18.0.0~rc5-1ubuntu1,
libdrm-2.4.91-2
and mesa-opencl-icd, libclc-0.2.0+git20180312-1
BIOS configuration:
ACPI enabled v2.0
ACPI APIC support Enabled
ACPI SRAT table Enabled
SR-IOV Enabled
Intel VT-d Disabled
PCI MMIO 64 Bits support Disabled
amdgpu-pro-install --headless --opencl=legacy
When I try to run the attached openCL code (which computes a
cross-correlation between two square matrices directly by cross
correlation function definition), the GPU hangs, but there are also
other kernels where this also happens.
As soon as I try to run the kernel the system hangs at the first
kernel computation on all the two systems, and after a couple of
seconds dmesg shows:
[drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx timeout, last
signaled seq=2, last emitted seq=3
[drm] IP block:gmc_v8_0 is hung!
[drm] IP block:tonga_ih is hung!
[drm] IP block:gfx_v8_0 is hung!
[drm] IP block:sdma_v3_0 is hung!
[drm] IP block:uvd_v6_0 is hung!
[drm] IP block:vce_v3_0 is hung!
[drm] GPU recovery disabled.
- On another system with armhf 32 bits, 1GB ram, 512GB SSD, AMD RX 480
or AMD RX 550
with Ubuntu 17.10, vanilla kernel 4.16.7, mesa-18.0.2,
libdrm-2.4.92-git, libclc-git at commit
3d994f2ff2cbb4531223fe2657144cb19f0c5328 (15/Nov/2017)
The kernels work properly on the same AMD cards.
On Fri, May 4, 2018 at 7:18 PM, Abramov, Slava <Slava.Abramov at amd.com> wrote:
> Luis,
>
>
> Can you please provide more details on your system environment and steps on
> configuring the software and reproducing the issue?
>
>
>
> Slava A
>
> ________________________________
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> on behalf of Luís
> Mendes <luis.p.mendes at gmail.com>
> Sent: Friday, May 4, 2018 12:27:47 PM
> To: amd-gfx list; Koenig, Christian; Michel Dänzer
> Subject: GPU hang trying to run OpenCL kernels on x86_64
>
> Hi,
>
> I am a collaborator with Syncleus/aparapi project on github and I've
> been testing OpenCL on AMD and NVIDIA cards.
>
> Currently I have a set of kernels that hang the GPU (AMD RX 460 and
> AMD RX 550) across all compute units on x86_64 running vanilla kernel
> 4.16.7 on Ubuntu 18.04, also on Ubuntu 16.04.4 with AMDGPU PRO 17.50
> and 18.10 show the same problems, in fact, AMDGPU-PRO 18.10 is even
> worse.
>
> However the same set of kernels run happily on armhf with vanilla
> Linux 4.16.7 and mesa 18.0 (mesa-opencl-icd and libclc for amdgcn),
> Ubuntu 17.10, on an AMD RX460 and an AMD RX 550.
>
> Luís Mendes
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
-------------- next part --------------
May 04, 2018 10:37:41 PM com.aparapi.internal.kernel.KernelRunner executeInternalInner
INFO: typedef struct This_s{
__global int *tilesGeometry;
__global int *inputGeometry;
__global int *threadOutputStart;
__global int *outputGeometry;
__global int *threadOffsetI;
__global int *threadOffsetJ;
__global float *matrixInF;
__global float *matrixInG;
__global float *matrixOut;
int passid;
}This;
int get_pass_id(This *this){
return this->passid;
}
short pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(This *this, short x){
short value = (short)((x + x) + 1);
return((short)(value / abs(value)));
}
short pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(This *this, short x, short dimX){
short result = (short)(((pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(this, x) + 1) * (x + 1)) / 2);
result = (short)(((pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(this, (short)(dimX - result)) + 1) * result) / 2);
return(result);
}
__kernel void run(
__global int *tilesGeometry,
__global int *inputGeometry,
__global int *threadOutputStart,
__global int *outputGeometry,
__global int *threadOffsetI,
__global int *threadOffsetJ,
__global float *matrixInF,
__global float *matrixInG,
__global float *matrixOut,
int passid
){
This thisStruct;
This* this=&thisStruct;
this->tilesGeometry = tilesGeometry;
this->inputGeometry = inputGeometry;
this->threadOutputStart = threadOutputStart;
this->outputGeometry = outputGeometry;
this->threadOffsetI = threadOffsetI;
this->threadOffsetJ = threadOffsetJ;
this->matrixInF = matrixInF;
this->matrixInG = matrixInG;
this->matrixOut = matrixOut;
this->passid = passid;
{
int sizeI = get_global_size(1);
int sizeJ = get_global_size(0);
int i = get_global_id(1);
int j = get_global_id(0);
int k = get_global_id(2);
int il = get_local_id(0);
int jl = get_local_id(1);
int matrixInputStart = ((((k * this->tilesGeometry[0]) * this->tilesGeometry[1]) * (this->inputGeometry[1] + 1)) * (this->inputGeometry[0] + 1)) + (((this->threadOutputStart[((i * sizeJ) + j)] / (this->outputGeometry[0] * this->outputGeometry[1])) * (this->inputGeometry[0] + 1)) * (this->inputGeometry[1] + 1));
int matrixOutputStart = ((((k * this->tilesGeometry[0]) * this->tilesGeometry[1]) * this->outputGeometry[0]) * this->outputGeometry[1]) + this->threadOutputStart[((i * sizeJ) + j)];
short subMatrixI = (short)this->threadOffsetI[i];
short subMatrixJ = (short)this->threadOffsetJ[j];
float accum = 0.0f;
for (short indexN = (short)(-this->outputGeometry[0] / 2); indexN<=(this->outputGeometry[0] / 2); indexN = (short)(indexN + 1)){
for (short indexM = (short)(-this->outputGeometry[1] / 2); indexM<=(this->outputGeometry[1] / 2); indexM = (short)(indexM + 1)){
short fi = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, indexN, (short)this->inputGeometry[0]);
short fj = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, indexM, (short)this->inputGeometry[1]);
short gi = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, (short)(subMatrixI + indexN), (short)this->inputGeometry[0]);
short gj = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, (short)(subMatrixJ + indexM), (short)this->inputGeometry[1]);
accum = accum + (this->matrixInF[((matrixInputStart + (fi * (this->inputGeometry[1] + 1))) + fj)] * this->matrixInG[((matrixInputStart + (gi * (this->inputGeometry[1] + 1))) + gj)]);
}
}
int outIndex = (matrixOutputStart + ((subMatrixI + (this->outputGeometry[0] / 2)) * this->outputGeometry[1])) + (subMatrixJ + (this->outputGeometry[1] / 2));
this->matrixOut[outIndex] = accum;
return;
}
}
More information about the amd-gfx
mailing list