[Nouveau] [RFC PATCH v2 5/5] HACK force fences updated on error
Konsta Hölttä
kholtta at nvidia.com
Mon Aug 31 04:38:35 PDT 2015
Some error conditions just stop a channel, and its fences get stuck, so they
either need to be kicked ready by overwriting the hw sequence numbers (as nvgpu
does) or faked with a sw flag, as done here. This is just a hack, meant as an
example of what would be needed.
Here, the id of a channel whose fences should be force-updated is passed
upwards with the uevent response. Normally, this is -1 so that it matches no
channel id, but some error paths fake an update event with an explicit
channel id.
Note: if userspace has some meaningful timeouts on the fences, then they
do finish, but without any notification that the channel is now broken
(how do you distinguish a too-long gpu job from a stuck one?). In many
cases, a channel needs to be shut down completely when it breaks (e.g.,
on an mmu fault).
Signed-off-by: Konsta Hölttä <kholtta at nvidia.com>
---
drm/nouveau/include/nvif/event.h | 1 +
drm/nouveau/include/nvkm/engine/fifo.h | 2 +-
drm/nouveau/nouveau_fence.c | 13 ++++++++-----
drm/nouveau/nvkm/engine/fifo/base.c | 3 ++-
drm/nouveau/nvkm/engine/fifo/gf100.c | 2 +-
drm/nouveau/nvkm/engine/fifo/gk104.c | 7 ++++++-
drm/nouveau/nvkm/engine/fifo/nv04.c | 2 +-
7 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/drm/nouveau/include/nvif/event.h b/drm/nouveau/include/nvif/event.h
index d148b85..a9ff4ee 100644
--- a/drm/nouveau/include/nvif/event.h
+++ b/drm/nouveau/include/nvif/event.h
@@ -52,16 +52,17 @@ struct nvif_notify_conn_rep_v0 {
};
struct nvif_notify_uevent_req {
/* nvif_notify_req ... */
};
struct nvif_notify_uevent_rep {
/* nvif_notify_rep ... */
+ __u32 force_chid;
};
struct nvif_notify_eevent_req {
/* nvif_notify_req ... */
u32 chid;
};
struct nvif_notify_eevent_rep {
diff --git a/drm/nouveau/include/nvkm/engine/fifo.h b/drm/nouveau/include/nvkm/engine/fifo.h
index cbca477..946eb68 100644
--- a/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drm/nouveau/include/nvkm/engine/fifo.h
@@ -117,15 +117,15 @@ extern struct nvkm_oclass *gf100_fifo_oclass;
extern struct nvkm_oclass *gk104_fifo_oclass;
extern struct nvkm_oclass *gk20a_fifo_oclass;
extern struct nvkm_oclass *gk208_fifo_oclass;
extern struct nvkm_oclass *gm204_fifo_oclass;
extern struct nvkm_oclass *gm20b_fifo_oclass;
int nvkm_fifo_uevent_ctor(struct nvkm_object *, void *, u32,
struct nvkm_notify *);
-void nvkm_fifo_uevent(struct nvkm_fifo *);
+void nvkm_fifo_uevent(struct nvkm_fifo *, u32 force_chid);
void nvkm_fifo_eevent(struct nvkm_fifo *, u32 chid, u32 error);
void nv04_fifo_intr(struct nvkm_subdev *);
int nv04_fifo_context_attach(struct nvkm_object *, struct nvkm_object *);
#endif
diff --git a/drm/nouveau/nouveau_fence.c b/drm/nouveau/nouveau_fence.c
index 38bccb0..b7d9987 100644
--- a/drm/nouveau/nouveau_fence.c
+++ b/drm/nouveau/nouveau_fence.c
@@ -123,50 +123,53 @@ nouveau_fence_context_put(struct kref *fence_ref)
void
nouveau_fence_context_free(struct nouveau_fence_chan *fctx)
{
kref_put(&fctx->fence_ref, nouveau_fence_context_put);
}
static int
-nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
+nouveau_fence_update(struct nouveau_channel *chan,
+ struct nouveau_fence_chan *fctx, u32 force_chid)
{
struct nouveau_fence *fence;
int drop = 0;
u32 seq = fctx->read(chan);
+ bool force = force_chid == chan->chid;
while (!list_empty(&fctx->pending)) {
fence = list_entry(fctx->pending.next, typeof(*fence), head);
- if ((int)(seq - fence->base.seqno) < 0)
+ if ((int)(seq - fence->base.seqno) < 0 && !force)
break;
drop |= nouveau_fence_signal(fence);
}
return drop;
}
static int
nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
{
struct nouveau_fence_chan *fctx =
container_of(notify, typeof(*fctx), notify);
+ const struct nvif_notify_uevent_rep *rep = notify->data;
unsigned long flags;
int ret = NVIF_NOTIFY_KEEP;
spin_lock_irqsave(&fctx->lock, flags);
if (!list_empty(&fctx->pending)) {
struct nouveau_fence *fence;
struct nouveau_channel *chan;
fence = list_entry(fctx->pending.next, typeof(*fence), head);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
- if (nouveau_fence_update(fence->channel, fctx))
+ if (nouveau_fence_update(fence->channel, fctx, rep->force_chid))
ret = NVIF_NOTIFY_DROP;
}
spin_unlock_irqrestore(&fctx->lock, flags);
return ret;
}
void
@@ -278,17 +281,17 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
kref_get(&fctx->fence_ref);
trace_fence_emit(&fence->base);
ret = fctx->emit(fence);
if (!ret) {
fence_get(&fence->base);
spin_lock_irq(&fctx->lock);
- if (nouveau_fence_update(chan, fctx))
+ if (nouveau_fence_update(chan, fctx, -1))
nvif_notify_put(&fctx->notify);
list_add_tail(&fence->head, &fctx->pending);
spin_unlock_irq(&fctx->lock);
}
return ret;
}
@@ -302,17 +305,17 @@ nouveau_fence_done(struct nouveau_fence *fence)
struct nouveau_channel *chan;
unsigned long flags;
if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
return true;
spin_lock_irqsave(&fctx->lock, flags);
chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
- if (chan && nouveau_fence_update(chan, fctx))
+ if (chan && nouveau_fence_update(chan, fctx, -1))
nvif_notify_put(&fctx->notify);
spin_unlock_irqrestore(&fctx->lock, flags);
}
return fence_is_signaled(&fence->base);
}
static long
nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
diff --git a/drm/nouveau/nvkm/engine/fifo/base.c b/drm/nouveau/nvkm/engine/fifo/base.c
index a5dc6c9..e35d711 100644
--- a/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drm/nouveau/nvkm/engine/fifo/base.c
@@ -184,19 +184,20 @@ nvkm_fifo_uevent_ctor(struct nvkm_object *object, void *data, u32 size,
notify->types = 1;
notify->index = 0;
}
return ret;
}
void
-nvkm_fifo_uevent(struct nvkm_fifo *fifo)
+nvkm_fifo_uevent(struct nvkm_fifo *fifo, u32 force_chid)
{
struct nvif_notify_uevent_rep rep = {
+ .force_chid = force_chid
};
nvkm_event_send(&fifo->uevent, 1, 0, &rep, sizeof(rep));
}
static int
nvkm_fifo_eevent_ctor(struct nvkm_object *object, void *data, u32 size,
struct nvkm_notify *notify)
{
diff --git a/drm/nouveau/nvkm/engine/fifo/gf100.c b/drm/nouveau/nvkm/engine/fifo/gf100.c
index b745252..ca86dfe 100644
--- a/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -732,17 +732,17 @@ gf100_fifo_intr_engine_unit(struct gf100_fifo_priv *priv, int engn)
u32 inte = nv_rd32(priv, 0x002628);
u32 unkn;
nv_wr32(priv, 0x0025a8 + (engn * 0x04), intr);
for (unkn = 0; unkn < 8; unkn++) {
u32 ints = (intr >> (unkn * 0x04)) & inte;
if (ints & 0x1) {
- nvkm_fifo_uevent(&priv->base);
+ nvkm_fifo_uevent(&priv->base, -1);
ints &= ~1;
}
if (ints) {
nv_error(priv, "ENGINE %d %d %01x", engn, unkn, ints);
nv_mask(priv, 0x002628, ints, 0);
}
}
}
diff --git a/drm/nouveau/nvkm/engine/fifo/gk104.c b/drm/nouveau/nvkm/engine/fifo/gk104.c
index 53a464d..caecef1 100644
--- a/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -908,16 +908,18 @@ gk104_fifo_intr_fault(struct gk104_fifo_priv *priv, int unit)
object = engctx;
while (object) {
switch (nv_mclass(object)) {
case KEPLER_CHANNEL_GPFIFO_A:
case MAXWELL_CHANNEL_GPFIFO_A:
nvkm_fifo_eevent(&priv->base,
((struct nvkm_fifo_chan*)object)->chid,
NOUVEAU_GEM_CHANNEL_FIFO_ERROR_MMU_ERR_FLT);
+ nvkm_fifo_uevent(&priv->base,
+ ((struct nvkm_fifo_chan*)object)->chid);
gk104_fifo_recover(priv, engine, (void *)object);
break;
}
object = object->parent;
}
nvkm_engctx_put(engctx);
}
@@ -978,18 +980,21 @@ gk104_fifo_intr_pbdma_0(struct gk104_fifo_priv *priv, int unit)
nv_error(priv, "PBDMA%d:", unit);
nvkm_bitfield_print(gk104_fifo_pbdma_intr_0, show);
pr_cont("\n");
nv_error(priv,
"PBDMA%d: ch %d [%s] subc %d mthd 0x%04x data 0x%08x\n",
unit, chid,
nvkm_client_name_for_fifo_chid(&priv->base, chid),
subc, mthd, data);
+
nvkm_fifo_eevent(&priv->base, chid,
NOUVEAU_GEM_CHANNEL_PBDMA_ERROR);
+
+ nvkm_fifo_uevent(&priv->base, chid);
}
nv_wr32(priv, 0x040108 + (unit * 0x2000), stat);
}
static const struct nvkm_bitfield gk104_fifo_pbdma_intr_1[] = {
{ 0x00000001, "HCE_RE_ILLEGAL_OP" },
{ 0x00000002, "HCE_RE_ALIGNB" },
@@ -1030,17 +1035,17 @@ gk104_fifo_intr_runlist(struct gk104_fifo_priv *priv)
nv_wr32(priv, 0x002a00, 1 << engn);
mask &= ~(1 << engn);
}
}
static void
gk104_fifo_intr_engine(struct gk104_fifo_priv *priv)
{
- nvkm_fifo_uevent(&priv->base);
+ nvkm_fifo_uevent(&priv->base, -1);
}
static void
gk104_fifo_intr(struct nvkm_subdev *subdev)
{
struct gk104_fifo_priv *priv = (void *)subdev;
u32 mask = nv_rd32(priv, 0x002140);
u32 stat = nv_rd32(priv, 0x002100) & mask;
diff --git a/drm/nouveau/nvkm/engine/fifo/nv04.c b/drm/nouveau/nvkm/engine/fifo/nv04.c
index 043e429..1749614 100644
--- a/drm/nouveau/nvkm/engine/fifo/nv04.c
+++ b/drm/nouveau/nvkm/engine/fifo/nv04.c
@@ -536,17 +536,17 @@ nv04_fifo_intr(struct nvkm_subdev *subdev)
if (device->card_type == NV_50) {
if (stat & 0x00000010) {
stat &= ~0x00000010;
nv_wr32(priv, 0x002100, 0x00000010);
}
if (stat & 0x40000000) {
nv_wr32(priv, 0x002100, 0x40000000);
- nvkm_fifo_uevent(&priv->base);
+ nvkm_fifo_uevent(&priv->base, -1);
stat &= ~0x40000000;
}
}
if (stat) {
nv_warn(priv, "unknown intr 0x%08x\n", stat);
nv_mask(priv, NV03_PFIFO_INTR_EN_0, stat, 0x00000000);
nv_wr32(priv, NV03_PFIFO_INTR_0, stat);
--
2.1.4
More information about the Nouveau
mailing list