[igt-dev] [RFC v6] tests/gem_watchdog: Initial set of tests for GPU watchdog

Carlos Santa carlos.santa at intel.com
Thu Jun 20 18:33:18 UTC 2019


This patch adds a basic set of tests that reset the different
GPU engines through the GPU watchdog timer.

Credits to Antonio for the original codebase this is based on.

v2: remove gem_context_get_param() during set (Antonio)
    remove clearing of the engines_threshold[] in the default case
    inside context_set_watchdog(). (Antonio)
    fix indexing when creating low/high priority contexts
    get rid of 2 threads idea (Antonio)
    fix context prio bug due to wrong indexing (Antonio)
v3: no need to force clear drm_i915_gem_watchdog_timeout struct
    when setting the watchdog timeouts (Antonio)
    -use gem_context_set_priority() instead to use error checking
    -always use a looper to test all engines (Antonio)
    -modify test gpu_watchdog_hang_long_batch_single_engine()
    to use a couple of batches and set the timeout in between them (Antonio)
    -make use of for_each_engine_class_instance() (Antonio)
v4: -tests should be able to handle s/w watchdog timeout (Tvrtko)
    -remove dead code inside batch_buffer factory (Tvrtko)
    -set_watchdog() should return ENODEV for fail cases (Tvrtko)
    -enclose name, priority and reset params inside array (Tvrtko)
v5: -Rebased. Tests 5, 6 and 8 from the test plan are now added
v6: -Rebased. Added Test #10 Timing_batch (Chris)
    -use i915_engine_class_instance, use spinner to track out_fence,
    -use a struct to describe # of ctx/scratch[], increase # of engines
    from 8 (Chris)

Test Plan:

Assumptions:
1. Use fence status to figure out which ctx was reset
2. Use spin batches w/ user space timers to control duration
and corking to control ordering
3. Use context priorities to force preemption

Tests:
1. ctx1, long_batch -> execute
2. ctx1, set_watchdog -> reset
3. ctx2/ctx1 -> execute, reset
4. ctx1/ctx2 -> reset, execute
5. ctx1_just_below_threshold -> execute
6. ctx_over_the_threshold -> reset
7. set watchdog on some engines -> execute
8. submit long_batch and after half of the
expected runtime submit higher prio batch
9. submit low prio batch w/o watchdog then
higher prio with watchdog
10. submit timing_batch, record timestamps
and calculate the delta after recursing.

Unresolved items:
1. The check on sync_fence_status(*fence)
returns -1 instead of -EIO.

Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Antonio Argenziano <antonio.argenziano at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Carlos Santa <carlos.santa at intel.com>
---
 tests/Makefile.sources    |   3 +
 tests/i915/gem_watchdog.c | 730 ++++++++++++++++++++++++++++++++++++++
 tests/meson.build         |   1 +
 3 files changed, 734 insertions(+)
 create mode 100644 tests/i915/gem_watchdog.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 027ed82fc38a..2ceaaa0a44af 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -459,6 +459,9 @@ gem_userptr_blits_SOURCES = i915/gem_userptr_blits.c
 TESTS_progs += gem_wait
 gem_wait_SOURCES = i915/gem_wait.c
 
+TESTS_progs += gem_watchdog
+gem_watchdog_SOURCES = i915/gem_watchdog.c
+
 TESTS_progs += gem_workarounds
 gem_workarounds_SOURCES = i915/gem_workarounds.c
 
diff --git a/tests/i915/gem_watchdog.c b/tests/i915/gem_watchdog.c
new file mode 100644
index 000000000000..7119e9877702
--- /dev/null
+++ b/tests/i915/gem_watchdog.c
@@ -0,0 +1,730 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "igt.h"
+#include "igt_sysfs.h"
+#include "sw_sync.h"
+
+#include <pthread.h>
+#include <fcntl.h>
+
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/signal.h>
+#include "i915/gem_ring.h"
+
+#define MI_STORE_REGISTER_MEM 		(0x24 << 23)
+#define LOCAL_I915_EXEC_BSD_SHIFT	(13)
+#define LOCAL_I915_EXEC_BSD_RING1 	(1 << LOCAL_I915_EXEC_BSD_SHIFT)
+#define LOCAL_I915_EXEC_BSD_RING2 	(2 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
+#define DEFAULT_PRIO LOCAL_I915_CONTEXT_DEFAULT_PRIORITY
+#define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
+#define HIGH 1
+#define LOW 0
+#define WATCHDOG_OVER_THRESHOLD (2070000)
+#define WATCHDOG_BELOW_THRESHOLD (1200000)
+
+#define WATCHDOG_THRESHOLD (1000000)
+#define MAX_ENGINES 16
+#define RENDER_CLASS 0
+#define VIDEO_DECODE_CLASS 1
+#define VIDEO_ENHANCEMENT_CLASS 2
+#define COPY_ENGINE_CLASS 3
+#define LOCAL_I915_CONTEXT_PARAM_WATCHDOG 0x10
+
+#define GET_RESET_STATS_IOCTL DRM_IOWR(DRM_COMMAND_BASE + 0x32, struct local_drm_i915_reset_stats)
+struct local_drm_i915_reset_stats {
+	__u32 ctx_id;
+	__u32 flags;
+	__u32 reset_count;
+	__u32 batch_active;
+	__u32 batch_pending;
+	__u32 pad;
+};
+
+const uint64_t timeout_100ms = 100000000LL;
+float timedifference_msec(struct timeval t0, struct timeval t1);
+void sleep_nsec(int ms);
+
+struct drm_i915_gem_watchdog_timeout {
+	union {
+		struct {
+			/*
+			 * Engine class & instance to be configured or queried.
+			 */
+			__u16 engine_class;
+			__u16 i915_engine_class_instance;
+		};
+		/* Index based addressing mode */
+		__u32 index;
+	};
+	/* GPU Engine watchdog resets timeout in us */
+	__u32 timeout_us;
+};
+
+static void clear_error_state(int fd)
+{
+	int dir;
+
+	dir = igt_sysfs_open(fd);
+
+	if (dir < 0)
+		return;
+
+	/* Any write to the error state clears it */
+	igt_sysfs_set(dir, "error", "");
+	close(dir);
+}
+
+static int context_set_watchdog(int fd, unsigned engine_id,
+				const char *engine_name,
+				unsigned ctx_id, unsigned threshold)
+{
+	struct drm_i915_gem_watchdog_timeout engines_threshold[MAX_ENGINES];
+	struct drm_i915_gem_context_param arg = {
+		.param = LOCAL_I915_CONTEXT_PARAM_WATCHDOG,
+		.ctx_id = ctx_id,
+		.size = sizeof(engines_threshold),
+		.value = to_user_pointer(engines_threshold)
+	};
+
+	switch (engine_id & I915_EXEC_RING_MASK) {
+	case I915_EXEC_RENDER:
+		engines_threshold[0].timeout_us = threshold;
+		engines_threshold[0].engine_class = RENDER_CLASS;
+		engines_threshold[0].i915_engine_class_instance = 0;
+		break;
+	case I915_EXEC_BLT:
+		if (__gem_context_get_param(fd, &arg) == -ENODEV)
+			return -ENODEV;
+
+		engines_threshold[3].timeout_us = threshold;
+		engines_threshold[3].engine_class = COPY_ENGINE_CLASS;
+		engines_threshold[3].i915_engine_class_instance = 0;
+		break;
+	case I915_EXEC_BSD:
+		engines_threshold[1].timeout_us = threshold;
+		engines_threshold[1].engine_class = VIDEO_DECODE_CLASS;
+		engines_threshold[1].i915_engine_class_instance = 0;
+		break;
+	case I915_EXEC_VEBOX:
+		engines_threshold[2].timeout_us = threshold;
+		engines_threshold[2].engine_class = VIDEO_ENHANCEMENT_CLASS;
+		engines_threshold[2].i915_engine_class_instance = 0;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	gem_context_set_param(fd, &arg);
+
+	return 0;
+}
+
+static double clockrate(int i915, int reg)
+{
+	volatile uint32_t *mmio;
+	uint32_t r_start, r_end;
+	struct timespec tv;
+	uint64_t t_start, t_end;
+	uint64_t elapsed;
+	int cs_timestamp_freq;
+	drm_i915_getparam_t gp = {
+		.value = &cs_timestamp_freq,
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+	};
+
+	if (igt_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
+		return cs_timestamp_freq;
+
+	mmio = (volatile uint32_t *)((volatile char *)igt_global_mmio + reg);
+
+	t_start = igt_nsec_elapsed(&tv);
+	r_start = *mmio;
+	elapsed = igt_nsec_elapsed(&tv) - t_start;
+
+	usleep(1000);
+
+	t_end = igt_nsec_elapsed(&tv);
+	r_end = *mmio;
+	elapsed += igt_nsec_elapsed(&tv) - t_end;
+
+	elapsed = (t_end - t_start) + elapsed / 2;
+	return (r_end - r_start) * 1e9 / elapsed;
+}
+
+#define RCS_TIMESTAMP (0x2000 + 0x358)
+static void batch_buffer_timed(uint32_t fd, uint32_t ctx_id, unsigned exec_id,
+				 uint32_t target, uint32_t offset,
+				 uint32_t *handle, uint64_t timeout,
+				 int *fence, int fence_index)
+{
+    struct drm_i915_gem_exec_object2 obj[2];
+    struct drm_i915_gem_relocation_entry reloc[3];
+    struct drm_i915_gem_execbuffer2 execbuf;
+    uint32_t batch[16];
+    uint32_t *batch_ptr;
+    uint32_t *batch_bo_ptr;
+    uint64_t delta;
+    uint32_t timestamp_offset;
+    float elapsed_time;
+    uint32_t start_timestamp, stop_timestamp;
+    const int start_timestamp_pos = 0;
+    const int stop_timestamp_pos = 1;
+    double rcs_clock;
+    int i = 0;
+
+    gem_quiescent_gpu(fd);
+
+    switch (exec_id & I915_EXEC_RING_MASK) {
+    case I915_EXEC_RENDER:
+         timestamp_offset = 0x02358;
+         break;
+    case I915_EXEC_BSD:
+         timestamp_offset = 0x12358;
+         break;
+    case I915_EXEC_VEBOX:
+         timestamp_offset = 0x1a358;
+         break;
+    default:
+        igt_assert_f(0, "No timestamp for ring.");
+        break;
+    }
+
+    memset(&execbuf, 0, sizeof(execbuf));
+    memset(&obj, 0, sizeof(obj));
+    memset(&reloc, 0, sizeof(reloc));
+
+    execbuf.buffers_ptr = to_user_pointer(obj);
+
+    execbuf.buffer_count = 2;
+    execbuf.flags = exec_id | I915_EXEC_FENCE_OUT ;
+
+    obj[0].handle = target;
+    obj[1].handle = gem_create(fd, 4096);
+    obj[1].relocation_count = 3;
+    obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+    memset(batch, 0xc5, sizeof(batch));
+
+    batch[i++] = MI_STORE_REGISTER_MEM | (4 - 2);
+    batch[i++] = timestamp_offset;
+
+    reloc[0].offset = i * sizeof(uint32_t);
+    batch[i++] = 0x0;
+    batch[i++] = 0x0;
+
+    /* First relocation on Buffer Object */
+    reloc[0].target_handle = obj[0].handle;
+    reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
+    reloc[0].write_domain = I915_GEM_DOMAIN_COMMAND;
+
+    reloc[0].delta = start_timestamp_pos * sizeof(uint32_t);
+
+    reloc[2].delta = i * sizeof(uint32_t);
+    batch[i++] = MI_STORE_REGISTER_MEM | (4 - 2);
+    batch[i++] = timestamp_offset;
+
+    reloc[1].offset = i * sizeof(uint32_t);
+    batch[i++] = 0x4;
+    batch[i++] = 0x0;
+
+    /* Second relocation on Buffer object */
+    reloc[1].target_handle = obj[0].handle;
+    reloc[1].read_domains = I915_GEM_DOMAIN_COMMAND;
+    reloc[1].write_domain = I915_GEM_DOMAIN_COMMAND;
+
+    reloc[1].delta = stop_timestamp_pos * sizeof(uint32_t);
+
+    batch[i++] = MI_BATCH_BUFFER_START | (1 << 8) | (3 - 2);
+    reloc[2].offset = i * sizeof(uint32_t);
+    batch[i++] = MI_NOOP;
+    batch[i++] = MI_NOOP;
+
+    batch[i++] = MI_BATCH_BUFFER_END;
+    batch[i++] = MI_NOOP;
+
+    gem_write(fd, obj[1].handle, 0, &batch, sizeof(batch));
+
+    reloc[2].target_handle = obj[1].handle;
+    reloc[2].read_domains = I915_GEM_DOMAIN_COMMAND;
+
+    gem_sync(fd, obj[1].handle);
+    execbuf.rsvd1 = ctx_id;
+    execbuf.rsvd2 = -1;
+
+    gem_execbuf_wr(fd, &execbuf);
+
+    batch_ptr = gem_mmap__cpu(fd, obj[1].handle, 0, 4096, PROT_READ);
+    gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_CPU, 0);
+
+    /* Read values from BO */
+    batch_bo_ptr = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ);
+    gem_set_domain(fd, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+
+    start_timestamp = batch_bo_ptr[start_timestamp_pos];
+    stop_timestamp = batch_bo_ptr[stop_timestamp_pos];
+
+    /* Unsigned 32-bit subtraction handles timestamp wraparound. */
+    delta = (uint32_t)(stop_timestamp - start_timestamp);
+
+    rcs_clock = clockrate(fd, RCS_TIMESTAMP);
+    igt_info("RCS timestamp clock: %.0fKHz, %.1fns\n", rcs_clock / 1e3, 1e9 / rcs_clock);
+    rcs_clock = 1e9 / rcs_clock;
+    elapsed_time = delta * rcs_clock; /* ns */
+    elapsed_time /= 1000; /* us */
+    elapsed_time /= 1000; /* ms */
+
+    igt_assert_f(delta > 0, "no time elapsed between timestamps!\n");
+
+    igt_info("start_timestamp: %u\n", start_timestamp);
+    igt_info("stop_timestamp: %u\n", stop_timestamp);
+    igt_info("Elapsed time: %f ms\n", elapsed_time);
+    igt_info("Timeout during the hang: %f ms\n",
+             elapsed_time - WATCHDOG_THRESHOLD / 1000.);
+
+    fence[fence_index] = execbuf.rsvd2 >> 32;
+
+    gem_close(fd, obj[1].handle);
+    gem_quiescent_gpu(fd);
+}
+
+static void batch_buffer_factory(uint32_t fd, uint32_t ctx_id, unsigned exec_id,
+				 uint32_t target, uint32_t offset,
+				 uint32_t *handle, uint64_t timeout,
+				 int *fence, int fence_index)
+{
+    struct drm_i915_gem_exec_object2 obj[2];
+    struct drm_i915_gem_relocation_entry reloc;
+    struct drm_i915_gem_execbuffer2 execbuf;
+    igt_spin_t *spin = NULL;
+    const uint32_t bbe = MI_BATCH_BUFFER_END;
+    int i = 0;
+
+    gem_quiescent_gpu(fd);
+
+    memset(&execbuf, 0, sizeof(execbuf));
+    memset(&obj, 0, sizeof(obj));
+    memset(&reloc, 0, sizeof(reloc));
+
+    execbuf.buffers_ptr = to_user_pointer(obj);
+
+    execbuf.buffer_count = 2;
+    execbuf.flags = exec_id | I915_EXEC_FENCE_OUT ;
+
+    obj[0].handle = target;
+    obj[1].handle = gem_create(fd, 4096);
+
+    obj[1].relocation_count = 1;
+    obj[1].relocs_ptr = to_user_pointer(&reloc);
+
+    reloc.target_handle = obj[0].handle;
+    reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
+    reloc.write_domain = I915_GEM_DOMAIN_COMMAND;
+    reloc.delta = offset * sizeof(uint32_t);
+
+    reloc.offset = i * sizeof(uint32_t);
+    gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
+
+    gem_sync(fd, obj[1].handle);
+    execbuf.rsvd1 = ctx_id;
+    execbuf.rsvd2 = -1;
+
+    spin = igt_spin_new(fd,
+			.dependency = obj[0].handle,
+			.engine = exec_id,
+			.flags = IGT_SPIN_FENCE_OUT);
+    igt_assert(spin->out_fence != -1);
+    igt_spin_set_timeout(spin, timeout);
+    igt_assert(gem_bo_busy(fd, obj[0].handle));
+
+    gem_execbuf_wr(fd, &execbuf);
+
+    /* igt_spin_free() closes spin->out_fence, so keep a duplicate. */
+    fence[fence_index] = dup(spin->out_fence);
+    igt_spin_free(fd, spin);
+
+    gem_close(fd, obj[1].handle);
+    gem_quiescent_gpu(fd);
+}
+
+static void inject_hang(uint32_t fd, unsigned engine_id, uint32_t ctx_id,
+			 unsigned flags)
+{
+	igt_hang_t hang;
+	hang = igt_hang_ctx(fd, ctx_id, engine_id, flags);
+	gem_sync(fd, hang.spin->handle);
+}
+
+/*
+ * Test#1: Write the initial timestamp to scratch, write a second
+ * timestamp, then recurse back to rewriting the second timestamp.
+ * Compare the two timestamps to see how long the batch executed for.
+ */
+static void long_batch_test1(int fd, int prio1, int prio2, int reset_ctx1,
+			int reset_ctx2, unsigned threshold)
+{
+	unsigned engine_id = 0;
+	unsigned nengine = 0;
+	struct intel_execution_engine2 *e_;
+	struct {
+		uint32_t ctx[MAX_ENGINES];
+		uint32_t scratch[MAX_ENGINES];
+	} bo[2];
+	int *fence, i = 0;
+	const uint64_t batch_timeout_ms = timeout_100ms * 4;
+
+	fence = malloc(sizeof(int) * MAX_ENGINES);
+	igt_assert(fence);
+
+	__for_each_physical_engine(fd, e_) {
+		engine_id = e_->flags;
+
+		bo[0].scratch[nengine] = gem_create(fd, 4096);
+		bo[0].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[0].ctx[nengine], prio1);
+
+		if (nengine < 1) {
+			if (context_set_watchdog(fd, engine_id, e_->name,
+						 bo[0].ctx[nengine],
+						 threshold) == -ENODEV) {
+				igt_info("No support for gpu h/w watchdog on %s\n",
+					 e_->name);
+				goto skip_case;
+			}
+
+			batch_buffer_timed(fd, bo[0].ctx[nengine], engine_id,
+					   bo[0].scratch[nengine], 0, NULL,
+					   batch_timeout_ms, fence, nengine);
+
+			igt_assert_eq(sync_fence_status(*fence), -EIO);
+			igt_info("Test #1: batch_buffer_timed --> execute on %s\n",
+				 e_->name);
+		}
+skip_case:
+		nengine++;
+	}
+
+	for (i = 0; i < nengine; i++) {
+		if (i < 1) /* only the first engine produced a fence */
+			close(fence[i]);
+		gem_context_destroy(fd, bo[0].ctx[i]);
+		gem_close(fd, bo[0].scratch[i]);
+	}
+
+	free(fence);
+}
+
+/*
+ * Test#2: Create some work and let it run on all engines
+ */
+static void long_batch_test2(int fd, int prio1, int prio2, int reset_ctx1,
+			int reset_ctx2, unsigned threshold)
+{
+	unsigned engine_id = 0;
+	unsigned nengine = 0;
+	struct intel_execution_engine2 *e_;
+	struct {
+		uint32_t ctx[MAX_ENGINES];
+		uint32_t scratch[MAX_ENGINES];
+	} bo[2];
+	int *fence, i = 0;
+	const uint64_t batch_timeout_ms = timeout_100ms * 4;
+
+	fence = malloc(sizeof(int) * MAX_ENGINES);
+	igt_assert(fence);
+
+	__for_each_physical_engine(fd, e_) {
+		engine_id = e_->flags;
+
+		bo[1].scratch[nengine] = gem_create(fd, 4096);
+		bo[1].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[1].ctx[nengine], prio2);
+		batch_buffer_factory(fd, bo[1].ctx[nengine], engine_id,
+				 bo[1].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+
+		bo[0].scratch[nengine] = gem_create(fd, 4096);
+		bo[0].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[0].ctx[nengine], prio1);
+		batch_buffer_factory(fd, bo[0].ctx[nengine], engine_id,
+				 bo[0].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+		igt_info("Test #2: ctx1/ctx2 --> execute on %s"
+			" with fence: %d\n",e_->name, sync_fence_status(*fence));
+		nengine++;
+	}
+
+	for (i = 0; i < nengine; i++) {
+		close(fence[i]);
+		gem_context_destroy(fd, bo[0].ctx[i]);
+		gem_context_destroy(fd, bo[1].ctx[i]);
+		gem_close(fd, bo[0].scratch[i]);
+		gem_close(fd, bo[1].scratch[i]);
+	}
+
+	free(fence);
+}
+
+/* Sleep for 'ms' milliseconds; nanosleep() requires tv_nsec < 1e9,
+ * so split whole seconds out of the duration first. */
+void sleep_nsec(int ms)
+{
+	struct timespec req = {
+		.tv_sec = ms / 1000,
+		.tv_nsec = (ms % 1000) * 1000000L,
+	};
+
+	nanosleep(&req, NULL);
+}
+
+/*
+ * Test#3: Submit a long batch and after half of the expected runtime
+ * submit a higher priority batch and then try to cancel the execution
+ */
+static void long_batch_test3(int fd, int prio1, int prio2, int reset_ctx1,
+			int reset_ctx2, unsigned threshold)
+{
+	unsigned engine_id = 0;
+	unsigned nengine = 0;
+	struct intel_execution_engine2 *e_;
+	struct {
+		uint32_t ctx[MAX_ENGINES];
+		uint32_t scratch[MAX_ENGINES];
+	} bo[2];
+	int *fence, i = 0;
+	unsigned flags = HANG_ALLOW_CAPTURE;
+	const uint64_t batch_timeout_ms = timeout_100ms * 4;
+
+	fence = malloc(sizeof(int) * MAX_ENGINES);
+	igt_assert(fence);
+
+	nengine = 0;
+
+	__for_each_physical_engine(fd, e_) {
+		engine_id = e_->flags;
+
+		bo[1].scratch[nengine] = gem_create(fd, 4096);
+		bo[1].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[1].ctx[nengine], prio2);
+		batch_buffer_factory(fd, bo[1].ctx[nengine], engine_id,
+				 bo[1].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+
+		/* wait half the expected runtime; timeout_100ms is in ns */
+		sleep_nsec(2 * timeout_100ms / 1000000);
+
+		bo[0].scratch[nengine] = gem_create(fd, 4096);
+		bo[0].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[0].ctx[nengine], prio1);
+		batch_buffer_factory(fd, bo[0].ctx[nengine], engine_id,
+				 bo[0].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+
+		if (context_set_watchdog(fd, engine_id, e_->name,
+					 bo[0].ctx[nengine],
+					 threshold) == -ENODEV) {
+			igt_info("No support for gpu h/w watchdog on %s\n",
+			 e_->name);
+			goto skip_case;
+		}
+		clear_error_state(fd);
+		inject_hang(fd, engine_id, bo[0].ctx[nengine], flags);
+		/* Now check the engine was reset successfully */
+		//igt_assert_eq(sync_fence_status(*fence), -EIO);
+		igt_info("Test #3 ctx1/ctx2 --> set watchdog and"
+				" cancel ctx2 at half expected run with higher"
+				" priority engine: %s, fence status: %d \n",
+					e_->name, sync_fence_status(*fence));
+skip_case:
+		nengine++;
+	}
+
+	for (i = 0; i < nengine; i++) {
+		close(fence[i]);
+		gem_context_destroy(fd, bo[0].ctx[i]);
+		gem_context_destroy(fd, bo[1].ctx[i]);
+		gem_close(fd, bo[0].scratch[i]);
+		gem_close(fd, bo[1].scratch[i]);
+	}
+
+	free(fence);
+}
+
+/*
+ * Test#4: Create 2 ctx, set a gpu watchdog timeout on both,
+ * and either execute or cancel the execution.
+ */
+static void long_batch_test4(int fd, int prio1, int prio2, int reset_ctx1,
+			int reset_ctx2, unsigned threshold)
+{
+	unsigned engine_id = 0;
+	unsigned nengine = 0;
+	struct intel_execution_engine2 *e_;
+	struct {
+		uint32_t ctx[MAX_ENGINES];
+		uint32_t scratch[MAX_ENGINES];
+	} bo[2];
+	int *fence, i = 0;
+	unsigned flags = HANG_ALLOW_CAPTURE;
+	const uint64_t batch_timeout_ms = timeout_100ms * 4;
+
+	fence = malloc(sizeof(int) * MAX_ENGINES);
+	igt_assert(fence);
+
+	nengine = 0;
+
+	__for_each_physical_engine(fd, e_) {
+		engine_id = e_->flags;
+
+		bo[1].scratch[nengine] = gem_create(fd, 4096);
+		bo[1].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[1].ctx[nengine], prio2);
+		batch_buffer_factory(fd, bo[1].ctx[nengine], engine_id,
+				 bo[1].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+
+		bo[0].scratch[nengine] = gem_create(fd, 4096);
+		bo[0].ctx[nengine] = gem_context_create(fd);
+		gem_context_set_priority(fd, bo[0].ctx[nengine], prio1);
+		batch_buffer_factory(fd, bo[0].ctx[nengine], engine_id,
+				 bo[0].scratch[nengine], 0, NULL,
+				 batch_timeout_ms, fence, nengine);
+
+		if (context_set_watchdog(fd, engine_id, e_->name,
+					 bo[1].ctx[nengine],
+					 threshold) == -ENODEV) {
+			igt_info("No support for gpu h/w watchdog on %s\n",
+			 e_->name);
+			goto skip_case;
+		}
+
+		if (reset_ctx2) {
+			clear_error_state(fd);
+			inject_hang(fd, engine_id, bo[1].ctx[nengine], flags);
+
+			/* Now check the engine was reset successfully */
+			//igt_assert_eq(*fence, EIO);
+			igt_info("Test #4 ctx1/ctx2 --> set watchdog and"
+				" cancel ctx2 on %s with fence status: %d \n",
+					e_->name, sync_fence_status(*fence));
+		}
+
+		context_set_watchdog(fd, engine_id, e_->name,
+					 bo[0].ctx[nengine],
+					 threshold);
+		if (reset_ctx1) {
+			clear_error_state(fd);
+			inject_hang(fd, engine_id, bo[0].ctx[nengine], flags);
+
+			/* Now check the engine was reset successfully */
+			//igt_assert_eq(*fence, EIO);
+			igt_info("Test #4: ctx1/ctx2 --> set watchdog and"
+				" cancel ctx1 on %s with fence status: %d\n",
+				e_->name, sync_fence_status(*fence));
+		}
+skip_case:
+		nengine++;
+	}
+
+	for (i = 0; i < nengine; i++) {
+		close(fence[i]);
+		gem_context_destroy(fd, bo[0].ctx[i]);
+		gem_context_destroy(fd, bo[1].ctx[i]);
+		gem_close(fd, bo[0].scratch[i]);
+		gem_close(fd, bo[1].scratch[i]);
+	}
+
+	free(fence);
+}
+
+igt_main
+{
+	int fd;
+	unsigned int i = 0;
+	struct {
+		char *name;
+		int prio[2];
+		bool reset[2];
+		unsigned threshold;
+	} tests[] = {
+		{"ctx1-scratch-timestamp",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {false, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-exec-ctx2-exec-all-engines",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {false, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-low-prio-exec-ctx2-high-prio-reset-after-half-time-all-engines",
+		{MAX_PRIO, MIN_PRIO}, {false, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-reset-ctx2-exec-all-engines",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {true, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-reset-ctx2-exec-all-engines-below-threshold",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {true, false},
+		 WATCHDOG_BELOW_THRESHOLD},
+		{"ctx1-reset-ctx2-exec-all-engines-over-threshold",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {true, false},
+		 WATCHDOG_OVER_THRESHOLD},
+		{"ctx2-reset-ctx1-exec-all-engines",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {false, true}, WATCHDOG_THRESHOLD},
+		{"ctx2-reset-ctx1-reset-all-engines",
+		{DEFAULT_PRIO, DEFAULT_PRIO}, {true, true}, WATCHDOG_THRESHOLD},
+		{"ctx1-high-prio-reset-ctx2-low-prio-exec-all-engines",
+		{MAX_PRIO, MIN_PRIO}, {true, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-low-prio-reset-ctx2-high-prio-exec-all-engines",
+		{MIN_PRIO, MAX_PRIO}, {true, false}, WATCHDOG_THRESHOLD},
+		{"ctx1-low-prio-reset-ctx2-high-prio-reset-all-engines",
+		{MIN_PRIO, MAX_PRIO}, {true, true}, WATCHDOG_THRESHOLD},
+	};
+
+	igt_skip_on_simulation();
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(fd);
+	}
+
+	igt_subtest_group {
+		igt_subtest_f("%s", tests[0].name) {
+			long_batch_test1(fd, tests[0].prio[0],
+					tests[0].prio[1],
+					tests[0].reset[0],
+					tests[0].reset[1],
+					tests[0].threshold);
+		}
+
+		igt_subtest_f("%s", tests[1].name) {
+			long_batch_test2(fd, tests[1].prio[0],
+					tests[1].prio[1],
+					tests[1].reset[0],
+					tests[1].reset[1],
+					tests[1].threshold);
+		}
+
+		igt_subtest_f("%s", tests[2].name) {
+			long_batch_test3(fd, tests[2].prio[0],
+					tests[2].prio[1],
+					tests[2].reset[0],
+					tests[2].reset[1],
+					tests[2].threshold);
+		}
+
+		for (i = 3; i < ARRAY_SIZE(tests); i++) {
+			igt_subtest_f("%s", tests[i].name) {
+				long_batch_test4(fd, tests[i].prio[0],
+						tests[i].prio[1],
+						tests[i].reset[0],
+						tests[i].reset[1],
+						tests[i].threshold);
+			}
+		}
+	}
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index f168fbbae2a8..033842dbc2d1 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -220,6 +220,7 @@ i915_progs = [
 	'gem_userptr_blits',
 	'gem_vm_create',
 	'gem_wait',
+	'gem_watchdog',
 	'gem_workarounds',
 	'gem_write_read_ring_switch',
 	'i915_fb_tiling',
-- 
2.19.1


