[Intel-gfx] [PATCH] drm/i915/selftests: Avoid repeatedly harming the same innocent context
kbuild test robot
lkp at intel.com
Sun Apr 1 02:45:31 UTC 2018
Hi Chris,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on drm-intel/for-linux-next]
[also build test WARNING on v4.16-rc7 next-20180329]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-selftests-Avoid-repeatedly-harming-the-same-innocent-context/20180401-022503
base: git://anongit.freedesktop.org/drm-intel for-linux-next
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
drivers/gpu/drm/i915/selftests/intel_hangcheck.c:988:33: sparse: undefined identifier 'GEM_TRACE_DUMP'
>> drivers/gpu/drm/i915/selftests/intel_hangcheck.c:988:47: sparse: call with no type!
In file included from drivers/gpu/drm/i915/intel_hangcheck.c:465:0:
drivers/gpu/drm/i915/selftests/intel_hangcheck.c: In function 'igt_reset_queue':
drivers/gpu/drm/i915/selftests/intel_hangcheck.c:988:5: error: implicit declaration of function 'GEM_TRACE_DUMP'; did you mean 'GEM_TRACE'? [-Werror=implicit-function-declaration]
GEM_TRACE_DUMP();
^~~~~~~~~~~~~~
GEM_TRACE
cc1: some warnings being treated as errors
vim +988 drivers/gpu/drm/i915/selftests/intel_hangcheck.c
922
923 static int igt_reset_queue(void *arg)
924 {
925 struct drm_i915_private *i915 = arg;
926 struct intel_engine_cs *engine;
927 enum intel_engine_id id;
928 struct hang h;
929 int err;
930
931 /* Check that we replay pending requests following a hang */
932
933 global_reset_lock(i915);
934
935 mutex_lock(&i915->drm.struct_mutex);
936 err = hang_init(&h, i915);
937 if (err)
938 goto unlock;
939
940 for_each_engine(engine, i915, id) {
941 struct i915_request *prev;
942 IGT_TIMEOUT(end_time);
943 unsigned int count;
944
945 if (!intel_engine_can_store_dword(engine))
946 continue;
947
948 prev = hang_create_request(&h, engine);
949 if (IS_ERR(prev)) {
950 err = PTR_ERR(prev);
951 goto fini;
952 }
953
954 i915_request_get(prev);
955 __i915_request_add(prev, true);
956
957 count = 0;
958 do {
959 struct i915_request *rq;
960 unsigned int reset_count;
961
962 rq = hang_create_request(&h, engine);
963 if (IS_ERR(rq)) {
964 err = PTR_ERR(rq);
965 goto fini;
966 }
967
968 i915_request_get(rq);
969 __i915_request_add(rq, true);
970
971 /*
972 * XXX We don't handle resetting the kernel context
973 * very well. If we trigger a device reset twice in
974 * quick succession while the kernel context is
975 * executing, we may end up skipping the breadcrumb.
976 * This is really only a problem for the selftest as
977 * normally there is a large interlude between resets
978 * (hangcheck), or we focus on resetting just one
979 * engine and so avoid repeatedly resetting innocents.
980 */
981 err = wait_for_others(i915, engine);
982 if (err) {
983 pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
984 __func__, engine->name);
985 i915_request_put(rq);
986 i915_request_put(prev);
987
> 988 GEM_TRACE_DUMP();
989 i915_gem_set_wedged(i915);
990 goto fini;
991 }
992
993 if (!wait_for_hang(&h, prev)) {
994 struct drm_printer p = drm_info_printer(i915->drm.dev);
995
996 pr_err("%s(%s): Failed to start request %x, at %x\n",
997 __func__, engine->name,
998 prev->fence.seqno, hws_seqno(&h, prev));
999 intel_engine_dump(engine, &p,
1000 "%s\n", engine->name);
1001
1002 i915_request_put(rq);
1003 i915_request_put(prev);
1004
1005 i915_reset(i915, 0);
1006 i915_gem_set_wedged(i915);
1007
1008 err = -EIO;
1009 goto fini;
1010 }
1011
1012 reset_count = fake_hangcheck(prev);
1013
1014 i915_reset(i915, I915_RESET_QUIET);
1015
1016 GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
1017 &i915->gpu_error.flags));
1018
1019 if (prev->fence.error != -EIO) {
1020 pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
1021 prev->fence.error);
1022 i915_request_put(rq);
1023 i915_request_put(prev);
1024 err = -EINVAL;
1025 goto fini;
1026 }
1027
1028 if (rq->fence.error) {
1029 pr_err("Fence error status not zero [%d] after unrelated reset\n",
1030 rq->fence.error);
1031 i915_request_put(rq);
1032 i915_request_put(prev);
1033 err = -EINVAL;
1034 goto fini;
1035 }
1036
1037 if (i915_reset_count(&i915->gpu_error) == reset_count) {
1038 pr_err("No GPU reset recorded!\n");
1039 i915_request_put(rq);
1040 i915_request_put(prev);
1041 err = -EINVAL;
1042 goto fini;
1043 }
1044
1045 i915_request_put(prev);
1046 prev = rq;
1047 count++;
1048 } while (time_before(jiffies, end_time));
1049 pr_info("%s: Completed %d resets\n", engine->name, count);
1050
1051 *h.batch = MI_BATCH_BUFFER_END;
1052 i915_gem_chipset_flush(i915);
1053
1054 i915_request_put(prev);
1055
1056 err = flush_test(i915, I915_WAIT_LOCKED);
1057 if (err)
1058 break;
1059 }
1060
1061 fini:
1062 hang_fini(&h);
1063 unlock:
1064 mutex_unlock(&i915->drm.struct_mutex);
1065 global_reset_unlock(i915);
1066
1067 if (i915_terminally_wedged(&i915->gpu_error))
1068 return -EIO;
1069
1070 return err;
1071 }
1072
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
More information about the Intel-gfx mailing list