[Spice-devel] [PATCH qxl-win v2] display: fix deadlock when dbg_level >= 15

Uri Lublin uril at redhat.com
Tue Jun 25 06:18:37 PDT 2013


On 06/25/2013 12:01 AM, Yonit Halperin wrote:
> DebugPrintV first locks print_sem, and then locks io_sem.
> async_io, locks io_sem.
> In ordr to avoid a deadlock, DebugPrintV MUSTN'T be called when
> io_sem is locked.
> I also moved the locking of io_sem, so that reading pdev->use_async
> will also be protected (async_io can modify use_async to 0 upon a
> timeout).
>
> Also notice, that locking io_sem during DebugPrintV limits our ability
> to use the log_port for debugging concurrency problems related to ios.

Hi Yonit,

Ack.

Thanks,
     Uri.


> ---
>   xddm/display/qxldd.h | 30 ++++++++++++++++++++++--------
>   1 file changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/xddm/display/qxldd.h b/xddm/display/qxldd.h
> index 1a1b5d5..a0b2eab 100644
> --- a/xddm/display/qxldd.h
> +++ b/xddm/display/qxldd.h
> @@ -472,9 +472,19 @@ static _inline void async_io(PDev *pdev, asyncable_t op, UCHAR val)
>       ENG_TIME_FIELDS start, finish;
>       LARGE_INTEGER timeout;                      // 1 => 100 nanoseconds
>       ULONG64 millis;
> +    BOOL is_async = FALSE;
> +    /*
> +     * calling DEBUG_PRINT after locking io_sem can cause deadlock because
> +     * DebugPrintV locks print_sem and then io_sem. Instead, we defer the
> +     * the DEBUG_PRINT calls.
> +     */
> +    BOOL error_timeout = FALSE;
> +    BOOL error_bad_sync = FALSE;
>   
> +    DEBUG_PRINT((pdev, 3, "%s: start io op %d\n", __FUNCTION__, (int)op));
> +    EngAcquireSemaphore(pdev->io_sem);
>       if (pdev->use_async) {
> -        EngAcquireSemaphore(pdev->io_sem);
> +        is_async = TRUE;
>           WRITE_PORT_UCHAR(pdev->asyncable[op][ASYNC], val);
>           /* Our Interrupt may be taken from us unexpectedly, by a surprise removal.
>            * in which case this event will never be set. This happens only during WHQL
> @@ -482,26 +492,30 @@ static _inline void async_io(PDev *pdev, asyncable_t op, UCHAR val)
>            * we get reset. We use EngQueryLocalTime because there is no way to differentiate a return on
>            * timeout from a return on event set otherwise. */
>           timeout.QuadPart = -INTERRUPT_NOT_PRESENT_TIMEOUT_100NS; // negative  => relative
> -        DEBUG_PRINT((pdev, 15, "WAIT_FOR_EVENT %d\n", (int)op));
>           EngQueryLocalTime(&start);
>           WAIT_FOR_EVENT(pdev, pdev->io_cmd_event, &timeout);
>           EngQueryLocalTime(&finish);
>           millis = eng_time_diff_ms(&finish, &start);
>           if (millis >= INTERRUPT_NOT_PRESENT_TIMEOUT_MS) {
> +            error_timeout = TRUE;
>               pdev->use_async = 0;
> -            DEBUG_PRINT((pdev, 0, "%s: timeout reached, disabling async io!\n", __FUNCTION__));
>           }
> -        EngReleaseSemaphore(pdev->io_sem);
> -        DEBUG_PRINT((pdev, 3, "finished async %d\n", (int)op));
>       } else {
> +        is_async = FALSE;
>           if (pdev->asyncable[op][SYNC] == NULL) {
> -            DEBUG_PRINT((pdev, 0, "ERROR: trying calling sync io on NULL port %d\n", op));
> +            error_bad_sync = TRUE;
>           } else {
> -            EngAcquireSemaphore(pdev->io_sem);
>               WRITE_PORT_UCHAR(pdev->asyncable[op][SYNC], val);
> -            EngReleaseSemaphore(pdev->io_sem);
>           }
>       }
> +    EngReleaseSemaphore(pdev->io_sem);
> +    if (error_timeout) {
> +        DEBUG_PRINT((pdev, 0, "%s: timeout reached, disabling async io!\n", __FUNCTION__));
> +    } else if (error_bad_sync) {
> +        DEBUG_PRINT((pdev, 0, "%s: ERROR: trying calling sync io on NULL port %d\n", __FUNCTION__, op));
> +    } else {
> +        DEBUG_PRINT((pdev, 3, "%s: finished op %d async %d\n", __FUNCTION__, (int)op, is_async));
> +    }
>   }
>   
>   /*



More information about the Spice-devel mailing list