Mesa (main): virgl/ci: Setup virtio-vsock based IPC

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Feb 17 07:43:50 UTC 2022


Module: Mesa
Branch: main
Commit: 9ef8af357ddabef47e25414a1ab96e46273a01d1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ef8af357ddabef47e25414a1ab96e46273a01d1

Author: Cristian Ciocaltea <cristian.ciocaltea at collabora.com>
Date:   Tue Feb  8 22:48:39 2022 +0200

virgl/ci: Setup virtio-vsock based IPC

The mechanism currently used to pass data from the dEQP child process
executed in a crosvm guest environment towards the deqp-runner wrapper
script that starts the crosvm instance is based on creating, writing
and reading regular files.

In addition to the main drawback of going through storage, this approach
is potentially unreliable because the data cannot be transferred in
real time and there is no control over when the transmission ends. It
also requires a forced sleep for syncing the content, while the minimum
amount of time necessary to wait cannot be easily and safely
determined.

Replace this with an IPC based on the virtio transport for virtual
sockets (virtio-vsock).

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea at collabora.com>
Reviewed-by: Daniel Stone <daniels at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14995>

---

 .gitlab-ci/crosvm-init.sh   |  34 ++++++-----
 .gitlab-ci/crosvm-runner.sh | 133 +++++++++++++++++++++++++++++++-------------
 2 files changed, 113 insertions(+), 54 deletions(-)

diff --git a/.gitlab-ci/crosvm-init.sh b/.gitlab-ci/crosvm-init.sh
index c6d9095b2f5..15e68f835f7 100755
--- a/.gitlab-ci/crosvm-init.sh
+++ b/.gitlab-ci/crosvm-init.sh
@@ -2,7 +2,9 @@
 
 set -e
 
-export DEQP_TEMP_DIR="$1"
+VSOCK_STDOUT=$1
+VSOCK_STDERR=$2
+VSOCK_TEMP_DIR=$3
 
 mount -t proc none /proc
 mount -t sysfs none /sys
@@ -10,29 +12,31 @@ mkdir -p /dev/pts
 mount -t devpts devpts /dev/pts
 mount -t tmpfs tmpfs /tmp
 
-. $DEQP_TEMP_DIR/crosvm-env.sh
+. ${VSOCK_TEMP_DIR}/crosvm-env.sh
 
 # .gitlab-ci.yml script variable is using relative paths to install directory,
 # so change to that dir before running `crosvm-script`
 cd "${CI_PROJECT_DIR}"
 
-# The exception is the dEQP binary, since it needs to run from the directory
-# it's in
-if [ -d "${DEQP_BIN_DIR}" ]
-then
-    cd "${DEQP_BIN_DIR}"
-fi
+# The exception is the dEQP binary, as it needs to run from its own directory
+[ -z "${DEQP_BIN_DIR}" ] || cd "${DEQP_BIN_DIR}"
 
-dmesg --level crit,err,warn -w >> $DEQP_TEMP_DIR/stderr &
+# Use a FIFO to collect relevant error messages
+STDERR_FIFO=/tmp/crosvm-stderr.fifo
+mkfifo -m 600 ${STDERR_FIFO}
 
-set +e
-stdbuf -oL sh $DEQP_TEMP_DIR/crosvm-script.sh 2>> $DEQP_TEMP_DIR/stderr >> $DEQP_TEMP_DIR/stdout
-echo $? > $DEQP_TEMP_DIR/exit_code
-set -e
+dmesg --level crit,err,warn -w > ${STDERR_FIFO} &
+DMESG_PID=$!
 
-sync
-sleep 1
+# Transfer the errors and crosvm-script output via a pair of virtio-vsocks
+socat -d -u pipe:${STDERR_FIFO} vsock-listen:${VSOCK_STDERR} &
+socat -d -U vsock-listen:${VSOCK_STDOUT} \
+    system:"stdbuf -eL sh ${VSOCK_TEMP_DIR}/crosvm-script.sh 2> ${STDERR_FIFO}; echo \$? > ${VSOCK_TEMP_DIR}/exit_code",nofork
 
+kill ${DMESG_PID}
+wait
+
+sync
 poweroff -d -n -f || true
 
 sleep 1   # Just in case init would exit before the kernel shuts down the VM
diff --git a/.gitlab-ci/crosvm-runner.sh b/.gitlab-ci/crosvm-runner.sh
index 4e0f64ae6b0..c3b56eb6543 100755
--- a/.gitlab-ci/crosvm-runner.sh
+++ b/.gitlab-ci/crosvm-runner.sh
@@ -1,58 +1,113 @@
 #!/bin/sh
 
-set -ex
+set -e
 
-# This script can be called concurrently, pass arguments and env in a
-# per-instance tmp dir
-DEQP_TEMP_DIR=$(mktemp -d /tmp.XXXXXXXXXX)
-export DEQP_TEMP_DIR
+#
+# Helper to generate CIDs for virtio-vsock based communication with processes
+# running inside crosvm guests.
+#
+# A CID is a 32-bit Context Identifier to be assigned to a crosvm instance
+# and must be unique across the host system. For this purpose, let's take
+# the least significant 26 bits from CI_JOB_ID as a base and generate a 6-bit
+# prefix number to handle up to 64 concurrent crosvm instances per job runner.
+#
+# As a result, the following variables are set:
+#  - VSOCK_CID: the crosvm unique CID to be passed as a run argument
+#
+#  - VSOCK_STDOUT, VSOCK_STDERR: the port numbers the guest should accept
+#    vsock connections on in order to transfer output messages
+#
+#  - VSOCK_TEMP_DIR: the temporary directory path used to pass additional
+#    context data towards the guest
+#
+set_vsock_context() {
+    [ -n "${CI_JOB_ID}" ] || {
+        echo "Missing or unset CI_JOB_ID env variable" >&2
+        exit 1
+    }
+
+    local dir_prefix="/tmp-vsock."
+    local cid_prefix=0
+    unset VSOCK_TEMP_DIR
+
+    while [ ${cid_prefix} -lt 64 ]; do
+        VSOCK_TEMP_DIR=${dir_prefix}${cid_prefix}
+        mkdir "${VSOCK_TEMP_DIR}" >/dev/null 2>&1 && break || unset VSOCK_TEMP_DIR
+        cid_prefix=$((cid_prefix + 1))
+    done
+
+    [ -n "${VSOCK_TEMP_DIR}" ] || return 1
+
+    VSOCK_CID=$(((CI_JOB_ID & 0x3ffffff) | ((cid_prefix & 0x3f) << 26)))
+    VSOCK_STDOUT=5001
+    VSOCK_STDERR=5002
+    return 0
+}
 
 # The dEQP binary needs to run from the directory it's in
 if [ -n "${1##*.sh}" ] && [ -z "${1##*"deqp"*}" ]; then
-  DEQP_BIN_DIR=$(dirname "$1")
-  export DEQP_BIN_DIR
+    DEQP_BIN_DIR=$(dirname "$1")
+    export DEQP_BIN_DIR
 fi
 
+set_vsock_context || { echo "Could not generate crosvm vsock CID" >&2; exit 1; }
+
+# Ensure cleanup on script exit
+trap 'exit ${exit_code}' INT TERM
+trap 'exit_code=$?; [ -z "${SOCAT_PIDS}" ] || kill ${SOCAT_PIDS} >/dev/null 2>&1 || true; rm -rf ${VSOCK_TEMP_DIR}' EXIT
+
 # Securely pass the current variables to the crosvm environment
-CI_COMMON="$CI_PROJECT_DIR"/install/common
+CI_COMMON="${CI_PROJECT_DIR}"/install/common
 echo "Variables passed through:"
-"${CI_COMMON}"/generate-env.sh | tee ${DEQP_TEMP_DIR}/crosvm-env.sh
+"${CI_COMMON}"/generate-env.sh | tee ${VSOCK_TEMP_DIR}/crosvm-env.sh
 
-CROSVM_KERNEL_ARGS="quiet console=null root=my_root rw rootfstype=virtiofs init=$CI_PROJECT_DIR/install/crosvm-init.sh ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0 -- $DEQP_TEMP_DIR"
+# Set the crosvm-script as the arguments of the current script
+echo "$@" > ${VSOCK_TEMP_DIR}/crosvm-script.sh
 
-# Set the crosvm-script as the arguments of the current script.
-echo "$@" > $DEQP_TEMP_DIR/crosvm-script.sh
-
-unset DISPLAY
-unset XDG_RUNTIME_DIR
+# Start background processes to receive output from guest
+socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDERR},retry=200,interval=0.1 stderr &
+SOCAT_PIDS=$!
+socat -u vsock-connect:${VSOCK_CID}:${VSOCK_STDOUT},retry=200,interval=0.1 stdout &
+SOCAT_PIDS="${SOCAT_PIDS} $!"
 
+# Setup networking
 /usr/sbin/iptables-legacy -w -t nat -A POSTROUTING -o eth0 -j MASQUERADE
 echo 1 > /proc/sys/net/ipv4/ip_forward
 
-# Send output from guest to host
-touch $DEQP_TEMP_DIR/stderr $DEQP_TEMP_DIR/stdout
-tail -f $DEQP_TEMP_DIR/stderr >> /dev/stderr &
-ERR_TAIL_PID=$!
-tail -f $DEQP_TEMP_DIR/stdout >> /dev/stdout &
-OUT_TAIL_PID=$!
+# Prepare to start crosvm
+unset DISPLAY
+unset XDG_RUNTIME_DIR
+
+CROSVM_KERN_ARGS="quiet console=null root=my_root rw rootfstype=virtiofs ip=192.168.30.2::192.168.30.1:255.255.255.0:crosvm:eth0"
+CROSVM_KERN_ARGS="${CROSVM_KERN_ARGS} init=${CI_PROJECT_DIR}/install/crosvm-init.sh -- ${VSOCK_STDOUT} ${VSOCK_STDERR} ${VSOCK_TEMP_DIR}"
 
-trap "exit \$exit_code" INT TERM
-trap "exit_code=\$?; kill $ERR_TAIL_PID $OUT_TAIL_PID; rm -rf $DEQP_TEMP_DIR" EXIT
+set +e -x
 
 # We aren't testing LLVMPipe here, so we don't need to validate NIR on the host
-NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER="$CROSVM_GALLIUM_DRIVER" crosvm run \
-  --gpu "$CROSVM_GPU_ARGS" \
-  -m 4096 \
-  -c 2 \
-  --disable-sandbox \
-  --shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \
-  --host_ip=192.168.30.1 --netmask=255.255.255.0 --mac "AA:BB:CC:00:00:12" \
-  -p "$CROSVM_KERNEL_ARGS" \
-  /lava-files/bzImage > $DEQP_TEMP_DIR/crosvm 2>&1
-
-RET=$(cat $DEQP_TEMP_DIR/exit_code || true)
-
-# Got no exit code from the script, show crosvm output to help with debugging
-[ -n "$RET" ] || cat $DEQP_TEMP_DIR/crosvm || true
-
-exit ${RET:-1}
+NIR_DEBUG="novalidate" LIBGL_ALWAYS_SOFTWARE="true" GALLIUM_DRIVER=${CROSVM_GALLIUM_DRIVER} \
+crosvm run \
+    --gpu "${CROSVM_GPU_ARGS}" -m 4096 -c 2 --disable-sandbox \
+    --shared-dir /:my_root:type=fs:writeback=true:timeout=60:cache=always \
+    --host_ip "192.168.30.1" --netmask "255.255.255.0" --mac "AA:BB:CC:00:00:12" \
+    --cid ${VSOCK_CID} -p "${CROSVM_KERN_ARGS}" \
+    /lava-files/bzImage > ${VSOCK_TEMP_DIR}/crosvm 2>&1
+
+CROSVM_RET=$?
+[ ${CROSVM_RET} -eq 0 ] && {
+    # socat bg processes should terminate as soon as the remote peers exit
+    wait
+    # The actual return code is the crosvm guest script's exit code
+    CROSVM_RET=$(cat ${VSOCK_TEMP_DIR}/exit_code 2>/dev/null)
+    # Force error when the guest script's exit code is not available
+    CROSVM_RET=${CROSVM_RET:-1}
+}
+
+# Show crosvm output on error to help with debugging
+[ ${CROSVM_RET} -eq 0 ] || {
+    set +x
+    echo "Dumping crosvm output.." >&2
+    cat ${VSOCK_TEMP_DIR}/crosvm >&2
+    set -x
+}
+
+exit ${CROSVM_RET}



More information about the mesa-commit mailing list