Mesa (main): ci: Add Intel GPU frequency utility

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 6 13:35:52 UTC 2022


Module: Mesa
Branch: main
Commit: 5d5af8bffbb76e2e88ad4b8e20d74ff5d591eab7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d5af8bffbb76e2e88ad4b8e20d74ff5d591eab7

Author: Cristian Ciocaltea <cristian.ciocaltea at collabora.com>
Date:   Tue Mar 29 15:55:38 2022 +0300

ci: Add Intel GPU frequency utility

Add script to manage Intel GPU frequencies.

It can be used for debugging performance problems or to lock a stable
frequency while executing benchmark tests.

Typical use cases:

- Get all available GPU frequency information
$ ./intel-gpu-freq.sh -g all

* Hardware capabilities
   RP0: 1350 MHz
   RPn:  100 MHz
   RP1:  400 MHz

* Enforcements
   max: 1350 MHz
   min:  100 MHz
 boost: 1350 MHz

* Actual
   act:  100 MHz
   cur:  400 MHz

- Lock frequency to 80% of the maximum allowed by hardware and enable
  throttling detection
$ ./intel-gpu-freq.sh -s 80% -d

GPU throttling detected: act=1050 min=1350 cur=1350 RPn=100
GPU throttling detected: act=1100 min=1350 cur=1350 RPn=100

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea at collabora.com>
Acked-by: Tomeu Vizoso <tomeu.vizoso at collabora.com>
Reviewed-by: Guilherme Gallo <guilherme.gallo at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15662>

---

 .gitlab-ci/common/intel-gpu-freq.sh | 567 ++++++++++++++++++++++++++++++++++++
 1 file changed, 567 insertions(+)

diff --git a/.gitlab-ci/common/intel-gpu-freq.sh b/.gitlab-ci/common/intel-gpu-freq.sh
new file mode 100755
index 00000000000..10a72eea7a3
--- /dev/null
+++ b/.gitlab-ci/common/intel-gpu-freq.sh
@@ -0,0 +1,567 @@
+#!/bin/sh
+#
+# The Intel i915 GPU driver allows changing the minimum, maximum and boost
+# frequencies in steps of 50 MHz via /sys/class/drm/card<n>/<freq_info>,
+# where <n> is the DRM card index and <freq_info> one of the following:
+#
+# - gt_max_freq_mhz (enforced maximum freq)
+# - gt_min_freq_mhz (enforced minimum freq)
+# - gt_boost_freq_mhz (enforced boost freq)
+#
+# The hardware capabilities can be accessed via:
+#
+# - gt_RP0_freq_mhz (supported maximum freq)
+# - gt_RPn_freq_mhz (supported minimum freq)
+# - gt_RP1_freq_mhz (most efficient freq)
+#
+# The current frequency can be read from:
+# - gt_act_freq_mhz (the actual GPU freq)
+# - gt_cur_freq_mhz (the last requested freq)
+#
+# Copyright (C) 2022 Collabora Ltd.
+# Author: Cristian Ciocaltea <cristian.ciocaltea at collabora.com>
+#
+# SPDX-License-Identifier: MIT
+#
+
+#
+# Constants
+#
+DRM_FREQ_SYSFS_PATTERN="/sys/class/drm/card%d/gt_%s_freq_mhz"
+ENF_FREQ_INFO="max min boost"
+CAP_FREQ_INFO="RP0 RPn RP1"
+ACT_FREQ_INFO="act cur"
+THROTT_DETECT_SLEEP_SEC=2
+THROTT_DETECT_PID_FILE_PATH=/tmp/thrott-detect.pid
+
+#
+# Global variables.
+#
+unset INTEL_DRM_CARD_INDEX
+unset GET_ACT_FREQ GET_ENF_FREQ GET_CAP_FREQ
+unset SET_MIN_FREQ SET_MAX_FREQ
+unset MONITOR_FREQ
+unset DETECT_THROTT
+unset DRY_RUN
+
+#
+# Simple printf based stderr logger.
+#
+log() {
+    local msg_type=$1
+
+    shift
+    printf "%s: %s: " "${msg_type}" "${0##*/}" >&2
+    printf "$@" >&2
+    printf "\n" >&2
+}
+
+#
+# Print the sysfs path of a frequency info node, built from
+# DRM_FREQ_SYSFS_PATTERN.
+#
+# arg1: Frequency info sysfs name, one of *_FREQ_INFO constants above
+# arg2: Video card index, defaults to INTEL_DRM_CARD_INDEX
+#
+print_freq_sysfs_path() {
+    local freq_name="$1"
+    local card_index="${2:-${INTEL_DRM_CARD_INDEX}}"
+
+    printf "${DRM_FREQ_SYSFS_PATTERN}" "${card_index}" "${freq_name}"
+}
+
+#
+# Probe DRM cards 0..15 and store the index of the first one whose PCI vendor
+# ID is 0x8086 into INTEL_DRM_CARD_INDEX.
+#
+# return: 0 if such a card was found, 1 otherwise
+#
+identify_intel_gpu() {
+    local idx vendor_id vendor_path
+
+    for idx in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+        # Skip indexes without a DRM character device
+        [ -c "/dev/dri/card${idx}" ] || continue
+
+        # Derive /sys/class/drm/card<idx>/device/vendor from the freq pattern
+        vendor_path=$(print_freq_sysfs_path "" ${idx})
+        vendor_path=${vendor_path%/*}/device/vendor
+
+        if [ -r "${vendor_path}" ] && read vendor_id < "${vendor_path}" &&
+            [ "${vendor_id}" = "0x8086" ]; then
+            INTEL_DRM_CARD_INDEX=${idx}
+            return 0
+        fi
+    done
+
+    return 1
+}
+
+#
+# Read the specified freq info from sysfs.
+#
+# arg1: Flag (y/n) to also enable printing the freq info.
+# arg2...: Frequency info sysfs name(s), see *_FREQ_INFO constants above
+# return: Global variable(s) FREQ_${arg} containing the requested information.
+#         Exit status is 0 when every entry was read successfully and 1 when
+#         at least one entry could not be read or was empty; the remaining
+#         entries are still processed.
+#
+read_freq_info() {
+    local name var val path print=0 ret=0
+
+    [ "$1" = "y" ] && print=1
+    shift
+
+    #
+    # Iterate with 'for' rather than 'while'/'shift': skipping a failed entry
+    # via 'continue' must still advance to the next one, otherwise the loop
+    # would spin forever on the same unreadable node.
+    #
+    for name in "$@"; do
+        var=FREQ_${name}
+        path=$(print_freq_sysfs_path "${name}")
+
+        if ! { [ -r "${path}" ] && read -r "${var}" < "${path}"; }; then
+            log ERROR "Failed to read freq info from: %s" "${path}"
+            ret=1
+            continue
+        fi
+
+        # Validate the value that has been read, not the variable's name
+        eval val=\$${var}
+        if [ -z "${val}" ]; then
+            log ERROR "Got empty freq info from: %s" "${path}"
+            ret=1
+            continue
+        fi
+
+        if [ ${print} -eq 1 ]; then
+            printf "%6s: %4s MHz\n" "${name}" "${val}"
+        fi
+    done
+
+    return ${ret}
+}
+
+#
+# Display requested info.
+#
+print_freq_info() {
+    local req_freq
+
+    [ -n "${GET_CAP_FREQ}" ] && {
+        printf "* Hardware capabilities\n"
+        read_freq_info y ${CAP_FREQ_INFO}
+        printf "\n"
+    }
+
+    [ -n "${GET_ENF_FREQ}" ] && {
+        printf "* Enforcements\n"
+        read_freq_info y ${ENF_FREQ_INFO}
+        printf "\n"
+    }
+
+    [ -n "${GET_ACT_FREQ}" ] && {
+        printf "* Actual\n"
+        read_freq_info y ${ACT_FREQ_INFO}
+        printf "\n"
+    }
+}
+
+#
+# Helper to print frequency value as requested by user via '-s, --set' option.
+# arg1: user requested freq value
+#
+compute_freq_set() {
+    local val
+
+    case "$1" in
+    +)
+        val=${FREQ_RP0}
+        ;;
+    -)
+        val=${FREQ_RPn}
+        ;;
+    *%)
+        val=$((${1%?} * ${FREQ_RP0} / 100))
+        # Adjust freq to comply with 50 MHz increments
+        val=$((val / 50 * 50))
+        ;;
+    *[!0-9]*)
+        log ERROR "Cannot set freq to invalid value: %s" "$1"
+        return 1
+        ;;
+    "")
+        log ERROR "Cannot set freq to unspecified value"
+        return 1
+        ;;
+    *)
+        # Adjust freq to comply with 50 MHz increments
+        val=$(($1 / 50 * 50))
+        ;;
+    esac
+
+    printf "%s" "${val}"
+}
+
+#
+# Helper for set_freq(): validate SET_MAX_FREQ and apply it.
+#
+# Relies on SET_MAX_FREQ holding a plain MHz value and on FREQ_RP0/FREQ_RPn
+# being already populated; the current enforced min is read here.
+# The same value is written to both the 'max' and the 'boost' sysfs nodes.
+#
+# return: 0 on success or when DRY_RUN is set, non-zero if the enforced min
+#         could not be read, the value is out of range or the write failed
+#
+set_freq_max() {
+    log INFO "Setting GPU max freq to %s MHz" "${SET_MAX_FREQ}"
+
+    read_freq_info n min || return $?
+
+    if [ "${SET_MAX_FREQ}" -gt "${FREQ_RP0}" ]; then
+        log ERROR "Cannot set GPU max freq (%s) to be greater than hw max freq (%s)" \
+            "${SET_MAX_FREQ}" "${FREQ_RP0}"
+        return 1
+    fi
+
+    if [ "${SET_MAX_FREQ}" -lt "${FREQ_RPn}" ]; then
+        # Report the rejected max value (not SET_MIN_FREQ, which may be unset)
+        log ERROR "Cannot set GPU max freq (%s) to be less than hw min freq (%s)" \
+            "${SET_MAX_FREQ}" "${FREQ_RPn}"
+        return 1
+    fi
+
+    if [ "${SET_MAX_FREQ}" -lt "${FREQ_min}" ]; then
+        log ERROR "Cannot set GPU max freq (%s) to be less than min freq (%s)" \
+            "${SET_MAX_FREQ}" "${FREQ_min}"
+        return 1
+    fi
+
+    # Dry-run: validation only, nothing is written
+    [ -z "${DRY_RUN}" ] || return 0
+
+    if ! printf "%s" "${SET_MAX_FREQ}" | tee "$(print_freq_sysfs_path max)" \
+        "$(print_freq_sysfs_path boost)" > /dev/null; then
+        log ERROR "Failed to set GPU max frequency"
+        return 1
+    fi
+}
+
+#
+# Helper for set_freq(): validate SET_MIN_FREQ against the current enforced
+# max and FREQ_RPn, then write it to the 'min' sysfs node.
+#
+# return: 0 on success or when DRY_RUN is set, non-zero otherwise
+#
+set_freq_min() {
+    local min_node
+
+    log INFO "Setting GPU min freq to %s MHz" "${SET_MIN_FREQ}"
+
+    read_freq_info n max || return $?
+
+    if [ ${SET_MIN_FREQ} -gt ${FREQ_max} ]; then
+        log ERROR "Cannot set GPU min freq (%s) to be greater than max freq (%s)" \
+            "${SET_MIN_FREQ}" "${FREQ_max}"
+        return 1
+    fi
+
+    if [ ${SET_MIN_FREQ} -lt ${FREQ_RPn} ]; then
+        log ERROR "Cannot set GPU min freq (%s) to be less than hw min freq (%s)" \
+            "${SET_MIN_FREQ}" "${FREQ_RPn}"
+        return 1
+    fi
+
+    # Dry-run: validation only, nothing is written
+    [ -n "${DRY_RUN}" ] && return 0
+
+    min_node=$(print_freq_sysfs_path min)
+    if ! printf "%s" ${SET_MIN_FREQ} > ${min_node}; then
+        log ERROR "Failed to set GPU min frequency"
+        return 1
+    fi
+}
+
+#
+# Set min or max or both GPU frequencies to the user indicated values.
+#
+set_freq() {
+    # Get hw max & min frequencies
+    read_freq_info n RP0 RPn || return $?
+
+    [ -z "${SET_MAX_FREQ}" ] || {
+        SET_MAX_FREQ=$(compute_freq_set "${SET_MAX_FREQ}")
+        [ -z "${SET_MAX_FREQ}" ] && return 1
+    }
+
+    [ -z "${SET_MIN_FREQ}" ] || {
+        SET_MIN_FREQ=$(compute_freq_set "${SET_MIN_FREQ}")
+        [ -z "${SET_MIN_FREQ}" ] && return 1
+    }
+
+    #
+    # Ensure correct operation order, to avoid setting min freq
+    # to a value which is larger than max freq.
+    #
+    # E.g.:
+    #   crt_min=crt_max=600; new_min=new_max=700
+    #   > operation order: max=700; min=700
+    #
+    #   crt_min=crt_max=600; new_min=new_max=500
+    #   > operation order: min=500; max=500
+    #
+    if [ -n "${SET_MAX_FREQ}" ] && [ -n "${SET_MIN_FREQ}" ]; then
+        [ ${SET_MAX_FREQ} -lt ${SET_MIN_FREQ} ] && {
+            log ERROR "Cannot set GPU max freq to be less than min freq"
+            return 1
+        }
+
+        read_freq_info n min || return $?
+
+        if [ ${SET_MAX_FREQ} -lt ${FREQ_min} ]; then
+            set_freq_min || return $?
+            set_freq_max
+        else
+            set_freq_max || return $?
+            set_freq_min
+        fi
+    elif [ -n "${SET_MAX_FREQ}" ]; then
+        set_freq_max
+    elif [ -n "${SET_MIN_FREQ}" ]; then
+        set_freq_min
+    else
+        log "Unexpected call to set_freq()"
+        return 1
+    fi
+}
+
+#
+# Helper for detect_throttling().
+#
+# Prints the PID stored in THROTT_DETECT_PID_FILE_PATH, provided that
+# /proc/<pid>/cmdline is readable and mentions this script's name.
+#
+# return: 0 when the PID file does not exist (nothing printed) or when a
+#         matching process was found (PID printed); 1 when the PID file could
+#         not be read or was stale, in which case the stale file is removed
+#
+get_thrott_detect_pid() {
+    local pid proc_path
+
+    [ -e ${THROTT_DETECT_PID_FILE_PATH} ] || return 0
+
+    if ! read pid < ${THROTT_DETECT_PID_FILE_PATH}; then
+        log ERROR "Failed to read pid from: %s" "${THROTT_DETECT_PID_FILE_PATH}"
+        return 1
+    fi
+
+    proc_path=/proc/${pid:-invalid}/cmdline
+    if [ -r "${proc_path}" ] && grep -qs "${0##*/}" "${proc_path}"; then
+        printf "%s" "${pid}"
+        return 0
+    fi
+
+    # Remove orphaned PID file
+    rm -rf ${THROTT_DETECT_PID_FILE_PATH}
+    return 1
+}
+
+#
+# Control detection and reporting of GPU throttling events.
+# arg1: start - run throttle detector in background
+#       stop - stop throttle detector process, if any
+#       status - verify if throttle detector is running
+#
+# The detector is a background subshell polling the act/min/cur frequencies
+# every THROTT_DETECT_SLEEP_SEC seconds; its PID is recorded in
+# THROTT_DETECT_PID_FILE_PATH.
+#
+detect_throttling() {
+    local detector_pid
+    detector_pid=$(get_thrott_detect_pid)
+
+    case "$1" in
+    status)
+        printf "Throttling detector is "
+        if [ -z "${detector_pid}" ]; then
+            printf "not running\n" && return 0
+        fi
+        printf "running (pid=%s)\n" ${detector_pid}
+        ;;
+
+    stop)
+        [ -n "${detector_pid}" ] || return 0
+
+        log INFO "Stopping throttling detector (pid=%s)" "${detector_pid}"
+        kill ${detector_pid}
+        sleep 1
+        # Escalate to SIGKILL only if the process is still around
+        if kill -0 ${detector_pid} 2>/dev/null; then
+            kill -9 ${detector_pid}
+        fi
+        rm -rf ${THROTT_DETECT_PID_FILE_PATH}
+        ;;
+
+    start)
+        if [ -n "${detector_pid}" ]; then
+            log WARN "Throttling detector is already running (pid=%s)" ${detector_pid}
+            return 0
+        fi
+
+        (
+            read_freq_info n RPn || exit $?
+
+            while true; do
+                sleep ${THROTT_DETECT_SLEEP_SEC}
+                read_freq_info n act min cur || exit $?
+
+                #
+                # The throttling seems to occur when act freq goes below min.
+                # However, it's necessary to exclude the idle states, where
+                # act freq normally reaches RPn and cur goes below min.
+                #
+                if [ ${FREQ_act} -lt ${FREQ_min} ] &&
+                    [ ${FREQ_act} -gt ${FREQ_RPn} ] &&
+                    [ ${FREQ_cur} -ge ${FREQ_min} ]; then
+                    printf "GPU throttling detected: act=%s min=%s cur=%s RPn=%s\n" \
+                        ${FREQ_act} ${FREQ_min} ${FREQ_cur} ${FREQ_RPn}
+                fi
+            done
+        ) &
+
+        detector_pid=$!
+        log INFO "Started GPU throttling detector (pid=%s)" ${detector_pid}
+
+        printf "%s\n" ${detector_pid} > ${THROTT_DETECT_PID_FILE_PATH} || \
+            log WARN "Failed to write throttle detector PID file"
+        ;;
+    esac
+}
+
+#
+# Print the help text on stdout; the script's base name is substituted in
+# the "Usage:" line.
+#
+print_usage() {
+    local script_name
+    script_name=${0##*/}
+
+    cat <<EOF
+Usage: ${script_name} [OPTION]...
+
+A script to manage Intel GPU frequencies. Can be used for debugging performance
+problems or trying to obtain a stable frequency while benchmarking.
+
+Note Intel GPUs only accept specific frequencies, usually multiples of 50 MHz.
+
+Options:
+  -g, --get [act|enf|cap|all]
+                        Get frequency information: active (default), enforced,
+                        hardware capabilities or all of them.
+
+  -s, --set [{min|max}=]{FREQUENCY[%]|+|-}
+                        Set min or max frequency to the given value (MHz).
+                        Append '%' to interpret FREQUENCY as % of hw max.
+                        Use '+' or '-' to set frequency to hardware max or min.
+                        Omit min/max prefix to set both frequencies.
+
+  -r, --reset           Reset frequencies to hardware defaults.
+
+  -m, --monitor [act|enf|cap|all]
+                        Monitor the indicated frequencies via 'watch' utility.
+                        See '-g, --get' option for more details.
+
+  -d|--detect-thrott [start|stop|status]
+                        Start (default operation) the throttling detector
+                        as a background process. Use 'stop' or 'status' to
+                        terminate the detector process or verify its status.
+
+  --dry-run             See what the script will do without applying any
+                        frequency changes.
+
+  -h, --help            Display this help text and exit.
+EOF
+}
+
+#
+# Parse user input for '-g, --get' option.
+# Returns 0 if a value has been provided, otherwise 1.
+#
+parse_option_get() {
+    local ret=0
+
+    case "$1" in
+    act) GET_ACT_FREQ=1;;
+    enf) GET_ENF_FREQ=1;;
+    cap) GET_CAP_FREQ=1;;
+    all) GET_ACT_FREQ=1; GET_ENF_FREQ=1; GET_CAP_FREQ=1;;
+    -*|"")
+        # No value provided, using default.
+        GET_ACT_FREQ=1
+        ret=1
+        ;;
+    *)
+        print_usage
+        exit 1
+        ;;
+    esac
+
+    return ${ret}
+}
+
+#
+# Validate user input for '-s, --set' option.
+#
+validate_option_set() {
+    case "$1" in
+    +|-|[0-9]%|[0-9][0-9]%)
+        return 0
+        ;;
+    *[!0-9]*|"")
+        print_usage
+        exit 1
+        ;;
+    esac
+}
+
+#
+# Parse script arguments.
+#
+# An option consuming a value performs one 'shift' on its own; together with
+# the unconditional 'shift' at the end of each iteration this drops both the
+# option and its value. Invalid input prints the usage and exits with 1.
+#
+[ $# -eq 0 ] && { print_usage; exit 1; }
+
+while [ $# -gt 0 ]; do
+    case "$1" in
+    -g|--get)
+        # Value is optional: only skip $2 when it was recognised as such
+        parse_option_get "$2" && shift
+        ;;
+
+    -s|--set)
+        # Drop the option itself, the value is now in $1
+        shift
+        case "$1" in
+        min=*)
+            SET_MIN_FREQ=${1#min=}
+            validate_option_set "${SET_MIN_FREQ}"
+            ;;
+        max=*)
+            SET_MAX_FREQ=${1#max=}
+            validate_option_set "${SET_MAX_FREQ}"
+            ;;
+        *)
+            # No min=/max= prefix: the value applies to both frequencies
+            SET_MIN_FREQ=$1
+            validate_option_set "${SET_MIN_FREQ}"
+            SET_MAX_FREQ=${SET_MIN_FREQ}
+            ;;
+        esac
+        ;;
+
+    -r|--reset)
+        # '-' and '+' are resolved to FREQ_RPn and FREQ_RP0 by
+        # compute_freq_set().
+        # NOTE(review): RESET_FREQ is assigned here but not read anywhere
+        # else in this script - confirm whether it is still needed.
+        RESET_FREQ=1
+        SET_MIN_FREQ="-"
+        SET_MAX_FREQ="+"
+        ;;
+
+    -m|--monitor)
+        # Same optional value as '-g'; parse_option_get() also raises the
+        # matching GET_*_FREQ flag(s).
+        MONITOR_FREQ=act
+        parse_option_get "$2" && MONITOR_FREQ=$2 && shift
+        ;;
+
+    -d|--detect-thrott)
+        # Operation is optional and defaults to 'start'
+        DETECT_THROTT=start
+        case "$2" in
+        start|stop|status)
+            DETECT_THROTT=$2
+            shift
+            ;;
+        esac
+        ;;
+
+    --dry-run)
+        DRY_RUN=1
+        ;;
+
+    -h|--help)
+        print_usage
+        exit 0
+        ;;
+
+    *)
+        print_usage
+        exit 1
+        ;;
+    esac
+
+    shift
+done
+
+#
+# Main
+#
+RET=0
+
+identify_intel_gpu || {
+    log INFO "No Intel GPU detected"
+    exit 0
+}
+
+[ -n "${SET_MIN_FREQ}${SET_MAX_FREQ}" ] && { set_freq || RET=$?; }
+print_freq_info
+
+[ -n "${DETECT_THROTT}" ] && detect_throttling ${DETECT_THROTT}
+
+[ -n "${MONITOR_FREQ}" ] && {
+    log INFO "Entering frequency monitoring mode"
+    sleep 2
+    exec watch -d -n 1 "$0" -g "${MONITOR_FREQ}"
+}
+
+exit ${RET}



More information about the mesa-commit mailing list