[PATCH v4] drm/edid: add CTA Video Format Data Block support

Tue Nov 5 22:48:50 UTC 2024

On 9/10/24 08:05, Jani Nikula wrote:
> On Mon, 09 Sep 2024, Hamza Mahfooz <hamza.mahfooz at amd.com> wrote:
>> Video Format Data Blocks (VFDBs) contain the necessary information that
>> needs to be fed to the Optimized Video Timings (OVT) Algorithm.
>> Also, we require OVT support to cover modes that aren't supported by
>> earlier standards (e.g. CVT). So, parse all of the relevant VFDB data
>> and feed it to the OVT Algorithm, to extract all of the missing OVT
>> modes.
>>
>> Suggested-by: Karol Herbst <kherbst at redhat.com>
>> Signed-off-by: Hamza Mahfooz <hamza.mahfooz at amd.com>
>> ---
>> v3: move ovt stuff above add_cea_modes() and break up
>>      calculate_ovt_mode() to make it easier to read.
>>
>> v4: fix 32 bit build and constify read-only vars.
> 
> Please find some review inline. I'm not finished, ran out of time/steam
> a bit, and there's something to work on.
> 
> BR,
> Jani.
> 
> 
>> ---
>>   drivers/gpu/drm/drm_edid.c | 452 +++++++++++++++++++++++++++++++++++++
>>   1 file changed, 452 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
>> index 855beafb76ff..01de2117aaf2 100644
>> --- a/drivers/gpu/drm/drm_edid.c
>> +++ b/drivers/gpu/drm/drm_edid.c
>> @@ -31,6 +31,7 @@
>>   #include <linux/bitfield.h>
>>   #include <linux/byteorder/generic.h>
>>   #include <linux/cec.h>
>> +#include <linux/gcd.h>
>>   #include <linux/hdmi.h>
>>   #include <linux/i2c.h>
>>   #include <linux/kernel.h>
>> @@ -741,6 +742,93 @@ static const struct minimode extra_modes[] = {
>>   	{ 2048, 1536, 60, 0 },
>>   };
>>   
>> +struct cta_rid {
>> +	u16 hactive;
>> +	u16 vactive;
>> +	u8 hratio;
>> +	u8 vratio;
>> +};
>> +
>> +/* CTA-861-I Table 11 - Resolution Identification (RID) */
>> +static const struct cta_rid rids[] = {
>> +	/* RID 0-9 */
>> +	{ 0, 0, 0, 0 },
>> +	{ 1280, 720, 16, 9 },
>> +	{ 1280, 720, 64, 27 },
>> +	{ 1680, 720, 64, 27 },
>> +	{ 1920, 1080, 16, 9 },
>> +	{ 1920, 1080, 64, 27 },
>> +	{ 2560, 1080, 64, 27 },
>> +	{ 3840, 1080, 32, 9 },
>> +	{ 2560, 1440, 16, 9 },
>> +	{ 3440, 1440, 64, 27 },
>> +	/* RID 10-19 */
>> +	{ 5120, 1440, 32, 9 },
>> +	{ 3840, 2160, 16, 9 },
>> +	{ 3840, 2160, 64, 27 },
>> +	{ 5120, 2160, 64, 27 },
>> +	{ 7680, 2160, 32, 9 },
>> +	{ 5120, 2880, 16, 9 },
>> +	{ 5120, 2880, 64, 27 },
>> +	{ 6880, 2880, 64, 27 },
>> +	{ 10240, 2880, 32, 9 },
>> +	{ 7680, 4320, 16, 9 },
>> +	/* RID 20-28 */
>> +	{ 7680, 4320, 64, 27 },
>> +	{ 10240, 4320, 64, 27 },
>> +	{ 15360, 4320, 32, 9 },
>> +	{ 11520, 6480, 16, 9 },
>> +	{ 11520, 6480, 64, 27 },
>> +	{ 15360, 6480, 64, 27 },
>> +	{ 15360, 8640, 16, 9 },
>> +	{ 15360, 8640, 64, 27 },
>> +	{ 20480, 8640, 64, 27 },
> 
> Designated initializers would remove the need for the /* RID 0-9 */
> comments:
> 
> 	[1] = { ... },
> 
>> +};
>> +
>> +/* CTA-861-I Table 12 - AVI InfoFrame Video Format Frame Rate */
>> +static const u16 cta_vf_fr[] = {
> 
> We don't have to make the names as cryptic as possible. This is only
> needed in a few places, so could be video_format_frame_rates or
> something. With a comment that it's indexed with rate index.
> 
>> +	/* Frame Rate 0-7 */
>> +	0, 24, 25, 30, 48, 50, 60, 100,
>> +	/* Frame Rate 8-15 */
>> +	120, 144, 200, 240, 300, 360, 400, 480,
> 
> What if the preference is for 24/1.001 instead of 24?

According to section 4 the spec:

"""
A Video Timing with a vertical frequency that is an integer multiple of
6.00 Hz (e.g., 24.00 or 120.00 Hz) is considered to be the same as a
Video Timing with the equivalent detailed timing information but where
the vertical frequency is adjusted by a factor of 1000/1001 (e.g.,
24/1.001 or 120/1.001). That is, they are considered two versions of
the same Video Timing but with slightly different pixel clock
frequencies.
"""

The different pixel is calculated as part of step 9 of the OVT
algorthim, though I'm not sure it makes sense to include it here since
it would require an FPU.

> 
>> +};
>> +
>> +/* CTA-861-I Table 13 - RID To VIC Mapping */
>> +static const u8 rid_to_vic[][8] = {
>> +	/* RID 0-9 */
>> +	{},
>> +	{ 60, 61, 62, 108, 19, 4, 41, 47 },
>> +	{ 65, 66, 67, 109, 68, 69, 70, 71 },
>> +	{ 79, 80, 81, 110, 82, 83, 84, 85 },
>> +	{ 32, 33, 34, 111, 31, 16, 64, 63 },
>> +	{ 72, 73, 74, 112, 75, 76, 77, 78 },
>> +	{ 86, 87, 88, 113, 89, 90, 91, 92 },
>> +	{},
>> +	{},
>> +	{},
>> +	/* RID 10-19 */
>> +	{},
>> +	{ 93, 94, 95, 114, 96, 97, 117, 118 },
>> +	{ 103, 104, 105, 116, 106, 107, 119, 120 },
>> +	{ 121, 122, 123, 124, 125, 126, 127, 193 },
>> +	{},
>> +	{},
>> +	{},
>> +	{},
>> +	{},
>> +	{ 194, 195, 196, 197, 198, 199, 200, 201 },
>> +	/* RID 20-28 */
>> +	{ 202, 203, 204, 205, 206, 207, 208, 209 },
>> +	{ 210, 211, 212, 213, 214, 215, 216, 217 },
>> +	{},
>> +	{},
>> +	{},
>> +	{},
>> +	{},
>> +	{},
>> +	{},
> 
> Sparse stuff like this would work better with designated initializers?
> 
> 	[1] = { ... },
> 
> Also removes the need for the /* RID 0-9 */ comments.
> 
>> +};
>> +
>>   /*
>>    * From CEA/CTA-861 spec.
>>    *
>> @@ -4131,6 +4219,7 @@ static int add_detailed_modes(struct drm_connector *connector,
>>   #define CTA_DB_VIDEO			2
>>   #define CTA_DB_VENDOR			3
>>   #define CTA_DB_SPEAKER			4
>> +#define CTA_DB_VIDEO_FORMAT		6
>>   #define CTA_DB_EXTENDED_TAG		7
>>   
>>   /* CTA-861-H Table 62 - CTA Extended Tag Codes */
>> @@ -4972,6 +5061,16 @@ struct cea_db {
>>   	u8 data[];
>>   } __packed;
>>   
>> +struct cta_vfd {
>> +	u8 rid;
>> +	u8 fr_fact;
>> +	bool bfr50;
>> +	bool fr24;
>> +	bool bfr60;
>> +	bool fr144;
>> +	bool fr48;
>> +};
>> +
>>   static int cea_db_tag(const struct cea_db *db)
>>   {
>>   	return db->tag_length >> 5;
>> @@ -5250,6 +5349,357 @@ static int edid_hfeeodb_extension_block_count(const struct edid *edid)
>>   	return cta[4 + 2];
>>   }
>>   
>> +/* CTA-861 Video Format Descriptor (CTA VFD) */
>> +static void parse_cta_vfd(struct cta_vfd *vfd, const u8 *data, int vfd_len)
>> +{
>> +	vfd->rid = data[0] & 0x3f;
>> +	vfd->bfr50 = data[0] & 0x80;
>> +	vfd->fr24 = data[0] & 0x40;
>> +	vfd->bfr60 = vfd_len > 1 ? (data[1] & 0x80) : 0x1;
> 
> It's a bool member, so true instead of 0x1.
> 
>> +	vfd->fr144 = vfd_len > 1 ? (data[1] & 0x40) : 0x0;
> 
> false instead of 0x0
> 
>> +	vfd->fr_fact = vfd_len > 1 ? (data[1] & 0x3f) : 0x3;
>> +	vfd->fr48 = vfd_len > 2 ? (data[2] & 0x1) : 0x0;
> 
> false instead of 0x0
> 
>> +}
>> +
>> +static bool vfd_has_fr(const struct cta_vfd *vfd, int rate_idx)
>> +{
>> +	static const u8 factors[] = {
>> +		1, 2, 4, 8, 12, 16
>> +	};
>> +	u16 rate = cta_vf_fr[rate_idx];
> 
> Would make more sense to have int rate parameter instead of rate_idx, so
> you don't have to access cta_vf_fr here.
> 
> Having it int makes sense because it's used in calculation.
> 
>> +	u16 factor = 0;
>> +	unsigned int i;
> 
> Just use int, for both.
> 
>> +
>> +	switch (rate) {
>> +	case 24:
>> +		return vfd->fr24;
>> +	case 48:
>> +		return vfd->fr48;
>> +	case 144:
>> +		return vfd->fr144;
>> +	}
>> +
>> +	if (!(rate % 25)) {
>> +		if (!vfd->bfr50)
>> +			return false;
>> +
>> +		factor = rate / 25;
>> +	} else if (!(rate % 30)) {
>> +		if (!vfd->bfr60)
>> +			return false;
>> +
>> +		factor = rate / 30;
>> +	}
>> +
>> +	for (i = 0; i < ARRAY_SIZE(factors); i++)
>> +		if (factor == factors[i] && (vfd->fr_fact & (1 << i)))
>> +			return true;
>> +
>> +	return false;
>> +}
>> +
>> +#define OVT_PIXEL_CLOCK_GRANULARITY	1000		/* Hz */
>> +#define OVT_MIN_HTOTAL_GRANULARITY	8		/* pixels */
>> +#define OVT_MIN_VBLANK_DURATION	460000000	/* ps */
>> +#define OVT_MIN_VBLANK_LINES		20
>> +#define OVT_MIN_VSYNC_LEADING_EDGE	400		/* us */
>> +#define OVT_MIN_VSYNC_LE_LINES		14
>> +#define OVT_MIN_CLOCK_RATE_420		590000000	/* Hz */
>> +#define OVT_PIXEL_FACTOR_420		2
>> +#define OVT_MIN_HBLANK_444		80		/* pixels */
>> +#define OVT_MIN_HBLANK_420		128		/* pixels */
>> +#define OVT_MAX_CHUNK_RATE		650000000	/* Hz */
>> +#define OVT_AUDIO_PACKET_RATE		195000		/* Hz */
>> +#define OVT_AUDIO_PACKET_SIZE		32
>> +#define OVT_LINE_OVERHEAD		32
>> +#define OVT_HSYNC_WIDTH		32
>> +#define OVT_VSYNC_WIDTH		8
>> +
>> +static u32 calculate_ovt_min_vtotal(const struct cta_rid *rid, u64 max_vrate,
>> +				    u32 vtotal_granularity)
>> +{
>> +	u64 max_active_time;
>> +	u32 min_line_time;
>> +	u32 min_vblank;
>> +	u32 min_vtotal;
>> +
>> +	/* step 2 */
>> +	max_active_time = div64_u64(1000000000000, max_vrate) -
>> +		(u64)OVT_MIN_VBLANK_DURATION;
>> +
>> +	min_line_time = div_u64(max_active_time, rid->vactive);
>> +
>> +	min_vblank = max_t(u64, (u64)OVT_MIN_VBLANK_LINES,
>> +			   DIV64_U64_ROUND_UP(OVT_MIN_VBLANK_DURATION,
>> +					      min_line_time));
>> +
>> +	min_vtotal = rid->vactive + min_vblank;
>> +
>> +	if (min_vtotal % vtotal_granularity)
>> +		min_vtotal += vtotal_granularity - (min_vtotal %
>> +						    vtotal_granularity);
>> +
>> +	return min_vtotal;
>> +}
>> +
>> +static u32 calculate_ovt_min_htotal(const struct cta_rid *rid,
>> +				    const u32 max_vrate,
>> +				    const u32 min_vtotal,
>> +				    u32 *min_hblank,
>> +				    u32 *htotal_granularity)
>> +{
>> +	u32 max_audio_packets_per_line;
>> +	u32 htotal_granularity_chunk;
>> +	u64 min_pixel_clock_rate;
>> +	u32 min_line_rate;
>> +	u32 min_htotal;
>> +
>> +	/* step 3 */
>> +	min_line_rate = max_vrate * min_vtotal;
>> +
>> +	max_audio_packets_per_line = DIV_ROUND_UP(OVT_AUDIO_PACKET_RATE,
>> +						  min_line_rate);
>> +
>> +	/* step 4 */
>> +	*min_hblank = OVT_LINE_OVERHEAD + OVT_AUDIO_PACKET_SIZE *
>> +		max_audio_packets_per_line;
>> +
>> +	min_htotal = rid->hactive + max(OVT_MIN_HBLANK_444, *min_hblank);
>> +
>> +	min_pixel_clock_rate = max_vrate * min_htotal * min_vtotal;
>> +
>> +	htotal_granularity_chunk =
>> +		roundup_pow_of_two(DIV64_U64_ROUND_UP(min_pixel_clock_rate,
>> +						      OVT_MAX_CHUNK_RATE));
>> +
>> +	*htotal_granularity = max(OVT_MIN_HTOTAL_GRANULARITY,
>> +				  htotal_granularity_chunk);
>> +
>> +	if (min_htotal % *htotal_granularity)
>> +		min_htotal += *htotal_granularity - (min_htotal %
>> +						     *htotal_granularity);
>> +
>> +	return min_htotal;
>> +}
>> +
>> +static u64 calculate_ovt_pixel_clock_rate(const struct cta_rid *rid,
>> +					  const u32 max_vrate,
>> +					  const u32 min_hblank,
>> +					  u32 min_htotal,
>> +					  u32 min_vtotal,
>> +					  const u32 htotal_granularity,
>> +					  const u32 vtotal_granularity,
>> +					  u32 *htotal, u32 *vtotal)
>> +{
>> +	u32 resolution_granularity;
>> +	u64 pixel_clock_rate;
>> +	u64 min_resolution;
>> +	u64 rem;
>> +	u32 h;
>> +	u64 r;
>> +	u32 v;
>> +
>> +	resolution_granularity = OVT_PIXEL_CLOCK_GRANULARITY /
>> +		gcd(OVT_PIXEL_CLOCK_GRANULARITY, max_vrate);
>> +
>> +	do {
>> +		/* step 5 */
>> +		min_resolution = 0;
>> +		v = min_vtotal;
>> +
>> +		goto loop_end;
>> +
>> +		while (!min_resolution || r <= min_resolution) {
>> +			goto inner_loop_end;
>> +
>> +			while (rem || div64_u64(max_vrate * r, (h & ~(h - 1))) >
>> +			       OVT_MAX_CHUNK_RATE) {
>> +				h += htotal_granularity;
>> +				r = (u64)h * (u64)v;
>> +inner_loop_end:
>> +				div64_u64_rem(r, resolution_granularity, &rem);
>> +			}
>> +
>> +			if (!min_resolution || r < min_resolution) {
>> +				*htotal = h;
>> +				*vtotal = v;
>> +				min_resolution = r;
>> +			}
>> +
>> +			v += vtotal_granularity;
>> +
>> +loop_end:
>> +			h = min_htotal;
>> +			r = (u64)h * (u64)v;
>> +		}
>> +
>> +		pixel_clock_rate = max_vrate * min_resolution;
>> +
>> +		/* step 6 */
>> +		min_htotal = rid->hactive + max(OVT_MIN_HBLANK_420,
>> +						OVT_PIXEL_FACTOR_420 *
>> +						min_hblank);
>> +
>> +	} while (pixel_clock_rate >= OVT_MIN_CLOCK_RATE_420 &&
>> +		 *htotal < min_htotal);
>> +
>> +	return pixel_clock_rate;
>> +}
>> +
>> +/* OVT Algorthim as specified in CTA-861-I */
>> +static struct drm_display_mode *
>> +calculate_ovt_mode(struct drm_connector *connector, const struct cta_rid *rid,
>> +		   u16 vrate)
>> +{
>> +	struct drm_display_mode *mode;
>> +	u32 vtotal_granularity = 1;
>> +	u32 htotal_granularity;
>> +	u32 max_vrate = vrate;
>> +	u64 pixel_clock_rate;
>> +	u32 vsync_position;
>> +	u32 min_hblank;
>> +	u32 min_htotal;
>> +	u32 min_vtotal;
>> +	u32 htotal;
>> +	u32 vtotal;
>> +
>> +	/* step 1 */
>> +	switch (vrate) {
>> +	case 24:
>> +	case 25:
>> +		max_vrate = 30;
>> +		fallthrough;
>> +	case 30:
>> +		vtotal_granularity = 20;
>> +		break;
>> +	case 48:
>> +	case 50:
>> +		max_vrate = 60;
>> +		fallthrough;
>> +	case 60:
>> +		vtotal_granularity = 20;
>> +		break;
>> +	case 100:
>> +		max_vrate = 120;
>> +		fallthrough;
>> +	case 120:
>> +		vtotal_granularity = 5;
>> +		break;
>> +	case 200:
>> +		max_vrate = 240;
>> +		fallthrough;
>> +	case 240:
>> +		vtotal_granularity = 5;
>> +		break;
>> +	case 300:
>> +		max_vrate = 360;
>> +		fallthrough;
>> +	case 360:
>> +		vtotal_granularity = 5;
>> +		break;
>> +	case 400:
>> +		max_vrate = 480;
>> +		fallthrough;
>> +	case 480:
>> +		vtotal_granularity = 5;
>> +		break;
>> +	}
>> +
>> +	min_vtotal = calculate_ovt_min_vtotal(rid, max_vrate,
>> +					      vtotal_granularity);
>> +
>> +	min_htotal = calculate_ovt_min_htotal(rid, max_vrate, min_vtotal,
>> +					      &min_hblank, &htotal_granularity);
>> +
>> +	pixel_clock_rate = calculate_ovt_pixel_clock_rate(rid, max_vrate,
>> +							  min_hblank,
>> +							  min_htotal,
>> +							  min_vtotal,
>> +							  htotal_granularity,
>> +							  vtotal_granularity,
>> +							  &htotal, &vtotal);
>> +
>> +	/* step 7 */
>> +	vtotal = vtotal * max_vrate / (u32)vrate;
>> +
>> +	/* step 8 */
>> +	vsync_position = max(OVT_MIN_VSYNC_LE_LINES,
>> +			     DIV64_U64_ROUND_UP((u64)OVT_MIN_VSYNC_LE_LINES *
>> +						pixel_clock_rate,
>> +						(u64)htotal * (u64)1000000));
>> +
>> +	mode = drm_mode_create(connector->dev);
>> +
>> +	if (!mode)
>> +		return NULL;
>> +
>> +	mode->clock = div_u64(pixel_clock_rate, 1000);
>> +	mode->hdisplay = rid->hactive;
>> +	mode->hsync_start = htotal - OVT_HSYNC_WIDTH * 2;
>> +	mode->hsync_end = mode->hsync_start + OVT_HSYNC_WIDTH;
>> +	mode->htotal = htotal;
>> +
>> +	mode->vdisplay = rid->vactive;
>> +	mode->vsync_start = vtotal - vsync_position;
>> +	mode->vsync_end = mode->vsync_start + OVT_VSYNC_WIDTH;
>> +	mode->vtotal = vtotal;
>> +
>> +	return mode;
>> +}
>> +
>> +/* CTA-861 Video Format Data Block (CTA VFDB) */
>> +static int add_modes_from_vfdb(struct drm_connector *connector,
>> +			       const struct cea_db *db)
>> +{
>> +	struct drm_display_info *info = &connector->display_info;
> 
> const
> 
>> +	int vfdb_len = cea_db_payload_len(db);
>> +	struct drm_display_mode *mode;
>> +	struct cta_vfd vfd;
>> +	int num_modes = 0;
>> +	int vfd_len;
>> +	int i;
>> +	int j;
>> +
>> +	if (!vfdb_len)
>> +		return 0;
>> +
>> +	vfd_len = (db->data[0] & 0x3) + 1;
>> +
>> +	if (!vfd_len)
>> +		return 0;
> 
> This can never happen on the account of + 1.
> 
>> +
>> +	vfdb_len--;
>> +
>> +	vfdb_len -= (vfdb_len % vfd_len);
> 
> Matter of taste, I'd probably write the above lines as:
> 
> 	num_vfd = (vfdb_len - 1) / vfd_len
> 
>> +
>> +	for (i = 0; i < vfdb_len; i += vfd_len) {
> 
> And iterate like this:
> 
> 	for (i = 0; i < num_vfd; i++) {
> 		const u8 *vfd = &db->data[1 + i * vfd_len];
> 
> Up to you.
> 
>> +		parse_cta_vfd(&vfd, &db->data[i + 1], vfd_len);
>> +
>> +		if (!vfd.rid || vfd.rid >= ARRAY_SIZE(rids))
>> +			continue;
>> +
>> +		for (j = 1; j < ARRAY_SIZE(cta_vf_fr); j++) {
> 
> Maybe this should be int rate_index instead of j, because it does have a
> name in the spec instead of just random iteration?
> 
> Probably helps to have
> 
> 			int rate = video_format_frame_rate[rate_index];
> 
> 
>> +			if (!vfd_has_fr(&vfd, j) ||
> 
> Pass in rate there.
> 
>> +			    (cta_vf_fr[j] < 144 && rid_to_vic[vfd.rid][j - 1]))
> 
> This makes my head spin. Probably want a function that says what it
> does, and can explain in comments inside. Also useful because you'll
> need to bounds check rid_to_vic access.
> 
>> +				continue;
>> +
>> +			mode = calculate_ovt_mode(connector, &rids[vfd.rid],
>> +						  cta_vf_fr[j]);
>> +
>> +			if (!mode)
>> +				continue;
>> +
>> +			mode->height_mm = info->height_mm;
>> +			mode->width_mm = info->width_mm;
>> +
>> +			drm_mode_probed_add(connector, mode);
>> +			num_modes++;
>> +		}
>> +	}
>> +
>> +	return num_modes;
>> +}
>> +
>>   /*
>>    * CTA-861 YCbCr 4:2:0 Capability Map Data Block (CTA Y420CMDB)
>>    *
>> @@ -5318,6 +5768,8 @@ static int add_cea_modes(struct drm_connector *connector,
>>   			/* Add 4:2:0(only) modes present in EDID */
>>   			modes += do_y420vdb_modes(connector, vdb420,
>>   						  cea_db_payload_len(db) - 1);
>> +		} else if (cea_db_tag(db) == CTA_DB_VIDEO_FORMAT) {
>> +			modes += add_modes_from_vfdb(connector, db);
>>   		}
>>   	}
>>   	cea_db_iter_end(&iter);
> 

-- 
Hamza