[Mesa-dev] [PATCH 02/11] i965: downsize 64PASSTHRU formats to equivalent 32FLOAT formats on gen < 8

Tue Jan 10 10:48:55 UTC 2017

On 09/01/17 23:54, Jordan Justen wrote:
> Is this code doing the 'downsize' for gen >= 8 as well?

No. As mentioned on the commit message, gen >= 8 supports the PASSTHRU
formats natively, so they use it directly.

Note that this patch only touches brw_draw_upload.c, that includes the
implementation of .emit (brw_emit_vertices) (see brw_draw_upload.c line
1047) for gen < 8.

For gen >= 8, there is an alternative implementation of .emit,
gen8_emit_vertices (see gen8_draw_upload.c, line 364).

Trying to add more details:
  * Right now:
     * gen >= 8 supports natively PASSTHRU formats
     * So, for double types, brw_get_vertex_surface_type (at
brw_draw_upload.c) returns PASSTHRU format for gen >= 8
     * gen8_emit_vertices (at gen8_draw_upload.c) configures and uses
that format directly.
     * gen7 doesn't even try to do emissions with real double types, as
va64 is not marked as supported.
  * With this series
     * Patch 1 of the series ("i965: return PASSTHRU surface types also
on gen7") returns PASSTHRU formats for gen < 8 too
     * The idea behind is that any generation wants the same
functionality, so I used the format to represent that. I vaguely
remember wip patches that did that in a different way, but I feel that
this was more natural.
     * Now brw_emit_vertices, that is the emit vertices function for gen
< 8 receives PASSTRHU formats, that is not supported by hw.
     * Patch 2 (this one), implements a downsize of that format, that is
basically adding extra uploads if needed, and mapping for a supported
format.

I hope this answer your question.

> -Jordan
>
> On 2017-01-09 09:10:00, Juan A. Suarez Romero wrote:
>> From: Alejandro Piñeiro <apinheiro at igalia.com>
>>
>> gen < 8 doesn't support *64*PASSTHRU formats when emitting
>> vertices. So in order to provide the equivalent functionality, we need
>> to downsize the format to equivalent *32*FLOAT, and in some cases
>> (R64G64B64 and R64G64B64A64) submit two 3DSTATE_VERTEX_ELEMENTS for
>> each vertex element.
>>
>> Signed-off-by: Alejandro Piñeiro <apinheiro at igalia.com>
>> Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_draw_upload.c | 169 +++++++++++++++++++++++-----
>>  1 file changed, 139 insertions(+), 30 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> index 79eb634..9c36d05 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>> @@ -242,6 +242,86 @@ double_types(struct brw_context *brw,
>>             : double_types_float[size]);
>>  }
>>  
>> +static bool
>> +is_passthru_format(uint32_t format)
>> +{
>> +   switch (format) {
>> +   case BRW_SURFACEFORMAT_R64_PASSTHRU:
>> +   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
>> +   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
>> +   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
>> +      return true;
>> +   default:
>> +      return false;
>> +   }
>> +}
>> +
>> +static int
>> +uploads_needed(uint32_t format)
>> +{
>> +   if (!is_passthru_format(format))
>> +      return 1;
>> +
>> +   switch (format) {
>> +   case BRW_SURFACEFORMAT_R64_PASSTHRU:
>> +   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
>> +      return 1;
>> +   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
>> +   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
>> +      return 2;
>> +   default:
>> +      unreachable("not reached");
>> +   }
>> +}
>> +
>> +/*
>> + * Returns the number of componentes associated with a format that is used on
>> + * a 64 to 32 format split. See downsize_format()
>> + */
>> +static int
>> +upload_format_size(uint32_t upload_format)
>> +{
>> +   switch (upload_format) {
>> +   case BRW_SURFACEFORMAT_R32G32_FLOAT:
>> +      return 2;
>> +   case BRW_SURFACEFORMAT_R32G32B32A32_FLOAT:
>> +      return 4;
>> +   default:
>> +      unreachable("not reached");
>> +   }
>> +}
>> +
>> +/*
>> + * Returns the format that we are finally going to use when upload a vertex
>> + * element. It will only change if we are using *64*PASSTHRU formats, as for
>> + * gen < 8 they need to be splitted on two *32*FLOAT formats.
>> + *
>> + * @upload points in which upload we are. Valid values are [0,1]
>> + */
>> +static uint32_t
>> +downsize_format_if_needed(uint32_t format,
>> +                          int upload)
>> +{
>> +   assert(upload == 0 || upload == 1);
>> +
>> +   if (!is_passthru_format(format))
>> +      return format;
>> +
>> +   switch (format) {
>> +   case BRW_SURFACEFORMAT_R64_PASSTHRU:
>> +      return BRW_SURFACEFORMAT_R32G32_FLOAT;
>> +   case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
>> +      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
>> +   case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
>> +      return !upload ? BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
>> +                     : BRW_SURFACEFORMAT_R32G32_FLOAT;
>> +   case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
>> +      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
>> +   default:
>> +      unreachable("not reached");
>> +   }
>> +}
>> +
>>  /**
>>   * Given vertex array type/size/format/normalized info, return
>>   * the appopriate hardware surface type.
>> @@ -805,6 +885,18 @@ brw_emit_vertices(struct brw_context *brw)
>>     if (vs_prog_data->uses_drawid)
>>        nr_elements++;
>>  
>> +   /* If any of the formats of vb.enabled needs more that one upload, we need
>> +    * to add it to nr_elements */
>> +   unsigned extra_uploads = 0;
>> +   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
>> +      struct brw_vertex_element *input = brw->vb.enabled[i];
>> +      uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
>> +
>> +      if (uploads_needed(format) > 1)
>> +         extra_uploads++;
>> +   }
>> +   nr_elements += extra_uploads;
>> +
>>     /* If the VS doesn't read any inputs (calculating vertex position from
>>      * a state variable for some reason, for example), emit a single pad
>>      * VERTEX_ELEMENT struct and bail.
>> @@ -908,6 +1000,10 @@ brw_emit_vertices(struct brw_context *brw)
>>        uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
>>        uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
>>        uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
>> +      unsigned num_uploads = 1;
>> +      unsigned c;
>> +
>> +      num_uploads = uploads_needed(format);
>>  
>>        if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
>>           /* Gen6+ passes edgeflag as sideband along with the vertex, instead
>> @@ -920,38 +1016,51 @@ brw_emit_vertices(struct brw_context *brw)
>>           }
>>        }
>>  
>> -      switch (input->glarray->Size) {
>> -      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
>> -      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
>> -      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
>> -      case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
>> -                                              : BRW_VE1_COMPONENT_STORE_1_FLT;
>> -        break;
>> -      }
>> +      for (c = 0; c < num_uploads; c++) {
>> +         uint32_t upload_format = downsize_format_if_needed(format, c);
>> +         /* If we need more that one upload, the offset stride would be 128
>> +          * bits (16 bytes), as for previous uploads we are using the full
>> +          * entry. */
>> +         unsigned int offset = input->offset + c * 16;
>> +         int size = input->glarray->Size;
>> +
>> +         if (is_passthru_format(format))
>> +            size = upload_format_size(upload_format);
>> +
>> +         switch (size) {
>> +         case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
>> +         case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
>> +         case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
>> +         case 3: comp3 = input->glarray->Integer
>> +                         ? BRW_VE1_COMPONENT_STORE_1_INT
>> +                         : BRW_VE1_COMPONENT_STORE_1_FLT;
>> +            break;
>> +         }
>>  
>> -      if (brw->gen >= 6) {
>> -        OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
>> -                  GEN6_VE0_VALID |
>> -                  (format << BRW_VE0_FORMAT_SHIFT) |
>> -                  (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
>> -      } else {
>> -        OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
>> -                  BRW_VE0_VALID |
>> -                  (format << BRW_VE0_FORMAT_SHIFT) |
>> -                  (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
>> -      }
>> +         if (brw->gen >= 6) {
>> +            OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
>> +                      GEN6_VE0_VALID |
>> +                      (upload_format << BRW_VE0_FORMAT_SHIFT) |
>> +                      (offset << BRW_VE0_SRC_OFFSET_SHIFT));
>> +         } else {
>> +            OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) |
>> +                      BRW_VE0_VALID |
>> +                      (upload_format << BRW_VE0_FORMAT_SHIFT) |
>> +                      (offset << BRW_VE0_SRC_OFFSET_SHIFT));
>> +         }
>>  
>> -      if (brw->gen >= 5)
>> -          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
>> -                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
>> -                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
>> -                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
>> -      else
>> -          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
>> -                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
>> -                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
>> -                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
>> -                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
>> +         if (brw->gen >= 5)
>> +            OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
>> +                      (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
>> +                      (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
>> +                      (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
>> +         else
>> +            OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
>> +                      (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
>> +                      (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
>> +                      (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
>> +                      ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
>> +      }
>>     }
>>  
>>     if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid ||
>> -- 
>> 2.9.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/11] i965: downsize *64*PASSTHRU formats to equivalent *32*FLOAT formats on gen < 8

[Mesa-dev] [PATCH 02/11] i965: downsize 64PASSTHRU formats to equivalent 32FLOAT formats on gen < 8