# Videoconvert needs to be optimized

I tried experimenting with the videoconvert_convert_matrix8 function:

```

/* Clamp X to the 0..255 range of an 8-bit sample.
 * X is expanded more than once, so it must not have side effects. */
#define CLIP(X) ((X) > 255 ? 255 : (X) < 0 ? 0 : (X))

/* Fixed-point RGB -> Y/U/V conversion for 8-bit samples.
 * The coefficients are scaled by 256: "+ 128" rounds to nearest before the
 * ">> 8" divides by 256, then the offset (16 for Y, 128 for U/V) is added and
 * the result is clamped to 0..255.
 * NOTE(review): the coefficients look like the common BT.601 studio-range
 * ones -- confirm against the colorimetry actually negotiated.
 * NOTE: the U and V sums can be negative; ">>" on a negative int is
 * implementation-defined in C (arithmetic shift on GCC/Clang). */
#define RGB2Y(R, G, B) \
  CLIP ((( 66 * (R) + 129 * (G) +  25 * (B) + 128) >> 8) +  16)
#define RGB2U(R, G, B) \
  CLIP (((-38 * (R) -  74 * (G) + 112 * (B) + 128) >> 8) + 128)
#define RGB2V(R, G, B) \
  CLIP (((112 * (R) -  94 * (G) -  18 * (B) + 128) >> 8) + 128)

/*
 * Convert one line of 4-byte pixels in place with the hard-coded
 * RGB2Y / RGB2U / RGB2V integer macros.
 *
 * convert: only convert->width is read; it is used as the pixel count.
 * pixels:  buffer of at least width * 4 bytes, modified in place.
 *
 * For every pixel, bytes 1..3 are read as R, G, B and then overwritten with
 * Y, U, V respectively. Byte 0 is never touched.
 * NOTE(review): byte 0 is presumably the alpha/padding channel of an
 * ARGB -> AYUV style layout -- confirm against the caller.
 */
static void
videoconvert_convert_matrix8 (VideoConvert * convert, gpointer pixels)
{
  guint8 *p = pixels;
  int i;

  for (i = 0; i < convert->width; i++, p += 4) {
    /* Read all three inputs before any store: the outputs reuse the same
     * bytes. */
    int r = p[1];
    int g = p[2];
    int b = p[3];

    /* Y goes to byte 1 (where R was read), not byte 0; storing it in byte 0
     * would clobber that channel and leave stale R data in the luma slot. */
    p[1] = RGB2Y (r, g, b);
    p[2] = RGB2U (r, g, b);
    p[3] = RGB2V (r, g, b);
  }
}

gst-launch-1.0 -e matroskamux name=muxer ! progressreport ! queue !
filesink location=/home/pont/disk/rec_2014-06-17_161811.mkv \ ximagesrc
use-damage=0 ! queue ! video/x-raw,format=BGRx ! videoconvert !
video/x-raw,format=NV12,framerate=25/1 ! queue ! omxh264enc ! h264parse !
queue ! muxer.video_0

Before

perf stat -p `pidof gst-launch-1.0`  -d -a
^C
Performance counter stats for process id '18980':

30384.173422 task-clock                #    0.481 CPUs utilized

27,332 context-switches          #    0.900 K/sec

5,241 cpu-migrations            #    0.172 K/sec

1,642 page-faults               #    0.054 K/sec

90,790,820,778 cycles                    #    2.988 GHz

25,993,859,696 stalled-cycles-frontend   #   28.63% frontend cycles
idle
<not supported> stalled-cycles-backend
245,329,236,634 instructions              #    2.70  insns per cycle

#    0.11  stalled cycles per
insn
17,607,065,972 branches                  #  579.481 M/sec

31,268,606 branch-misses             #    0.18% of all branches

704,453,875 L1-dcache-load-misses:HG  #    0.00% of all L1-dcache
hits

63.130500236 seconds time elapsed

After

perf stat -p `pidof gst-launch-1.0`  -d -a
^C
Performance counter stats for process id '18804':

21621.851703 task-clock                #    0.364 CPUs utilized

22,496 context-switches          #    0.001 M/sec

2,712 cpu-migrations            #    0.125 K/sec

1,547 page-faults               #    0.072 K/sec

67,110,958,855 cycles                    #    3.104 GHz

12,420,679,145 stalled-cycles-frontend   #   18.51% frontend cycles
idle
<not supported> stalled-cycles-backend
202,995,433,561 instructions              #    3.02  insns per cycle

#    0.06  stalled cycles per
insn
7,312,441,083 branches                  #  338.197 M/sec

26,623,263 branch-misses             #    0.36% of all branches

658,903,226 L1-dcache-load-misses:HG  #    0.00% of all L1-dcache
hits

59.371335826 seconds time elapsed

A 20% improvement is a nice boost, in my opinion.

2014-05-20 5:09 GMT+00:00 Edward Hervey <bilboed at bilboed.com>:

> Hi,
>
>    Since:
>     * your biggest cpu usage is videoconvert and x264
>     * You are most likely using a multi-core system
>
>    I would strongly recommend putting queue element before/after
> videoconvert and x264:
>    ... ! queue ! videoconvert ! queue ! <capsfilter> ! <videoencoder> !
> queue ! ...
>
>    That will essentially decouple videoconvert processing and h264
>
>      Edward
> >
> >
> >
> > OMX
> > gst-launch-1.0 -e matroskamux name=muxer ! progressreport ! filesink
> > location=/home/pont/disk/rec_2014-05-19_165945.mkv \ ximagesrc
> > use-damage=0 ! video/x-raw,format=BGRx ! videoconvert !
> > video/x-raw,format=NV12,framerate=25/1 ! omxh264enc ! h264parse !
> > queue ! muxer.video_0
> >
> >
> >
> > VAAPI
> > gst-launch-1.0 -e matroskamux name=muxer ! progressreport ! filesink
> > location=/home/pont/disk/rec_2014-05-19_170015.mkv \ ximagesrc
> > use-damage=0 ! video/x-raw,format=BGRx ! videoconvert !
> > video/x-raw,format=NV12,framerate=25/1 ! vaapiencode_h264 ! queue !
> > muxer.video_0
> >
> >
> >
> > Software
> > gst-launch-1.0 -e matroskamux name=muxer ! progressreport ! filesink
> > location=/home/pont/disk/rec_2014-05-19_170044.mkv \ ximagesrc
> > use-damage=0 ! video/x-raw,format=BGRx ! videoconvert !
> > video/x-raw,format=NV12,framerate=25/1 ! x264enc speed-preset=faster !
> > queue ! muxer.video_0
> >
> >
> >
> > perf top for GStreamer software fullhd 25fps
> > Samples: 297K of event 'cycles', Event count (approx.): 99651686548
> >
> >
> >  23.51%  libgstvideoconvert.so          [.]
> > videoconvert_convert_matrix8
> >  23.44%  libx264.so.142                 [.]
> >  10.35%  libx264.so.142                 [.] 0x000000000009ce74
> >   8.45%  libx264.so.142                 [.]
> >   2.97%  orcexec.47KToS                 [.] 0x00000000000000c4
> >   2.62%  intel_drv.so                   [.] 0x0000000000035dde
> >   2.45%  libgstvideo-1.0.so.0.204.0     [.]
> > video_chroma_down_h2_guint8
> >   2.39%  libgstvideo-1.0.so.0.204.0     [.]
> > video_chroma_down_v2_guint8
> >   2.23%  libx264.so.142                 [.]
> >   2.21%  libx264.so.142                 [.] x264_me_search_ref
> >   1.31%  libx264.so.142                 [.] x264_macroblock_analyse
> >   1.27%  libx264.so.142                 [.]
> > x264_sub8x8_dct_avx.skip_prologue
> >   1.11%  libx264.so.142                 [.]
> > x264_mb_predict_mv_ref16x16
> >   0.93%  libx264.so.142                 [.] x264_macroblock_cache_save
> >   0.61%  libx264.so.142                 [.]
> > x264_mb_predict_mv_direct16x16
> >   0.56%  libx264.so.142                 [.] x264_macroblock_probe_skip
> >
> >
> >
> >
> > perf top for simple screen recorder fullhd 25 fps
> > Samples: 243K of event 'cycles', Event count (approx.): 63217744816
> >
> >
> >  33.70%  libx264.so.142                 [.]
> >  13.57%  libx264.so.142                 [.] 0x000000000001a7b6
> >  11.25%  libx264.so.142                 [.]
> >   3.62%  intel_drv.so                   [.] 0x00000000000168c9
> >   3.54%  simplescreenrecorder           [.]
> > Convert_BGRA_YUV420_SSSE3(unsigned int, unsigned int, unsigned char
> > const*, int, unsigned char* const*, int const*)
> >   3.34%  libx264.so.142                 [.] x264_me_search_ref
> >   3.27%  libx264.so.142                 [.]
> >   1.92%  libx264.so.142                 [.] x264_macroblock_analyse
> >   1.66%  libx264.so.142                 [.]
> > x264_sub8x8_dct_avx.skip_prologue
> >   1.61%  libx264.so.142                 [.]
> > x264_mb_predict_mv_ref16x16
> >   1.42%  libx264.so.142                 [.] x264_macroblock_cache_save
> >   0.91%  libx264.so.142                 [.]
> > x264_mb_predict_mv_direct16x16
> >   0.76%  libpepflashplayer.so           [.] 0x00000000001f4980
> >   0.64%  libx264.so.142                 [.] x264_macroblock_probe_skip
> >   0.54%  libx264.so.142                 [.] x264_ratecontrol_mb_qp
> >   0.54%  perf                           [.] 0x0000000000067856
> >   0.43%  i965_dri.so                    [.] 0x00000000002e59a9
> >
> >
> >
> >
> >
> >
> >
> >
