<div dir="ltr"><div>Hello!</div><div><br></div><div>As the title says, I'm trying to interface a GStreamer pipeline (receiver) with OpenCV for some video frame processing, and I need to pass the processed frames on to another GStreamer pipeline (sender).</div><div>Receiver pipeline:</div><div>rtspsrc ! rtph264depay ! h264parse ! nvh264dec ! glcolorconvert ! appsink</div><div><br></div><div>Sender pipeline:</div><div>appsrc ! nvh264enc ! rtph264pay (rtsp-server will provide a stream for rtsp clients)</div><div><br></div><div>Basically: Receiver -> OpenCV -> Sender</div><div><br></div><div>Because I need to deliver 16 (16 pipelines in, 16 pipelines out) 4K H264/H265 streams simultaneously, I need to do all the heavy lifting on a GPU (I have access to an NVIDIA RTX 5000).</div><div>So far I have managed to set up the appsink and appsrc elements to copy GL textures to CUDA memory and back. However, the performance is poor: it performs adequately only with a single output stream (sender pipeline).</div><div>With 10 receiver streams and no sender streams attached it also performs quite nicely, so I guess the problem comes from the sender side.<br></div><div><br></div><div>I have not managed to find what is causing the bottleneck. 
Any suggestions or help is welcomed.</div><div><br></div><div>Here is my (simplified) code</div><div><br></div><div>Receiver side(appsink):</div><div><br></div><div>// pullGpuMat is defined further down but referenced by newSample<br>
static void pullGpuMat(GstGLContext* context, PullGpuMatData* data);<br>
<br>
// Callback from 'new-sample' signal<br>
// Pulls the pending sample from the appsink and copies its GL texture into<br>
// CUDA memory by running pullGpuMat through gst_gl_context_thread_add().<br>
// Returns GST_FLOW_EOS when no sample could be pulled, GST_FLOW_ERROR when the<br>
// sample carries no GL memory, and GST_FLOW_OK otherwise.</div><div>static GstFlowReturn newSample(GstElement* sink, gpointer /*user_data*/)<br>
{<br>
    GstSample* sample = nullptr;<br>
    g_signal_emit_by_name(sink, "pull-sample", &sample);<br>
    if (sample == nullptr) {<br>
        // "pull-sample" hands back NULL once the appsink is stopped or at EOS<br>
        return GST_FLOW_EOS;<br>
    }<br>
<br>
    GstBuffer* buffer = gst_sample_get_buffer(sample);<br>
    GstMemory* memory = (buffer != nullptr) ? gst_buffer_get_memory(buffer, 0) : nullptr;<br>
    if (memory == nullptr || !gst_is_gl_memory(memory)) {<br>
        // Without GL memory the cast below would be invalid, so bail out cleanly<br>
        if (memory != nullptr) {<br>
            gst_memory_unref(memory);<br>
        }<br>
        gst_sample_unref(sample);<br>
        return GST_FLOW_ERROR;<br>
    }<br>
<br>
    PullGpuMatData data;<br>
    data.glMemory = GST_GL_MEMORY_CAST(memory);<br>
    data.handler = d;<br>
<br>
    // 'data' lives on this stack frame: this relies on<br>
    // gst_gl_context_thread_add() returning only after pullGpuMat has run.<br>
    gst_gl_context_thread_add(<br>
        data.glMemory->mem.context,<br>
        (GstGLContextThreadFunc)pullGpuMat,<br>
        &data<br>
    );<br>
<br>
    gst_memory_unref(memory);<br>
    gst_sample_unref(sample);<br>
    return GST_FLOW_OK;<br>
}<br>
<br>
// Invoked through gst_gl_context_thread_add() from newSample.<br>
// Copies the texture behind data->glMemory into gpuMat (declared outside<br>
// this snippet) via an intermediate OpenGL buffer that is mapped into CUDA.<br>
static void pullGpuMat(GstGLContext* context, PullGpuMatData* data)<br>
{<br>
    const int width = gst_gl_memory_get_texture_width(data->glMemory);<br>
    const int height = gst_gl_memory_get_texture_height(data->glMemory);<br>
    const guint textureId = gst_gl_memory_get_texture_id(data->glMemory);<br>
<br>
    // Wraps a GL texture for OpenCV. autoRelease is false because the texture<br>
    // id is taken from the GstGLMemory and must not be deleted by OpenCV.<br>
    cv::ogl::Texture2D texture = cv::ogl::Texture2D({ width, height },<br>
        cv::ogl::Texture2D::Format::RGBA,<br>
        textureId, false);<br>
<br>
    // NOTE(review): a fresh cv::ogl::Buffer is created for every frame, which<br>
    // presumably also repeats the CUDA/GL interop setup on each mapDevice();<br>
    // reusing one buffer per stream may be cheaper - confirm by profiling.<br>
    cv::ogl::Buffer glBuffer;<br>
    texture.copyTo(glBuffer, CV_8U, true);<br>
<br>
    cv::cuda::GpuMat tempMat = glBuffer.mapDevice();<br>
    tempMat.copyTo(gpuMat);<br>
    glBuffer.unmapDevice();<br>
<br>
    // Got the result in gpuMat<br>
}<br>
<br>
<br>
<div></div><div>Sender side(appsrc):</div><br>
// These functions are referenced before their definitions below<br>
static void pushFrame(GstGLContext* context, PushFrameData* data);<br>
static void glMemoryFree(FreeTextureData* data);<br>
static void releaseTexture(GstGLContext* , FreeTextureData* data);<br>
<br>
// For every sender pipeline a GstGLContext is created<br>
// Creates a GstGLDisplay and a GstGLContext on it and stores the context in<br>
// glContext (declared outside this snippet).<br>
// Returns true when gst_gl_context_create() succeeded; on failure the reason<br>
// is logged and false is returned.<br>
bool init()<br>
{<br>
    GstGLDisplay* glDisplay = gst_gl_display_new();<br>
    glContext = gst_gl_context_new(glDisplay);<br>
<br>
    GError* error = nullptr;<br>
    const bool created = gst_gl_context_create(glContext, nullptr, &error) != FALSE;<br>
    if (!created) {<br>
        g_warning("Failed to create GL context: %s",<br>
            (error != nullptr) ? error->message : "unknown error");<br>
    }<br>
    // The GError is owned by the caller of gst_gl_context_create(); free it<br>
    g_clear_error(&error);<br>
<br>
    gst_object_unref(glDisplay);<br>
    return created;<br>
}<br>
<br>
<br>
// Hands one processed frame to the sender pipeline by running pushFrame<br>
// through gst_gl_context_thread_add() on glContext.<br>
void pushFrameFromGpuMat(cv::cuda::GpuMat mat)<br>
{<br>
    PushFrameData data;<br>
    data.mat = mat;<br>
<br>
    // gst_gl_context_thread_add() already executes the callback in the GL<br>
    // thread that owns glContext, so the context must not be activated or<br>
    // deactivated from this (application) thread around the call.<br>
    // 'data' lives on this stack frame: this relies on the call returning<br>
    // only after pushFrame has run.<br>
    gst_gl_context_thread_add(<br>
        glContext,<br>
        (GstGLContextThreadFunc)pushFrame,<br>
        &data<br>
    );<br>
<br>
    // cuda memory release (drops this function's reference to the frame)<br>
    mat.release();<br>
}<br>
<br>
// Invoked through gst_gl_context_thread_add() from pushFrameFromGpuMat.<br>
// Copies data->mat into a new GL texture, wraps the texture in a GstGLMemory,<br>
// puts it into a timestamped GstBuffer and pushes that into the appsrc.<br>
// timestamp, framerate and elems are declared outside this snippet.<br>
static void pushFrame(GstGLContext* context, PushFrameData* data)<br>
{<br>
    const cv::cuda::GpuMat& mat = data->mat;<br>
<br>
    // Copy from cuda memory to a GL texture<br>
<br>
    cv::ogl::Buffer glBuffer;<br>
    glBuffer.copyFrom(mat);<br>
    cv::ogl::Texture2D* texture = new cv::ogl::Texture2D(); // using heap as I need to release the texture manually<br>
    texture->copyFrom(glBuffer);<br>
<br>
<br>
    // Wrap the texture into GstGLMemory<br>
<br>
    GstVideoInfo vinfo;<br>
    gst_video_info_set_format(&vinfo, GST_VIDEO_FORMAT_RGBA, mat.cols, mat.rows);<br>
<br>
    GstAllocator* allocator = GST_ALLOCATOR(gst_gl_memory_allocator_get_default(context));<br>
<br>
    // Passed to glMemoryFree as the destroy-notify argument; glMemoryFree<br>
    // deletes both the texture and this struct.<br>
    FreeTextureData* freeData = new FreeTextureData;<br>
    freeData->context = context;<br>
    freeData->texture = texture;<br>
<br>
    GstGLVideoAllocationParams* params = gst_gl_video_allocation_params_new_wrapped_texture(<br>
        context, nullptr, &vinfo, 0, nullptr, GST_GL_TEXTURE_TARGET_2D, GST_GL_RGBA, texture->texId(),<br>
        freeData, (GDestroyNotify)glMemoryFree);<br>
<br>
    GstGLMemory* glMemory = GST_GL_MEMORY_CAST(gst_gl_base_memory_alloc(<br>
        GST_GL_BASE_MEMORY_ALLOCATOR_CAST(allocator), (GstGLAllocationParams*) params));<br>
<br>
    gst_gl_allocation_params_free((GstGLAllocationParams *)params);<br>
    gst_object_unref(allocator);<br>
<br>
<br>
    // Attach GstGLMemory object into buffer, timestamp the buffer and push it downstream<br>
<br>
    GstBuffer* buffer = gst_buffer_new();<br>
    gst_buffer_append_memory(buffer, GST_MEMORY_CAST(glMemory));<br>
<br>
    GST_BUFFER_PTS(buffer) = timestamp;<br>
    GST_BUFFER_DURATION(buffer) = gst_util_uint64_scale_int(1, GST_SECOND, framerate);<br>
    timestamp += GST_BUFFER_DURATION(buffer);<br>
<br>
    GstFlowReturn ret = GST_FLOW_OK;<br>
    g_signal_emit_by_name(elems.src, "push-buffer", buffer, &ret);<br>
    if (ret != GST_FLOW_OK) {<br>
        g_warning("push-buffer failed: %s", gst_flow_get_name(ret));<br>
    }<br>
<br>
    // The "push-buffer" action signal does not take ownership of the buffer<br>
    gst_buffer_unref(buffer);<br>
}<br>
<br>
// To free the wrapped texture<br>
// Registered in pushFrame as the GDestroyNotify of the wrapped GstGLMemory.<br>
// Releases the GL texture through releaseTexture, then deletes the texture<br>
// object and the FreeTextureData itself.<br>
static void glMemoryFree(FreeTextureData* data)<br>
{<br>
    // The deletes below rely on gst_gl_context_thread_add() returning only<br>
    // after releaseTexture has run.<br>
    gst_gl_context_thread_add(<br>
        data->context,<br>
        (GstGLContextThreadFunc)releaseTexture,<br>
        data<br>
    );<br>
<br>
    delete data->texture;<br>
    delete data;<br>
}<br>
<br>
// Invoked through gst_gl_context_thread_add() from glMemoryFree; releases<br>
// the OpenGL texture held by data->texture.<br>
static void releaseTexture(GstGLContext* , FreeTextureData* data)<br>
{<br>
    data->texture->release();<br>
}<br>
</div><div><br></div><div><br></div><div>Viljar Hera</div></div>