Interfacing gstreamer with OpenCV using opengl and cuda

meistrimees at gmail.com meistrimees at gmail.com
Wed Dec 16 12:43:08 UTC 2020


Hello!

As the title says, I'm trying to interface a GStreamer pipeline (receiver)
with OpenCV for some video frame processing, and I need to pass the processed
frames on to another GStreamer pipeline (sender).
Receiver pipeline:
rtspsrc ! rtph264depay ! h264parse ! nvh264dec ! glcolorconvert ! appsink

Sender pipeline:
appsrc ! nvh264enc ! rtph264pay (rtsp-server will provide a stream for rtsp
clients)

Basically: Receiver -> OpenCV -> Sender

Because I need to deliver 16 (16 pipelines in, 16 pipelines out) 4K
H264/H265 streams simultaneously, I need to do all the heavy lifting on the
GPU (I have access to an Nvidia RTX 5000).
So far I have managed to set up the appsink and appsrc elements to copy GL
textures to CUDA memory and back. However, the performance is poor: it only
performs adequately with 1 output stream (sender pipeline).
Running with 10 receiver streams and no sender streams attached also
performs quite nicely, so I guess the problem comes from the sender side.

I have not managed to find what is causing the bottleneck. Any
suggestions or help are welcome.

Here is my (simplified) code

Receiver side(appsink):

// Callback for the appsink 'new-sample' signal.
//
// Pulls the pending sample from the sink, takes the first memory block of its
// buffer and hands it to pullGpuMat through gst_gl_context_thread_add on the
// GL context that owns that memory.
//
// Returns:
//   GST_FLOW_OK    - the frame was handed to pullGpuMat
//   GST_FLOW_EOS   - "pull-sample" produced no sample
//   GST_FLOW_ERROR - the sample has no buffer/memory, or the memory is not
//                    GL memory (so it cannot be cast to GstGLMemory)
static GstFlowReturn newSample(GstElement* sink, gpointer /*user_data*/)
{
    GstSample* sample = nullptr;
    g_signal_emit_by_name(sink, "pull-sample", &sample);
    if (sample == nullptr) {
        // Nothing to process; treat a missing sample as end of stream.
        return GST_FLOW_EOS;
    }

    GstBuffer* buffer = gst_sample_get_buffer(sample);
    if (buffer == nullptr || gst_buffer_n_memory(buffer) == 0) {
        gst_sample_unref(sample);
        return GST_FLOW_ERROR;
    }

    GstMemory* memory = gst_buffer_get_memory(buffer, 0);
    if (memory == nullptr || !gst_is_gl_memory(memory)) {
        // Without this check a system-memory buffer would be reinterpreted
        // as GstGLMemory below.
        if (memory != nullptr) {
            gst_memory_unref(memory);
        }
        gst_sample_unref(sample);
        return GST_FLOW_ERROR;
    }

    PullGpuMatData data;
    data.glMemory = GST_GL_MEMORY_CAST(memory);
    data.handler = d;

    // `data` lives on this stack frame and `memory` is unreffed right after
    // the call, so this relies on gst_gl_context_thread_add having finished
    // running pullGpuMat by the time it returns.
    // NOTE(review): confirm against the GstGLContext documentation.
    gst_gl_context_thread_add(
        data.glMemory->mem.context,
        (GstGLContextThreadFunc)pullGpuMat,
        &data
    );

    gst_memory_unref(memory);
    gst_sample_unref(sample);
    return GST_FLOW_OK;
}

static void pullGpuMat(GstGLContext* context, PullGpuMatData* data)
{
    const int width = gst_gl_memory_get_texture_width(data->glMemory);
    const int height = gst_gl_memory_get_texture_height(data->glMemory);
    const guint textureId = gst_gl_memory_get_texture_id(data->glMemory);

    // Wraps a GL texture for OpenCV
    cv::ogl::Texture2D texture = cv::ogl::Texture2D({ width, height },

cv::ogl::Texture2D::Format::RGBA,
                                                    textureId, false);
    cv::ogl::Buffer glBuffer;
    texture.copyTo(glBuffer, CV_8U, true);

    cv::cuda::GpuMat tempMat = glBuffer.mapDevice();
    tempMat.copyTo(gpuMat);
    glBuffer.unmapDevice();

    // Got the result in gpuMat
}


Sender side(appsrc):

// For every sender pipeline a GstGLContext is created
bool init()
{
    GstGLDisplay* glDisplay = gst_gl_display_new();
    glContext = gst_gl_context_new(glDisplay);

    GError *error = NULL;
    bool ret = gst_gl_context_create(glContext, 0, &error);
    gst_object_unref(glDisplay);
    return ret;
}


// Sends one CUDA frame into the sender pipeline.
//
// Wraps `mat` in a PushFrameData and schedules pushFrame on glContext through
// gst_gl_context_thread_add. `mat` is taken by value; this function's own
// handle to it is released before returning.
void pushFrameFromGpuMat(cv::cuda::GpuMat mat)
{
    PushFrameData data;
    data.mat = mat;

    // gst_gl_context_activate() is deliberately not called here.
    // gst_gl_context_thread_add executes the callback in the context's own
    // OpenGL thread, so making the same context current on (and then
    // detaching it from) the calling thread around this call is unnecessary
    // and contends with that GL thread for the context.
    gst_gl_context_thread_add(
        glContext,
        (GstGLContextThreadFunc)pushFrame,
        &data
    );

    // cuda memory release
    mat.release();
}

// Converts data->mat into a GL texture, wraps the texture in a GstGLMemory
// and pushes it into the appsrc (elems.src) as a timestamped buffer.
//
// Scheduled from pushFrameFromGpuMat through gst_gl_context_thread_add.
//
// context - GL context used for the allocator and the wrapped texture
// data    - carries the CUDA frame to send (data->mat)
//
// The texture is heap-allocated and handed to glMemoryFree as the destroy
// notify of the wrapped memory, together with a FreeTextureData.
static void pushFrame(GstGLContext* context, PushFrameData* data)
{
    const cv::cuda::GpuMat& mat = data->mat;

    // Copy from cuda memory to a GL texture

    cv::ogl::Buffer glBuffer;
    glBuffer.copyFrom(mat);
    // Using the heap as the texture needs to be released manually.
    cv::ogl::Texture2D* texture = new cv::ogl::Texture2D();
    texture->copyFrom(glBuffer);


    // Wrap the texture into GstGLMemory

    GstVideoInfo vinfo;
    gst_video_info_set_format(&vinfo, GST_VIDEO_FORMAT_RGBA, mat.cols, mat.rows);

    GstAllocator* allocator =
        GST_ALLOCATOR(gst_gl_memory_allocator_get_default(context));

    // Named freeData so it does not redeclare the `data` parameter.
    FreeTextureData* freeData = new FreeTextureData;
    freeData->context = context;
    freeData->texture = texture;

    GstGLVideoAllocationParams* params =
        gst_gl_video_allocation_params_new_wrapped_texture(
            context, NULL, &vinfo, 0, NULL, GST_GL_TEXTURE_TARGET_2D,
            GST_GL_RGBA, texture->texId(),
            freeData, (GDestroyNotify)glMemoryFree);

    GstGLMemory* glMemory = GST_GL_MEMORY_CAST(gst_gl_base_memory_alloc(
        GST_GL_BASE_MEMORY_ALLOCATOR_CAST(allocator),
        (GstGLAllocationParams*)params));

    gst_gl_allocation_params_free((GstGLAllocationParams*)params);
    gst_object_unref(allocator);

    if (glMemory == NULL) {
        // NOTE(review): it is unclear whether the destroy notify has already
        // run in this case, so texture/freeData are not freed here to avoid
        // a double free. Confirm against the allocator implementation.
        g_warning("Failed to wrap GL texture into GstGLMemory");
        return;
    }


    // Attach GstGLMemory object into buffer, timestamp the buffer and push
    // it downstream

    GstBuffer* buffer = gst_buffer_new();
    gst_buffer_append_memory(buffer, GST_MEMORY_CAST(glMemory));

    GST_BUFFER_PTS(buffer) = timestamp;
    GST_BUFFER_DURATION(buffer) =
        gst_util_uint64_scale_int(1, GST_SECOND, framerate);
    timestamp += GST_BUFFER_DURATION(buffer);

    GstFlowReturn ret = GST_FLOW_OK;
    g_signal_emit_by_name(elems.src, "push-buffer", buffer, &ret);
    if (ret != GST_FLOW_OK) {
        g_warning("push-buffer failed: %s", gst_flow_get_name(ret));
    }

    gst_buffer_unref(buffer);
}

// Destroy notify for the wrapped texture (installed in pushFrame).
//
// First schedules releaseTexture on the stored GL context, then deletes the
// heap-allocated texture object and the FreeTextureData itself.
static void glMemoryFree(FreeTextureData* data)
{
    GstGLContext* const ownerContext = data->context;
    const GstGLContextThreadFunc releaseOnGl =
        (GstGLContextThreadFunc)releaseTexture;

    gst_gl_context_thread_add(ownerContext, releaseOnGl, data);

    cv::ogl::Texture2D* const wrapped = data->texture;
    delete wrapped;
    delete data;
}

// Scheduled by glMemoryFree through gst_gl_context_thread_add: calls
// release() on the texture stored in data. The context argument is unused.
static void releaseTexture(GstGLContext* /*context*/, FreeTextureData* data)
{
    cv::ogl::Texture2D* const wrapped = data->texture;
    wrapped->release();
}


Viljar Hera
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/gstreamer-devel/attachments/20201216/d8762981/attachment.htm>


More information about the gstreamer-devel mailing list