<div dir="ltr">Hello guys, I'm new to GStreamer. I have Python code for an object detection app using PyTorch, OpenCV to draw bounding boxes, and GStreamer to process multiple RTSP links. In this pipeline I'm using a video file to demo:<div>filesrc ---> decodebin ---> videorate ---> videoconvert ---> queue ---> autovideosink<br clear="all"><div>I have a problem: when I implement a simple piece of code to save the output image, the expected image should be a single frame cut from the input video with bounding boxes on it. Instead, I get an image containing a series of frames, like a strip of frames cut from the input video, with bounding boxes on it (check this image: <a href="https://imgur.com/a/QKr4tN2">https://imgur.com/a/QKr4tN2</a>).</div><div>Now I have no idea what to do, can someone give me advice on this?</div><div><br></div><div>This is my code:</div><div>import gi<br>gi.require_version('Gst', '1.0')<br>gi.require_version('GObject', '2.0')<br>from gi.repository import Gst<br>import numpy as np<br>import torch<br>from PIL import Image<br>import cv2<br>import time<br><br>Gst.init(None)<br><br>src = Gst.ElementFactory.make('filesrc', 'source')<br>src.set_property('location', 'video/test.mp4')<br>src.set_property('num-buffers', 9000)<br>decodebin = Gst.ElementFactory.make('decodebin', 'decoder')<br>videorate = Gst.ElementFactory.make('videorate', 'rate')<br>videorate.set_property('max-rate', 1)<br>videoconvert = Gst.ElementFactory.make('videoconvert', 'converter')<br>queue = Gst.ElementFactory.make('queue', 'queue')<br>autovideosink = Gst.ElementFactory.make('autovideosink', 's')<br><br>pipeline = Gst.Pipeline.new('new-pipeline')<br><br>pipeline.add(src)<br>pipeline.add(decodebin)<br>pipeline.add(videorate)<br>pipeline.add(videoconvert)<br>pipeline.add(queue)<br>pipeline.add(autovideosink)<br><br>def on_pad_added(element, pad):<br>    sinkpad = videoconvert.get_static_pad('sink')<br>    pad.link(sinkpad)<br>    <br>    decodebin.link(videorate)<br>    
videorate.link(videoconvert)<br>    videoconvert.link(queue)<br>    queue.link(autovideosink)<br>    <br>    sinkpad = autovideosink.get_static_pad('sink')<br>    videoconvert.link(queue)<br>    <br>src.link(decodebin)<br>decodebin.connect('pad-added', on_pad_added)<br><br>device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')<br>detector = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt').eval().to(device)<br><br>fps = 0<br>prev_time = time.time()<br><br>def on_frame_probe(pad, info):<br>    global fps, prev_time<br>    buf = info.get_buffer()<br>    # print(buf.get_size())<br>    print(f'[{buf.pts / Gst.SECOND:6.2f}]')<br>    image_tensor = buffer_to_image_tensor(buf, pad.get_current_caps())<br>    with torch.no_grad():<br>        detections = detector(image_tensor)<br>        current_time = time.time()<br>        fps = 1 / (current_time - prev_time)<br>        print("FPS: {0:.2f}".format(fps))<br>        prev_time = current_time<br>        print(detections)<br>        objects = (detections.xyxy[0]).tolist()<br>        img_np = np.array(image_tensor)<br>        for i in range(len(objects)):<br>            x_min, y_min, x_max, y_max = int(objects[i][0]), int(objects[i][1]), int(objects[i][2]), int(objects[i][3])<br>            cv2.rectangle(img_np, (x_min, y_min), (x_max, y_max), (255,0,0), 2)<br>        # Save the output image<br>        Image.fromarray(img_np[:,:,:3]).save("output.jpg")<br>    return Gst.PadProbeReturn.OK<br><br><br>def buffer_to_image_tensor(buf, caps):<br>    global fps<br>    caps_structure = caps.get_structure(0)<br>    height, width = caps_structure.get_value('height'), caps_structure.get_value('width')<br>    channels = 3<br>    is_mapped, map_info = buf.map(Gst.MapFlags.READ)<br>    if is_mapped:<br>        try:<br>            image_array = np.frombuffer(map_info.data, dtype=np.uint8).reshape((640, 1637, channels)).copy()<br>            image_array.resize((height, width, 
channels))<br>            image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)<br>            image_array = cv2.resize(image_array, (1920, 1080))<br>            return Image.fromarray(image_array) # RGBA -> RGB<br>        finally:<br>            buf.unmap(map_info)<br><br>pipeline.get_by_name('s').get_static_pad('sink').add_probe(<br>    Gst.PadProbeType.BUFFER,<br>    on_frame_probe<br>)<br><br>pipeline.set_state(Gst.State.PLAYING)<br><br>while True:<br>    msg = pipeline.get_bus().timed_pop_filtered(<br>        Gst.SECOND,<br>        Gst.MessageType.EOS | Gst.MessageType.ERROR<br>    )<br>    if msg:<br>        text = msg.get_structure().to_string() if msg.get_structure() else ''<br>        msg_type = Gst.message_type_get_name(msg.type)<br>        print(f'{msg.src.name}: [{msg_type}] {text}')<br>        break<br><br>pipeline.set_state(Gst.State.NULL)<br></div><div><br></div><div>I got the input video from this YouTube link: <a href="https://www.youtube.com/watch?v=MNn9qKG2UFI&list=PLcQZGj9lFR7y5WikozDSrdk6UCtAnM9mB">https://www.youtube.com/watch?v=MNn9qKG2UFI&list=PLcQZGj9lFR7y5WikozDSrdk6UCtAnM9mB</a></div><div>I appreciate you so much.</div>-- <br><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div>Trân trọng,</div><div><br></div>Triệu Việt Hùng</div></div></div></div>