<div dir="ltr">Hello guys, I'm new to GStreamer. I have Python code for an object detection app using PyTorch, OpenCV to draw bounding boxes, and GStreamer to process multiple RTSP links. In this pipeline I'm using a video file to demo:<div>filesrc ---> decodebin ---> videorate ---> videoconvert ---> queue ---> autovideosink<br clear="all"><div>I have a problem: when I implement a simple piece of code to save the output image, the expected image should be a single frame cut from the input video with bounding boxes on it. Instead, I get an image containing a series of frames, like a strip of frames cut from the input video, with bounding boxes on it (check this image: <a href="https://imgur.com/a/QKr4tN2">https://imgur.com/a/QKr4tN2</a>).</div><div>Now I have no idea what to do, can someone give me advice on this?</div><div><br></div><div>This is my code:</div><div>import gi<br>gi.require_version('Gst', '1.0')<br>gi.require_version('GObject', '2.0')<br>from gi.repository import Gst<br>import numpy as np<br>import torch<br>from PIL import Image<br>import cv2<br>import time<br><br>Gst.init(None)<br><br>src = Gst.ElementFactory.make('filesrc', 'source')<br>src.set_property('location', 'video/test.mp4')<br>src.set_property('num-buffers', 9000)<br>decodebin = Gst.ElementFactory.make('decodebin', 'decoder')<br>videorate = Gst.ElementFactory.make('videorate', 'rate')<br>videorate.set_property('max-rate', 1)<br>videoconvert = Gst.ElementFactory.make('videoconvert', 'converter')<br>queue = Gst.ElementFactory.make('queue', 'queue')<br>autovideosink = Gst.ElementFactory.make('autovideosink', 's')<br><br>pipeline = Gst.Pipeline.new('new-pipeline')<br><br>pipeline.add(src)<br>pipeline.add(decodebin)<br>pipeline.add(videorate)<br>pipeline.add(videoconvert)<br>pipeline.add(queue)<br>pipeline.add(autovideosink)<br><br>def on_pad_added(element, pad):<br>    sinkpad = videoconvert.get_static_pad('sink')<br>    pad.link(sinkpad)<br>    <br>    decodebin.link(videorate)<br>    
videorate.link(videoconvert)<br>    videoconvert.link(queue)<br>    queue.link(autovideosink)<br>    <br>    sinkpad = autovideosink.get_static_pad('sink')<br>    videoconvert.link(queue)<br>    <br>src.link(decodebin)<br>decodebin.connect('pad-added', on_pad_added)<br><br>device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')<br>detector = torch.hub.load('ultralytics/yolov5', 'custom', 'yolov5s.pt').eval().to(device)<br><br>fps = 0<br>prev_time = time.time()<br><br>def on_frame_probe(pad, info):<br>    global fps, prev_time<br>    buf = info.get_buffer()<br>    # print(buf.get_size())<br>    print(f'[{buf.pts / Gst.SECOND:6.2f}]')<br>    image_tensor = buffer_to_image_tensor(buf, pad.get_current_caps())<br>    with torch.no_grad():<br>        detections = detector(image_tensor)<br>        current_time = time.time()<br>        fps = 1 / (current_time - prev_time)<br>        print("FPS: {0:.2f}".format(fps))<br>        prev_time = current_time<br>        print(detections)<br>        objects = (detections.xyxy[0]).tolist()<br>        img_np = np.array(image_tensor)<br>        for i in range(len(objects)):<br>            x_min, y_min, x_max, y_max = int(objects[i][0]), int(objects[i][1]), int(objects[i][2]), int(objects[i][3])<br>            cv2.rectangle(img_np, (x_min, y_min), (x_max, y_max), (255,0,0), 2)<br>        # Save the output image<br>        Image.fromarray(img_np[:,:,:3]).save("output.jpg")<br>    return Gst.PadProbeReturn.OK<br><br><br>def buffer_to_image_tensor(buf, caps):<br>    global fps<br>    caps_structure = caps.get_structure(0)<br>    height, width = caps_structure.get_value('height'), caps_structure.get_value('width')<br>    channels = 3<br>    is_mapped, map_info = buf.map(Gst.MapFlags.READ)<br>    if is_mapped:<br>        try:<br>            image_array = np.frombuffer(map_info.data, dtype=np.uint8).reshape((640, 1637, channels)).copy()<br>            image_array.resize((height, width, 
channels))<br>            image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)<br>            image_array = cv2.resize(image_array, (1920, 1080))<br>            return Image.fromarray(image_array) # RGBA -> RGB<br>        finally:<br>            buf.unmap(map_info)<br><br>pipeline.get_by_name('s').get_static_pad('sink').add_probe(<br>    Gst.PadProbeType.BUFFER,<br>    on_frame_probe<br>)<br><br>pipeline.set_state(Gst.State.PLAYING)<br><br>while True:<br>    msg = pipeline.get_bus().timed_pop_filtered(<br>        Gst.SECOND,<br>        Gst.MessageType.EOS | Gst.MessageType.ERROR<br>    )<br>    if msg:<br>        text = msg.get_structure().to_string() if msg.get_structure() else ''<br>        msg_type = Gst.message_type_get_name(msg.type)<br>        print(f'{msg.src.name}: [{msg_type}] {text}')<br>        break<br><br>pipeline.set_state(Gst.State.NULL)<br></div><div><br></div><div>I got the input video from this YouTube link: <a href="https://www.youtube.com/watch?v=MNn9qKG2UFI&list=PLcQZGj9lFR7y5WikozDSrdk6UCtAnM9mB">https://www.youtube.com/watch?v=MNn9qKG2UFI&list=PLcQZGj9lFR7y5WikozDSrdk6UCtAnM9mB</a></div><div>I appreciate you so much.</div>-- <br><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><div>Trân trọng,</div><div><br></div>Triệu Việt Hùng</div></div></div></div>