OpenShot Library | libopenshot 0.3.2
CVObjectDetection.cpp
// Copyright (c) 2008-2019 OpenShot Studios, LLC
//
// SPDX-License-Identifier: LGPL-3.0-or-later

#include <fstream>
#include <iomanip>
#include <iostream>

#include "CVObjectDetection.h"
#include "Exceptions.h"

#include "objdetectdata.pb.h"
#include <google/protobuf/util/time_util.h>

using namespace std;
using namespace openshot;
using google::protobuf::util::TimeUtil;

CVObjectDetection::CVObjectDetection(std::string processInfoJson, ProcessingController &processingController)
: processingController(&processingController), processingDevice("CPU"){
    SetJson(processInfoJson);
    confThreshold = 0.5;
    nmsThreshold = 0.1;
}

void CVObjectDetection::setProcessingDevice(){
    if(processingDevice == "GPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else if(processingDevice == "CPU"){
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}
45
46void CVObjectDetection::detectObjectsClip(openshot::Clip &video, size_t _start, size_t _end, bool process_interval)
47{
48
49 start = _start; end = _end;
50
51 video.Open();
52
53 if(error){
54 return;
55 }
56
57 processingController->SetError(false, "");
58
59 // Load names of classes
60 std::ifstream ifs(classesFile.c_str());
61 std::string line;
62 while (std::getline(ifs, line)) classNames.push_back(line);
63
64 // Load the network
65 if(classesFile == "" || modelConfiguration == "" || modelWeights == "")
66 return;
67 net = cv::dnn::readNetFromDarknet(modelConfiguration, modelWeights);
68 setProcessingDevice();
69
70 size_t frame_number;
71 if(!process_interval || end <= 1 || end-start == 0){
72 // Get total number of frames in video
73 start = (int)(video.Start() * video.Reader()->info.fps.ToFloat());
74 end = (int)(video.End() * video.Reader()->info.fps.ToFloat());
75 }
76
77 for (frame_number = start; frame_number <= end; frame_number++)
78 {
79 // Stop the feature tracker process
80 if(processingController->ShouldStop()){
81 return;
82 }
83
84 std::shared_ptr<openshot::Frame> f = video.GetFrame(frame_number);
85
86 // Grab OpenCV Mat image
87 cv::Mat cvimage = f->GetImageCV();
88
89 DetectObjects(cvimage, frame_number);
90
91 // Update progress
92 processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
93
94 }
95}
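When process_interval is true and a non-empty range is given, only those frames are scanned; for example (hypothetical clip and detector names):

    // Detect objects only on frames 100..250 of the clip
    detector.detectObjectsClip(clip, 100, 250, true);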

void CVObjectDetection::DetectObjects(const cv::Mat &frame, size_t frameId){
    // Get frame as OpenCV Mat
    cv::Mat blob;

    // Create a 4D blob from the frame.
    int inpWidth, inpHeight;
    inpWidth = inpHeight = 416;

    cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(inpWidth, inpHeight), cv::Scalar(0,0,0), true, false);

    // Sets the input to the network
    net.setInput(blob);

    // Runs the forward pass to get output of the output layers
    std::vector<cv::Mat> outs;
    net.forward(outs, getOutputsNames(net));

    // Remove the bounding boxes with low confidence
    postprocess(frame.size(), outs, frameId);

}
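For reference, a sketch (assuming OpenCV 4.x) of what the preprocessing above produces: blobFromImage() returns a 4D NCHW blob, here 1x3x416x416, with pixel values scaled to [0,1] and the BGR-to-RGB swap enabled:

    // Editor's sketch: 'frame' is any BGR cv::Mat
    cv::Mat blob;
    cv::dnn::blobFromImage(frame, blob, 1/255.0, cv::Size(416, 416), cv::Scalar(0,0,0), true, false);
    CV_Assert(blob.dims == 4 && blob.size[0] == 1 && blob.size[1] == 3
              && blob.size[2] == 416 && blob.size[3] == 416);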


// Remove the bounding boxes with low confidence using non-maxima suppression
void CVObjectDetection::postprocess(const cv::Size &frameDims, const std::vector<cv::Mat>& outs, size_t frameId)
{
    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> objectIds;

    for (size_t i = 0; i < outs.size(); ++i)
    {
        // Scan through all the bounding boxes output from the network and keep only the
        // ones with high confidence scores. Assign the box's class label as the class
        // with the highest score for the box.
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            cv::Point classIdPoint;
            double confidence;
            // Get the value and location of the maximum score
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                int centerX = (int)(data[0] * frameDims.width);
                int centerY = (int)(data[1] * frameDims.height);
                int width = (int)(data[2] * frameDims.width);
                int height = (int)(data[3] * frameDims.height);
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                classIds.push_back(classIdPoint.x);
                confidences.push_back((float)confidence);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences (note: the computed indices are not applied here; redundant
    // boxes are instead pruned below, after the SORT update)
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);

    // Pass boxes to SORT algorithm
    std::vector<cv::Rect> sortBoxes;
    for(auto box : boxes)
        sortBoxes.push_back(box);
    sort.update(sortBoxes, frameId, sqrt(pow(frameDims.width,2) + pow(frameDims.height, 2)), confidences, classIds);

    // Clear data vectors
    boxes.clear(); confidences.clear(); classIds.clear(); objectIds.clear();
    // Get SORT predicted boxes
    for(auto TBox : sort.frameTrackingResult){
        if(TBox.frame == frameId){
            boxes.push_back(TBox.box);
            confidences.push_back(TBox.confidence);
            classIds.push_back(TBox.classId);
            objectIds.push_back(TBox.id);
        }
    }

    // Remove boxes based on centroids distance
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){
            int xc_1 = boxes[i].x + (int)(boxes[i].width/2), yc_1 = boxes[i].y + (int)(boxes[i].height/2);
            int xc_2 = boxes[j].x + (int)(boxes[j].width/2), yc_2 = boxes[j].y + (int)(boxes[j].height/2);

            if(fabs(xc_1 - xc_2) < 10 && fabs(yc_1 - yc_2) < 10){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0;
                        break;
                    }
                }
            }
        }
    }

    // Remove boxes based on IOU score
    for(uint i = 0; i<boxes.size(); i++){
        for(uint j = i+1; j<boxes.size(); j++){

            if( iou(boxes[i], boxes[j])){
                if(classIds[i] == classIds[j]){
                    if(confidences[i] >= confidences[j]){
                        boxes.erase(boxes.begin() + j);
                        classIds.erase(classIds.begin() + j);
                        confidences.erase(confidences.begin() + j);
                        objectIds.erase(objectIds.begin() + j);
                        break;
                    }
                    else{
                        boxes.erase(boxes.begin() + i);
                        classIds.erase(classIds.begin() + i);
                        confidences.erase(confidences.begin() + i);
                        objectIds.erase(objectIds.begin() + i);
                        i = 0;
                        break;
                    }
                }
            }
        }
    }

    // Normalize boxes coordinates
    std::vector<cv::Rect_<float>> normalized_boxes;
    for(auto box : boxes){
        cv::Rect_<float> normalized_box;
        normalized_box.x = (box.x)/(float)frameDims.width;
        normalized_box.y = (box.y)/(float)frameDims.height;
        normalized_box.width = (box.width)/(float)frameDims.width;
        normalized_box.height = (box.height)/(float)frameDims.height;
        normalized_boxes.push_back(normalized_box);
    }

    detectionsData[frameId] = CVDetectionData(classIds, confidences, normalized_boxes, frameId, objectIds);
}

// Compute IOU between 2 boxes
bool CVObjectDetection::iou(cv::Rect pred_box, cv::Rect sort_box){
    // Determine the (x, y)-coordinates of the intersection rectangle
    int xA = std::max(pred_box.x, sort_box.x);
    int yA = std::max(pred_box.y, sort_box.y);
    int xB = std::min(pred_box.x + pred_box.width, sort_box.x + sort_box.width);
    int yB = std::min(pred_box.y + pred_box.height, sort_box.y + sort_box.height);

    // Compute the area of intersection rectangle
    int interArea = std::max(0, xB - xA + 1) * std::max(0, yB - yA + 1);

    // Compute the area of both the prediction and ground-truth rectangles
    int boxAArea = (pred_box.width + 1) * (pred_box.height + 1);
    int boxBArea = (sort_box.width + 1) * (sort_box.height + 1);

    // Compute the intersection over union by taking the intersection
    float iou = interArea / (float)(boxAArea + boxBArea - interArea);

    // If IOU is above this value the boxes are very close (probably a variation of the same bounding box)
    return iou > 0.5;
}
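A worked example of the check above (editor's illustration): for two 100x100 boxes offset by 50 pixels in each direction, the intersection is 51 * 51 = 2601 (with the +1 convention used here), each box area is 101 * 101 = 10201, so IOU = 2601 / (10201 + 10201 - 2601) ≈ 0.146 and iou() returns false; the same box compared against itself gives IOU = 1.0 and returns true.

    cv::Rect a(0, 0, 100, 100), b(50, 50, 100, 100);
    // interArea = 51 * 51 = 2601; each area = 101 * 101 = 10201
    // IOU = 2601 / (10201 + 10201 - 2601) ≈ 0.146  ->  iou(a, b) == false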

// Get the names of the output layers
std::vector<cv::String> CVObjectDetection::getOutputsNames(const cv::dnn::Net& net)
{
    static std::vector<cv::String> names;

    // Get the indices of the output layers, i.e. the layers with unconnected outputs
    std::vector<int> outLayers = net.getUnconnectedOutLayers();

    // Get the names of all the layers in the network
    std::vector<cv::String> layersNames = net.getLayerNames();

    // Get the names of the output layers in names
    // (getUnconnectedOutLayers() returns 1-based layer indices, hence the -1)
    names.resize(outLayers.size());
    for (size_t i = 0; i < outLayers.size(); ++i)
        names[i] = layersNames[outLayers[i] - 1];
    return names;
}

CVDetectionData CVObjectDetection::GetDetectionData(size_t frameId){
    // Check if the detection data for the requested frame exists
    if ( detectionsData.find(frameId) == detectionsData.end() ) {

        return CVDetectionData();
    } else {

        return detectionsData[frameId];
    }
}
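Once a clip has been processed (or data reloaded from disk), the per-frame results can be read back through this accessor; a sketch using the public CVDetectionData fields, with the frame number and detector name as placeholders:

    // Hypothetical: print detections stored for frame 120
    CVDetectionData d = detector.GetDetectionData(120);
    for (size_t i = 0; i < d.boxes.size(); ++i)
        std::cout << "object " << d.objectIds[i]
                  << " class " << d.classIds[i]
                  << " conf "  << d.confidences[i] << std::endl;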

// Save protobuf data file
bool CVObjectDetection::SaveObjDetectedData(){
    // Create object detection message
    pb_objdetect::ObjDetect objMessage;

    // Save class names in protobuf message
    for(int i = 0; i < (int)classNames.size(); i++){
        std::string* className = objMessage.add_classnames();
        className->assign(classNames.at(i));
    }

    // Iterate over all frames data and save in protobuf message
    for(std::map<size_t,CVDetectionData>::iterator it=detectionsData.begin(); it!=detectionsData.end(); ++it){
        CVDetectionData dData = it->second;
        AddFrameDataToProto(objMessage.add_frame(), dData);
    }

    // Add timestamp
    *objMessage.mutable_last_updated() = TimeUtil::SecondsToTimestamp(time(NULL));

    {
        // Write the new message to disk.
        std::fstream output(protobuf_data_path, ios::out | ios::trunc | ios::binary);
        if (!objMessage.SerializeToOstream(&output)) {
            cerr << "Failed to write protobuf message." << endl;
            return false;
        }
    }

    // Delete all global objects allocated by libprotobuf.
    google::protobuf::ShutdownProtobufLibrary();

    return true;
}

// Add frame object detection into protobuf message.
void CVObjectDetection::AddFrameDataToProto(pb_objdetect::Frame* pbFrameData, CVDetectionData& dData) {

    // Save frame number
    pbFrameData->set_id(dData.frameId);

    for(size_t i = 0; i < dData.boxes.size(); i++){
        pb_objdetect::Frame_Box* box = pbFrameData->add_bounding_box();

        // Save bounding box data
        box->set_x(dData.boxes.at(i).x);
        box->set_y(dData.boxes.at(i).y);
        box->set_w(dData.boxes.at(i).width);
        box->set_h(dData.boxes.at(i).height);
        box->set_classid(dData.classIds.at(i));
        box->set_confidence(dData.confidences.at(i));
        box->set_objectid(dData.objectIds.at(i));
    }
}

// Load JSON string into this object
void CVObjectDetection::SetJson(const std::string value) {
    // Parse JSON string into JSON objects
    try
    {
        const Json::Value root = openshot::stringToJson(value);
        // Set all values that match
        SetJsonValue(root);
    }
    catch (const std::exception& e)
    {
        // Error parsing JSON (or missing keys)
        // throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
        std::cout << "JSON is invalid (missing keys or invalid data types)" << std::endl;
    }
}

// Load Json::Value into this object
void CVObjectDetection::SetJsonValue(const Json::Value root) {

    // Set data from Json (if key is found)
    if (!root["protobuf_data_path"].isNull()){
        protobuf_data_path = (root["protobuf_data_path"].asString());
    }
    if (!root["processing-device"].isNull()){
        processingDevice = (root["processing-device"].asString());
    }
    if (!root["model-config"].isNull()){
        modelConfiguration = (root["model-config"].asString());
        std::ifstream infile(modelConfiguration);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model config file");
            error = true;
        }
    }
    if (!root["model-weights"].isNull()){
        modelWeights = (root["model-weights"].asString());
        std::ifstream infile(modelWeights);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to model weight file");
            error = true;
        }
    }
    if (!root["class-names"].isNull()){
        classesFile = (root["class-names"].asString());
        std::ifstream infile(classesFile);
        if(!infile.good()){
            processingController->SetError(true, "Incorrect path to class name file");
            error = true;
        }
    }
}

/*
||||||||||||||||||||||||||||||||||||||||||||||||||
                ONLY FOR MAKE TEST
||||||||||||||||||||||||||||||||||||||||||||||||||
*/

// Load protobuf data file
bool CVObjectDetection::_LoadObjDetectdData(){
    // Create object detection message
    pb_objdetect::ObjDetect objMessage;

    {
        // Read the existing object detection message.
        fstream input(protobuf_data_path, ios::in | ios::binary);
        if (!objMessage.ParseFromIstream(&input)) {
            cerr << "Failed to parse protobuf message." << endl;
            return false;
        }
    }

    // Make sure classNames and detectionsData are empty
    classNames.clear(); detectionsData.clear();

    // Get all class names
    for(int i = 0; i < objMessage.classnames_size(); i++){
        classNames.push_back(objMessage.classnames(i));
    }

    // Iterate over all frames of the saved message
    for (int i = 0; i < objMessage.frame_size(); i++) {
        // Create protobuf message reader
        const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);

        // Get frame Id
        size_t id = pbFrameData.id();

        // Load bounding box data
        const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box> &pBox = pbFrameData.bounding_box();

        // Construct data vectors related to detections in the current frame
        std::vector<int> classIds;
        std::vector<float> confidences;
        std::vector<cv::Rect_<float>> boxes;
        std::vector<int> objectIds;

        for(int j = 0; j < pbFrameData.bounding_box_size(); j++){
            // Get bounding box coordinates
            float x = pBox.Get(j).x(); float y = pBox.Get(j).y();
            float w = pBox.Get(j).w(); float h = pBox.Get(j).h();
            // Create OpenCV rectangle with the bounding box info
            cv::Rect_<float> box(x, y, w, h);

            // Get class Id (which will be assigned to a class name) and prediction confidence
            int classId = pBox.Get(j).classid(); float confidence = pBox.Get(j).confidence();
            // Get object Id
            int objectId = pBox.Get(j).objectid();

            // Push back data into vectors
            boxes.push_back(box); classIds.push_back(classId); confidences.push_back(confidence);
            objectIds.push_back(objectId);
        }

        // Assign data to object detector map
        detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id, objectIds);
    }

    // Delete all global objects allocated by libprotobuf.
    google::protobuf::ShutdownProtobufLibrary();

    return true;
}
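As the banner above notes, this loader exists for the test suite; assuming the method is accessible from the test code, detections can be round-tripped like so (a sketch, detector name hypothetical):

    // Save detections, then reload them into the same detector
    detector.SaveObjDetectedData();
    detector._LoadObjDetectdData();   // repopulates classNames and detectionsData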