// License: Apache 2.0. See LICENSE file in root directory.
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.

#include <librealsense2/rs.hpp>   // Include RealSense Cross Platform API

#include "cv-helpers.hpp"         // frame_to_mat
#include <opencv2/core/utils/filesystem.hpp>   // glob
namespace fs = cv::utils::fs;

#include <rs-vino/face-detection.h>
#include <rs-vino/detected-face.h>

#include <easylogging++.h>
INITIALIZE_EASYLOGGINGPP
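// Note: INITIALIZE_EASYLOGGINGPP must appear in exactly one translation unit of the program --
// it defines the storage easylogging++ needs, so repeating it elsewhere would cause duplicate
// symbols at link time.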

#include <rs-vino/openvino-helpers.h>
namespace openvino = InferenceEngine;

#include <chrono>
using namespace std::chrono;


/*
    Enable loading multiple detectors at once, so we can switch at runtime.
    Each detector has its associated labels.
*/
struct detector_and_labels
{
    std::shared_ptr< openvino_helpers::face_detection > detector;
    std::vector< std::string > labels;

    detector_and_labels( std::string const & path_to_xml )
        : detector( std::make_shared< openvino_helpers::face_detection >( path_to_xml, 0.5 ) )
    {
    }

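    // operator-> lets this wrapper be used like a smart pointer: detector->load_into(...) below
    // forwards straight to the underlying face_detection object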
    openvino_helpers::face_detection * operator->() { return detector.get(); }

    void load_labels()
    {
        try
        {
            labels = openvino_helpers::read_labels( openvino_helpers::remove_ext( detector->pathToModel ) + ".labels" );
        }
        catch( const std::exception & e )
        {
            // Missing labels aren't fatal: warn and continue without them
            LOG(WARNING) << "Failed to load labels: " << e.what();
        }
    }
};


/*
    Populate a collection of detectors from those we find on disk (*.xml), load
    their labels, add them to the engine & device, etc.

    The detectors are loaded with all default values.
*/
void load_detectors_into(
    std::vector< detector_and_labels > & detectors,
    openvino::Core & engine,
    std::string const & device_name
)
{
    std::vector< std::string > xmls;
    fs::glob_relative( ".", "*.xml", xmls );
    for( auto path_to_xml : xmls )
    {
        detector_and_labels detector { path_to_xml };
        try
        {
            detector->load_into( engine, device_name );  // May throw!
            detector.load_labels();
            detectors.push_back( detector );
            LOG(INFO) << " ... press '" << char( '0' + detectors.size() ) << "' to switch to it";
        }
        catch( const std::exception & e )
        {
            // The model files should have been downloaded automatically by CMake into build/wrappers/openvino/dnn,
            // which is also where Visual Studio runs the sample from. However, you may need to copy these files:
            //     *.bin
            //     *.xml
            //     *.labels  [optional]
            // Into the local directory where you run from (or change the path given in the ctor above)
            LOG(ERROR) << "Failed to load model: " << e.what();
        }
    }
}
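// Note on the files load_detectors_into() expects: each OpenVINO IR model is a pair of files --
// an *.xml topology description plus a matching *.bin weights file next to it. The optional
// *.labels file read by load_labels() above is assumed (following the Open Model Zoo convention)
// to hold one class name per line, indexed by the label id the detector returns.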


int main(int argc, char * argv[]) try
{
    el::Configurations conf;
    conf.set( el::Level::Global, el::ConfigurationType::Format, "[%level] %msg" );
    //conf.set( el::Level::Debug, el::ConfigurationType::Enabled, "false" );
    el::Loggers::reconfigureLogger( "default", conf );
    rs2::log_to_console( RS2_LOG_SEVERITY_WARN );  // only warnings (and above) should come through

    // Declare RealSense pipeline, encapsulating the actual device and sensors
    rs2::pipeline pipe;
    pipe.start();
    rs2::align align_to( RS2_STREAM_COLOR );
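    // Aligning to RS2_STREAM_COLOR means each processed frameset will have its depth frame
    // reprojected into the color camera's viewport, so a pixel (x,y) in the color image and the
    // same (x,y) in the aligned depth frame refer to the same point in space. This is what lets
    // us query depth_frame.get_distance() at face coordinates taken from the color image below.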

    // Start the inference engine, needed to accomplish anything. We also add a CPU extension, allowing
    // us to run the inference on the CPU. A GPU solution may be possible but, at least without a GPU,
    // a CPU-bound process is faster. To change to GPU, use "GPU" instead (and remove the CPU extension,
    // which only applies to the CPU plugin):
    openvino::Core engine;
    openvino_helpers::error_listener error_listener;
    engine.SetLogCallback( error_listener );
    std::string const device_name { "CPU" };
    engine.AddExtension( std::make_shared< openvino::Extensions::Cpu::CpuExtensions >(), device_name );
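    // A minimal sketch of the GPU alternative mentioned above (assuming your OpenVINO
    // installation ships the GPU/clDNN plugin -- not verified here):
    //     std::string const device_name { "GPU" };
    //     // ...and skip AddExtension(): the CPU extensions only exist for the CPU plugin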

    std::vector< detector_and_labels > detectors;
    load_detectors_into( detectors, engine, device_name );
    if( detectors.empty() )
    {
        LOG(ERROR) << "No detectors available in: " << fs::getcwd();
        return EXIT_FAILURE;
    }
    // Look for the mobilenet-ssd so it always starts the same... otherwise default to the first detector we found
    size_t current_detector = 0;
    for( size_t i = 1; i < detectors.size(); ++i )
    {
        if( detectors[i]->pathToModel == "mobilenet-ssd.xml" )
        {
            current_detector = i;
            break;
        }
    }
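    // The comparison above assumes the models were globbed from the current working directory, so
    // pathToModel is the bare file name "mobilenet-ssd.xml" rather than a longer path; if you load
    // models from elsewhere, match accordingly.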
    auto p_detector = detectors[current_detector].detector;
    LOG(INFO) << "Current detector set to (" << current_detector+1 << ") \"" << openvino_helpers::remove_ext( p_detector->pathToModel ) << "\"";
    auto p_labels = &detectors[current_detector].labels;

    const auto window_name = "OpenVINO DNN sample";
    cv::namedWindow( window_name, cv::WINDOW_AUTOSIZE );

    cv::Mat prev_image;
    openvino_helpers::detected_faces faces;
    size_t id = 0;
    uint64 last_frame_number = 0;
    high_resolution_clock::time_point switch_time = high_resolution_clock::now();

    while( cv::getWindowProperty( window_name, cv::WND_PROP_AUTOSIZE ) >= 0 )
    {
        // Wait for the next set of frames
        auto frames = pipe.wait_for_frames();
        // Make sure the frames are spatially aligned
        frames = align_to.process( frames );

        auto color_frame = frames.get_color_frame();
        auto depth_frame = frames.get_depth_frame();

        // If we only received a new depth frame, but the color did not update, continue
        if( color_frame.get_frame_number() == last_frame_number )
            continue;
        last_frame_number = color_frame.get_frame_number();

        auto image = frame_to_mat( color_frame );

        // We process the previous frame, so if this is our first frame, just enqueue it and continue
        if( ! p_detector->_request )
        {
            p_detector->enqueue( image );
            p_detector->submit_request();
            prev_image = image;
            continue;
        }

        // Wait for the results of the previous frame we enqueued: we're going to process these
        p_detector->wait();
        auto const results = p_detector->fetch_results();

        // Enqueue the current frame so we'd get the results when the next frame comes along!
        p_detector->enqueue( image );
        p_detector->submit_request();
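        // In effect the detector runs one frame behind the camera: while the request for the
        // current image is in flight, we draw the results computed for the previous image. The
        // boxes therefore lag the video by one frame, but the camera and the inference engine
        // work in parallel instead of waiting on each other.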

        openvino_helpers::detected_faces prev_faces { std::move( faces ) };
        faces.clear();
        for( size_t i = 0; i < results.size(); ++i )
        {
            auto const & result = results[i];
            if( result.label <= 0 )
                continue;  // ignore "background", though not clear why we'd get it
            cv::Rect rect = result.location;
            rect = rect & cv::Rect( 0, 0, image.cols, image.rows );
            auto face_ptr = openvino_helpers::find_face( rect, prev_faces );
            if( !face_ptr )
            {
                // New face
                std::string label;
                if( result.label < p_labels->size() )
                    label = (*p_labels)[result.label];
                face_ptr = std::make_shared< openvino_helpers::detected_face >( id++, label, rect );
            }
            else
            {
                // Existing face; just update its parameters
                face_ptr->move( rect );
            }
            faces.push_back( face_ptr );
        }
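        // find_face() is what gives a face a persistent id across frames: it looks for a face from
        // the previous frame whose rectangle overlaps the new detection (presumably by intersection
        // area -- see openvino-helpers for the exact rule) and reuses it, so the same person keeps
        // the same id and label instead of being re-created every frame.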

        // Keep this image so we can actually process pieces of it once we have the results
        prev_image = image;

        // Display the results (from the last frame) as rectangles on top (of the current frame)
        for( auto && face : faces )
        {
            cv::Scalar green( 0, 255, 0 );  // BGR
            auto r = face->get_location();
            cv::rectangle( image, r, green );

            // Output the distance to the center
            auto center_x = (r.x + r.width / 2) * depth_frame.get_width() / color_frame.get_width();
            auto center_y = (r.y + r.height / 2) * depth_frame.get_height() / color_frame.get_height();
            auto d = depth_frame.get_distance( center_x, center_y );
            if( d )
            {
                std::ostringstream ss;
                ss << face->get_label() << " ";
                ss << std::setprecision( 2 ) << d;
                ss << " meters away";
                cv::Scalar white( 255, 255, 255 );  // BGR
                cv::putText( image, ss.str(), cv::Point( r.x + 5, r.y + r.height - 5 ), cv::FONT_HERSHEY_SIMPLEX, 0.4, white );
            }
        }
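        // Two things worth noting about the distance readout above: the center pixel is rescaled
        // from color to depth resolution in case the two streams differ (after alignment they are
        // typically the same size, so the scaling is usually a no-op), and get_distance() returns
        // the depth in meters, with 0 meaning "no depth data at that pixel" -- which is why the
        // text is only drawn when d is non-zero.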

        // Show the current detector number as an overlay over the image for 1 second
        double alpha = std::max( 0LL, 1000 - duration_cast<milliseconds>(high_resolution_clock::now() - switch_time).count() ) / 1000.;
        std::string str( 1, char( '1' + current_detector ));
        auto size = cv::getTextSize( str, cv::FONT_HERSHEY_SIMPLEX, 3, 1, nullptr );
        cv::Point center { image.cols / 2, image.rows / 2 };
        cv::Rect r { center.x - size.width, center.y - size.height, size.width * 2, size.height * 2 };
        cv::Mat roi = image( r );
        cv::Mat overlay( roi.size(), CV_8UC3, cv::Scalar( 32, 32, 32 ) );
        cv::putText( overlay, str, cv::Point { r.width / 2 - size.width / 2, r.height / 2 + size.height / 2 }, cv::FONT_HERSHEY_SIMPLEX, 3, cv::Scalar { 255, 255, 255 } );
        cv::addWeighted( overlay, alpha, roi, 1 - alpha, 0, roi );  // roi = overlay * alpha + roi * (1-alpha) + 0
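        // alpha falls linearly from 1 to 0 over the 1000 ms following a detector switch, so the
        // big digit is fully opaque right after a key press and fades out within a second.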

        imshow( window_name, image );
        const int key = cv::waitKey( 1 );
        if( key == 27 )
            break;  // escape
        if( key >= '1' && key < '1' + detectors.size() )
        {
            size_t detector_index = key - '1';
            if( detector_index != current_detector )
            {
                current_detector = detector_index;
                p_detector = detectors[current_detector].detector;
                p_labels = &detectors[current_detector].labels;
                faces.clear();
                LOG(INFO) << "Current detector set to (" << current_detector+1 << ") \"" << openvino_helpers::remove_ext( p_detector->pathToModel ) << "\"";
            }
            switch_time = high_resolution_clock::now();
        }
    }

    return EXIT_SUCCESS;
}
catch (const rs2::error & e)
{
    LOG(ERROR) << "Caught RealSense exception from " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n    " << e.what();
    return EXIT_FAILURE;
}
catch (const std::exception& e)
{
    LOG(ERROR) << "Unknown exception caught: " << e.what();
    return EXIT_FAILURE;
}