Skip to content

Commit fc98a68

Browse files
committed
Add OpenVINO 'rs-dnn-vino' sample
* Uses same model as rs-dnn, for comparison * Can load multiple models and switch on the fly (press '1' for first, '2', etc.) * Revised download mechanism errors in openvino/CMakeLists.txt
1 parent 2bed2dd commit fc98a68

File tree

7 files changed

+393
-9
lines changed

7 files changed

+393
-9
lines changed

examples/readme.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ For a detailed explanations and API documentation see our [Documentation](../doc
3333
|[Apriltag Pose](./pose-apriltag)|C++|Demonstrates how to compute [Apriltag](https://github.com/AprilRobotics/apriltag/tree/3.1.1) pose from T265 fisheye image stream. | :star::star: |[![Motion Tracking - T260 and SLAM](https://img.shields.io/badge/-Tracking-0e2356.svg)](../doc/t265.md#examples-and-tools)|
3434
|[AR-Basic](./ar-basic)|C++|Shows how to use pose and fisheye frames to display a simple virtual object on the fisheye image | :star::star: |[![Motion Tracking - T260 and SLAM](https://img.shields.io/badge/-Tracking-0e2356.svg)](../doc/t265.md#examples-and-tools)|
3535
|[DNN](../wrappers/opencv/dnn)| C++ & [OpenCV](https://github.com/IntelRealSense/librealsense/tree/master/wrappers/opencv#getting-started) | Intel RealSense camera used for real-time object-detection | :star::star: | [![Depth Sensing - Structured Light, Stereo and L500](https://img.shields.io/badge/-Depth-5bc3ff.svg)](./depth.md) |
36+
|[DNN](../wrappers/openvino/dnn)| C++ & [OpenVINO](https://github.com/IntelRealSense/librealsense/tree/master/wrappers/openvino) | Intel RealSense camera used for real-time object-detection | :star::star: | [![Depth Sensing - Structured Light, Stereo and L500](https://img.shields.io/badge/-Depth-5bc3ff.svg)](./depth.md) |
3637
|[Tracking and Depth](./tracking-and-depth)| C++ | Shows how to use the tracking camera T265 together with a depth camera to display a 3D pointcloud with respect to a static reference frame | :star::star: | [![Depth Sensing - Structured Light, Stereo and L500](https://img.shields.io/badge/-Depth-5bc3ff.svg)](./depth.md) [![Motion Tracking - T260 and SLAM](https://img.shields.io/badge/-Tracking-0e2356.svg)](../doc/t265.md#examples-and-tools)
3738
|[Trajectory](./trajectory)| C++ | Shows how to calculate and render 3D trajectory based on pose data from a tracking camera | :star::star::star: | [![Motion Tracking - T260 and SLAM](https://img.shields.io/badge/-Tracking-0e2356.svg)](../doc/t265.md#examples-and-tools)
3839
|[Software Device](./software-device)| C++ | Shows how to create a custom `rs2::device` | :star::star::star: | [![Depth Sensing - Structured Light, Stereo and L500](https://img.shields.io/badge/-Depth-5bc3ff.svg)](./depth.md) [![Motion Tracking - T260 and SLAM](https://img.shields.io/badge/-Tracking-0e2356.svg)](../doc/t265.md#examples-and-tools) |

wrappers/openvino/CMakeLists.txt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,9 @@ function(dl_vino_model filename sha1)
5252
file(DOWNLOAD "${OPENVINO_MODEL_SRC_URL}/${filename}" "${path}"
5353
EXPECTED_HASH SHA1=${sha1}
5454
STATUS status)
55-
list(GET status 0 error_code)
56-
if (NOT ${error_code} EQUAL 0)
57-
message(FATAL_ERROR "Error (${status}) downloading: ${OPENVINO_MODEL_SRC_URL}/${filename}")
58-
endif()
5955
endif()
6056
endfunction()
6157

6258
# List all the specific examples for OpenVINO
6359
add_subdirectory(face)
60+
add_subdirectory(dnn)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Build script for the rs-dnn-vino sample (OpenVINO object detection + RealSense depth).
# Relies on variables set by the including scripts: DEPENDENCIES, OPENVINO_FILES,
# ELPP_FILES, and on the dl_vino_model() helper being defined before this file is processed.
cmake_minimum_required(VERSION 3.1.0)

project(RealSenseVinoDnnExample)

# OpenCV is required here
# The path is quoted so that an empty or undefined OpenCV_DIR still produces a
# well-formed IS_DIRECTORY test instead of an operator with no operand.
if(NOT DEFINED OpenCV_DIR OR NOT IS_DIRECTORY "${OpenCV_DIR}")
    # Expose the cache entry so it shows up in cmake-gui/ccmake before we bail out
    set(OpenCV_DIR "" CACHE PATH "The path to the OpenCV Toolkit build directory")
    message( FATAL_ERROR "OpenVINO examples require OpenCV; specify OpenCV_DIR" )
endif()
find_package(OpenCV REQUIRED)
# Extend the inherited dependency list with the OpenCV libraries
get_property(deps VARIABLE PROPERTY DEPENDENCIES)
set(DEPENDENCIES ${deps} ${OpenCV_LIBS})
# cv-helpers.hpp is included from the OpenCV wrapper directory
include_directories( ../../opencv )

add_executable(rs-dnn-vino
    rs-dnn-vino.cpp
    ${OPENVINO_FILES}
    ${ELPP_FILES}
    )

source_group("OpenVINO" FILES ${OPENVINO_FILES})
source_group("EasyLogging++" FILES ${ELPP_FILES})

set_property(TARGET rs-dnn-vino PROPERTY CXX_STANDARD 11)
target_link_libraries(rs-dnn-vino ${DEPENDENCIES})
set_target_properties (rs-dnn-vino PROPERTIES
    FOLDER "Examples/openvino"
    )

install(
    TARGETS

    rs-dnn-vino

    RUNTIME DESTINATION
    ${CMAKE_INSTALL_PREFIX}/bin
    )

# Download model files -- these will go into build/wrappers/openvino/dnn, which is also where the sample
# is run from in Visual Studio
dl_vino_model( "README.txt"                "789e144d6cafa379c8437d2a6860829b6d935a8d" )
dl_vino_model( "mobilenet-ssd.xml"         "483ba684dd53ba138550377075e928b2008418bb" )
dl_vino_model( "mobilenet-ssd.bin"         "3b687f0b9519b0d296c23d5f038a8623b5aeb49b" )
dl_vino_model( "mobilenet-ssd.labels"      "b11e85afe8cff3172ad9b62d904bbc8013b3ddbf" )

wrappers/openvino/dnn/readme.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# rs-dnn-vino Sample
2+
3+
## Overview
4+
This example demonstrates OpenVINO™ toolkit integration with object detection, using
5+
basic depth information to approximate distance.
6+
7+
<p align="center"><img src="rs-dnn-vino.jpg" alt="screenshot"/></p>
8+
9+
The same exact neural network is used here as in the OpenCV DNN sample, for
10+
comparison.
11+
12+
## Implementation
13+
14+
This sample makes use of OpenCV.
15+
16+
Though we are detecting general objects in the neural network, the inputs and
17+
outputs are the same as those for facial detections. We can therefore reuse the
18+
`openvino_helpers::face_detection` code we used in the [rs-face-vino example](../face).
19+
20+
There is a single trained model with two Intermediate Representation files
21+
(`mobilenet-ssd.xml` and `.bin`) provided with the sample. The sample
22+
will, however, load any model present in the current directory and is able to
23+
switch between them at runtime, allowing some experimentation.
24+
25+
> The `face_detection` class does have requirements from the model: it is
26+
> expected to have **a single input and a single output** (bounding box, classification,
27+
> confidence, etc.), and will be rejected otherwise.
28+
29+
> You can see the inputs and outputs of a model listed in the .xml file. Search
30+
> for a layer with `type="Input"` to find the inputs. Similarly, the expected
31+
> output layer is of `type="DetectionOutput"`.
32+
33+
> Some neural networks (e.g., the version of Faster R-CNN available with
34+
> OpenVINO) have two inputs, adding an additional layer for more information. It
35+
> is fairly simple to make `face_detection` provide this information and work
36+
> with two inputs, though this is not supported at this time.
37+
38+
Each model can optionally provide a `.labels` classification file to help map
39+
the output "label" integer into a human-recognizable name such as "person",
40+
"bottle", etc.
41+
These are not provided by the OpenVINO model zoo and need to be created
42+
manually according to the classes used when training the model.
43+
See the format in `mobilenet-ssd.labels` for an example: one line per
44+
classification, starting at 0 (which is expected to be the background).
45+
46+
## Speed
47+
48+
The MobileNet models are intended for use on mobile devices and so their
49+
performance is high and they are suitable for use on the CPU. More advanced
50+
models can be more accurate or provide better classification but may require
51+
acceleration using a GPU or other device.
Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
// License: Apache 2.0. See LICENSE file in root directory.
2+
// Copyright(c) 2019 Intel Corporation. All Rights Reserved.
3+
4+
#include <librealsense2/rs.hpp> // Include RealSense Cross Platform API
5+
6+
#include "cv-helpers.hpp" // frame_to_mat
7+
#include <opencv2/core/utils/filesystem.hpp> // glob
8+
namespace fs = cv::utils::fs;
9+
10+
#include <rs-vino/face-detection.h>
11+
#include <rs-vino/detected-face.h>
12+
13+
#include <easylogging++.h>
14+
INITIALIZE_EASYLOGGINGPP
15+
16+
#include <rs-vino/openvino-helpers.h>
17+
namespace openvino = InferenceEngine;
18+
19+
#include <chrono>
20+
using namespace std::chrono;
21+
22+
23+
/*
24+
Enable loading multiple detectors at once, so we can switch at runtime.
25+
Each detector has its associated labels.
26+
*/
27+
struct detector_and_labels
28+
{
29+
std::shared_ptr< openvino_helpers::face_detection > detector;
30+
std::vector< std::string > labels;
31+
32+
detector_and_labels( std::string const & path_to_xml )
33+
: detector( std::make_shared< openvino_helpers::face_detection >( path_to_xml, 0.5 ) )
34+
{
35+
}
36+
37+
openvino_helpers::face_detection * operator->() { return detector.get(); }
38+
39+
void load_labels()
40+
{
41+
try
42+
{
43+
labels = openvino_helpers::read_labels( openvino_helpers::remove_ext( detector->pathToModel ) + ".labels" );
44+
}
45+
catch( const std::exception & e )
46+
{
47+
// If we have no labels, warn and continue... we can continue without them
48+
LOG(WARNING) << "Failed to load labels: " << e.what();
49+
}
50+
}
51+
};
52+
53+
54+
/*
55+
Populate a collection of detectors from those we find on disk (*.xml), load
56+
their labels, add them to the engine & device, etc.
57+
58+
The detectors are loaded with all default values.
59+
*/
60+
void load_detectors_into(
61+
std::vector< detector_and_labels > & detectors,
62+
openvino::Core & engine,
63+
std::string const & device_name
64+
)
65+
{
66+
std::vector< std::string > xmls;
67+
fs::glob_relative( ".", "*.xml", xmls );
68+
for( auto path_to_xml : xmls )
69+
{
70+
detector_and_labels detector { path_to_xml };
71+
try
72+
{
73+
detector->load_into( engine, device_name ); // May throw!
74+
detector.load_labels();
75+
detectors.push_back( detector );
76+
LOG(INFO) << " ... press '" << char( '0' + detectors.size() ) << "' to switch to it";
77+
}
78+
catch( const std::exception & e )
79+
{
80+
// The model files should have been downloaded automatically by CMake into build/wrappers/openvino/dnn,
81+
// which is also where Visual Studio runs the sample from. However, you may need to copy these files:
82+
// *.bin
83+
// *.xml
84+
// *.labels [optional]
85+
// Into the local directory where you run from (or change the path given in the ctor above)
86+
LOG(ERROR) << "Failed to load model: " << e.what();
87+
}
88+
}
89+
}
90+
91+
92+
int main(int argc, char * argv[]) try
93+
{
94+
el::Configurations conf;
95+
conf.set( el::Level::Global, el::ConfigurationType::Format, "[%level] %msg" );
96+
//conf.set( el::Level::Debug, el::ConfigurationType::Enabled, "false" );
97+
el::Loggers::reconfigureLogger( "default", conf );
98+
rs2::log_to_console( RS2_LOG_SEVERITY_WARN ); // only warnings (and above) should come through
99+
100+
// Declare RealSense pipeline, encapsulating the actual device and sensors
101+
rs2::pipeline pipe;
102+
pipe.start();
103+
rs2::align align_to( RS2_STREAM_COLOR );
104+
105+
// Start the inference engine, needed to accomplish anything. We also add a CPU extension, allowing
106+
// us to run the inference on the CPU. A GPU solution may be possible but, at least without a GPU,
107+
// a CPU-bound process is faster. To change to GPU, use "GPU" instead (and disable the exception):
108+
openvino::Core engine;
109+
openvino_helpers::error_listener error_listener;
110+
engine.SetLogCallback( error_listener );
111+
std::string const device_name { "CPU" };
112+
engine.AddExtension( std::make_shared< openvino::Extensions::Cpu::CpuExtensions >(), device_name );
113+
114+
std::vector< detector_and_labels > detectors;
115+
load_detectors_into( detectors, engine, device_name );
116+
if( detectors.empty() )
117+
{
118+
LOG(ERROR) << "No detectors available in: " << fs::getcwd();
119+
return EXIT_FAILURE;
120+
}
121+
// Look for the mobilenet-ssd so it always starts the same... otherwise default to the first detector we found
122+
size_t current_detector = 0;
123+
for( size_t i = 1; i < detectors.size(); ++i )
124+
{
125+
if( detectors[i]->pathToModel == "mobilenet-ssd.xml" )
126+
{
127+
current_detector = i;
128+
break;
129+
}
130+
}
131+
auto p_detector = detectors[current_detector].detector;
132+
LOG(INFO) << "Current detector set to (" << current_detector+1 << ") \"" << openvino_helpers::remove_ext( p_detector->pathToModel ) << "\"";
133+
auto p_labels = &detectors[current_detector].labels;
134+
135+
const auto window_name = "OpenVINO DNN sample";
136+
cv::namedWindow( window_name, cv::WINDOW_AUTOSIZE );
137+
138+
cv::Mat prev_image;
139+
openvino_helpers::detected_faces faces;
140+
size_t id = 0;
141+
uint64 last_frame_number = 0;
142+
high_resolution_clock::time_point switch_time = high_resolution_clock::now();
143+
144+
while( cv::getWindowProperty( window_name, cv::WND_PROP_AUTOSIZE ) >= 0 )
145+
{
146+
// Wait for the next set of frames
147+
auto frames = pipe.wait_for_frames();
148+
// Make sure the frames are spatially aligned
149+
frames = align_to.process( frames );
150+
151+
auto color_frame = frames.get_color_frame();
152+
auto depth_frame = frames.get_depth_frame();
153+
154+
// If we only received a new depth frame, but the color did not update, continue
155+
if( color_frame.get_frame_number() == last_frame_number )
156+
continue;
157+
last_frame_number = color_frame.get_frame_number();
158+
159+
auto image = frame_to_mat( color_frame );
160+
161+
// We process the previous frame so if this is our first then queue it and continue
162+
if( ! p_detector->_request )
163+
{
164+
p_detector->enqueue( image );
165+
p_detector->submit_request();
166+
prev_image = image;
167+
continue;
168+
}
169+
170+
// Wait for the results of the previous frame we enqueued: we're going to process these
171+
p_detector->wait();
172+
auto const results = p_detector->fetch_results();
173+
174+
// Enqueue the current frame so we'd get the results when the next frame comes along!
175+
p_detector->enqueue( image );
176+
p_detector->submit_request();
177+
178+
openvino_helpers::detected_faces prev_faces { std::move( faces ) };
179+
faces.clear();
180+
for( size_t i = 0; i < results.size(); ++i )
181+
{
182+
auto const & result = results[i];
183+
if( result.label <= 0 )
184+
continue; // ignore "background", though not clear why we'd get it
185+
cv::Rect rect = result.location;
186+
rect = rect & cv::Rect( 0, 0, image.cols, image.rows );
187+
auto face_ptr = openvino_helpers::find_face( rect, prev_faces );
188+
if( !face_ptr )
189+
{
190+
// New face
191+
std::string label;
192+
if( result.label < p_labels->size() )
193+
label = (*p_labels)[result.label];
194+
face_ptr = std::make_shared< openvino_helpers::detected_face >( id++, label, rect );
195+
}
196+
else
197+
{
198+
// Existing face; just update its parameters
199+
face_ptr->move( rect );
200+
}
201+
faces.push_back( face_ptr );
202+
}
203+
204+
// Keep this image so we can actually process pieces of it once we have the results
205+
prev_image = image;
206+
207+
// Display the results (from the last frame) as rectangles on top (of the current frame)
208+
for( auto && face : faces )
209+
{
210+
cv::Scalar green( 0, 255, 0 ); // BGR
211+
auto r = face->get_location();
212+
cv::rectangle( image, r, green );
213+
214+
// Output the distance to the center
215+
auto center_x = (r.x + r.width / 2) * depth_frame.get_width() / color_frame.get_width();
216+
auto center_y = (r.y + r.height / 2) * depth_frame.get_height() / color_frame.get_height();
217+
auto d = depth_frame.get_distance( center_x, center_y );
218+
if( d )
219+
{
220+
std::ostringstream ss;
221+
ss << face->get_label() << " ";
222+
ss << std::setprecision( 2 ) << d;
223+
ss << " meters away";
224+
cv::Scalar white( 255, 255, 255 ); // BGR
225+
cv::putText( image, ss.str(), cv::Point( r.x + 5, r.y + r.height - 5 ), cv::FONT_HERSHEY_SIMPLEX, 0.4, white );
226+
}
227+
}
228+
229+
// Show the current detector number as an overlay over the image for 1 second
230+
double alpha = std::max( 0LL, 1000 - duration_cast<milliseconds>(high_resolution_clock::now() - switch_time).count() ) / 1000.;
231+
std::string str( 1, char( '1' + current_detector ));
232+
auto size = cv::getTextSize( str, cv::FONT_HERSHEY_SIMPLEX, 3, 1, nullptr );
233+
cv::Point center { image.cols / 2, image.rows / 2 };
234+
cv::Rect r { center.x - size.width, center.y - size.height, size.width * 2, size.height * 2 };
235+
cv::Mat roi = image( r );
236+
cv::Mat overlay( roi.size(), CV_8UC3, cv::Scalar( 32, 32, 32 ) );
237+
cv::putText( overlay, str, cv::Point { r.width / 2 - size.width / 2, r.height / 2 + size.height / 2 }, cv::FONT_HERSHEY_SIMPLEX, 3, cv::Scalar { 255, 255, 255 } );
238+
cv::addWeighted( overlay, alpha, roi, 1 - alpha, 0, roi ); // roi = overlay * alpha + roi * (1-alpha) + 0
239+
240+
imshow( window_name, image );
241+
const int key = cv::waitKey( 1 );
242+
if( key == 27 )
243+
break; // escape
244+
if( key >= '1' && key < '1' + detectors.size() )
245+
{
246+
size_t detector_index = key - '1';
247+
if( detector_index != current_detector )
248+
{
249+
current_detector = detector_index;
250+
p_detector = detectors[current_detector].detector;
251+
p_labels = &detectors[current_detector].labels;
252+
faces.clear();
253+
LOG(INFO) << "Current detector set to (" << current_detector+1 << ") \"" << openvino_helpers::remove_ext( p_detector->pathToModel ) << "\"";
254+
}
255+
switch_time = high_resolution_clock::now();
256+
}
257+
}
258+
259+
return EXIT_SUCCESS;
260+
}
261+
catch (const rs2::error & e)
262+
{
263+
LOG(ERROR) << "Caught RealSense exception from " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what();
264+
return EXIT_FAILURE;
265+
}
266+
catch (const std::exception& e)
267+
{
268+
LOG(ERROR) << "Unknown exception caught: " << e.what();
269+
return EXIT_FAILURE;
270+
}
271+
79.3 KB
Loading

0 commit comments

Comments
 (0)