Switch to DNN face detection

Haar Cascades can still be used by passing the "--haar-cascade" option.
This commit is contained in:
Epicalert 2021-01-22 19:48:55 +08:00
parent f72c81b79b
commit 625e9680d3
No known key found for this signature in database
GPG key ID: CAA46F858D0979BD
5 changed files with 1842 additions and 12 deletions

1790
cvdata/deploy.prototxt Normal file

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -7,13 +7,19 @@
#include <paths.hpp> #include <paths.hpp>
cv::Ptr<cv::face::Facemark> facemark; cv::Ptr<cv::face::Facemark> facemark;
cv::CascadeClassifier faceDetector; cv::CascadeClassifier haarFaceDetector;
cv::dnn::Net dnnFaceDetector;
cv::VideoCapture vid; cv::VideoCapture vid;
cv::Mat frame, gray, small; cv::Mat frame, gray, small;
bool useHaar;
void initCV() { void initCV(bool haar) {
//TODO: switch to DNN face detection useHaar = haar;
faceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
haarFaceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
dnnFaceDetector = cv::dnn::readNetFromCaffe(
resolvePath("cvdata/deploy.prototxt"),
resolvePath("cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel") );
facemark = cv::face::FacemarkLBF::create(); facemark = cv::face::FacemarkLBF::create();
facemark->loadModel (resolvePath("cvdata/lbfmodel.yaml")); facemark->loadModel (resolvePath("cvdata/lbfmodel.yaml"));
@ -21,24 +27,56 @@ void initCV() {
vid = cv::VideoCapture (0); vid = cv::VideoCapture (0);
} }
// Runs the Caffe SSD face detector held in dnnFaceDetector on a full-resolution
// BGR frame and appends the bounding box of every confident detection to *faces.
//
//   inFrame  frame to search; it is resized to 300x300 for the network, but the
//            returned boxes are expressed in inFrame's own pixel coordinates
//   faces    output list, appended to (never cleared); must not be null
//
// Boxes are clipped to the frame, so callers can use them directly as regions
// of the full-resolution image. Nothing is appended when the frame is empty.
void dnnFaceDetect(cv::Mat inFrame, std::vector<cv::Rect>* faces) {
    // a failed camera read yields an empty frame, which blobFromImage cannot resize
    if (faces == nullptr || inFrame.empty()) {
        return;
    }

    // detections scoring at or below this are discarded
    const float minConfidence = 0.75f;
    const cv::Rect frameBounds(0, 0, inFrame.cols, inFrame.rows);

    // mean values are subtracted per channel; no R/B swap, no cropping
    cv::Mat inputBlob = cv::dnn::blobFromImage(inFrame, 1.0f, cv::Size(300, 300), cv::Scalar(104, 177, 123, 0), false, false);

    dnnFaceDetector.setInput(inputBlob, "data");
    cv::Mat output = dnnFaceDetector.forward("detection_out");

    // view the 4D network output as a 2D matrix with one detection per row
    // (shares output's data, no copy)
    cv::Mat detection(output.size[2], output.size[3], CV_32F, output.ptr<float>());

    for (int i = 0; i < detection.rows; i++) {
        // columns used here: 2 = confidence, 3..6 = x1, y1, x2, y2 as fractions of the frame size
        const float* row = detection.ptr<float>(i);

        if (row[2] <= minConfidence) {
            continue;
        }

        const int x1 = static_cast<int>(row[3] * inFrame.cols);
        const int y1 = static_cast<int>(row[4] * inFrame.rows);
        const int x2 = static_cast<int>(row[5] * inFrame.cols);
        const int y2 = static_cast<int>(row[6] * inFrame.rows);

        // the reported corners may lie outside the image, so keep only the
        // part of the box that is actually inside the frame
        const cv::Rect box = cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)) & frameBounds;

        if (box.area() > 0) {
            faces->push_back(box);
        }
    }
}
//process image and send controls to graphics //process image and send controls to graphics
void cvFrame() { void cvFrame() {
vid.read(frame); vid.read(frame);
cv::cvtColor (frame, gray, cv::COLOR_BGR2GRAY); cv::cvtColor (frame, gray, cv::COLOR_BGR2GRAY);
//downsample image for face detection, works too slow on full res
cv::pyrDown (gray, small);
cv::pyrDown (small, small);
std::vector<cv::Rect> faces; std::vector<cv::Rect> faces;
faceDetector.detectMultiScale(small, faces);
if (useHaar) {
//downsample image for face detection, works too slow on full res
cv::pyrDown (gray, small);
cv::pyrDown (small, small);
haarFaceDetector.detectMultiScale(small, faces);
} else {
dnnFaceDetect(frame, &faces);
}
//get biggest face //get biggest face
int biggestFace = 0; int biggestFace = 0;
int biggestArea = 0; int biggestArea = 0;
for (int i = 0; i < faces.size(); i++) { for (int i = 0; i < faces.size(); i++) {
//convert face region to full res, because we perform facemark on full res //convert face region to full res, because we perform facemark on full res
faces[i] = cv::Rect (faces[i].x * 4, faces[i].y * 4, faces[i].width * 4, faces[i].height * 4); if (useHaar) {
faces[i] = cv::Rect (faces[i].x * 4, faces[i].y * 4, faces[i].width * 4, faces[i].height * 4);
}
int iArea = faces[i].area(); int iArea = faces[i].area();
if (iArea > biggestArea) { if (iArea > biggestArea) {

View file

@ -1,7 +1,7 @@
#ifndef CV_HPP #ifndef CV_HPP
#define CV_HPP #define CV_HPP
void initCV(); void initCV(bool haar);
void cvFrame(); void cvFrame();

View file

@ -3,8 +3,9 @@
#include <paths.hpp> #include <paths.hpp>
#include <iostream> #include <iostream>
#include <cstring>
int main () { int main (int argc, char** argv) {
std::cout << "Facecam2D is starting..." << std::endl; std::cout << "Facecam2D is starting..." << std::endl;
initPrefixes(); initPrefixes();
@ -12,7 +13,8 @@ int main () {
std::cout << "Default asset prefix: " << prefixDefault << std::endl; std::cout << "Default asset prefix: " << prefixDefault << std::endl;
initGraphics(); initGraphics();
initCV(); //TODO: real argument parsing
initCV(argc > 1 && strcmp(argv[1], "--haar-cascade") == 0);
while (true) { while (true) {
cvFrame(); cvFrame();