Switch to DNN face detection

Haar Cascades can still be used by passing the "--haar-cascade" option.
2021-01-22 19:48:55 +08:00 · 2021-01-22 19:48:55 +08:00 · 625e9680d3
parent f72c81b79b
commit 625e9680d3
5 changed files with 1842 additions and 12 deletions
--- a/cvdata/deploy.prototxt
+++ b/cvdata/deploy.prototxt
--- a/cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel
+++ b/cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel
--- a/src/cv.cpp
+++ b/src/cv.cpp
@ -7,13 +7,19 @@
 #include <paths.hpp>

 cv::Ptr<cv::face::Facemark> facemark;
-cv::CascadeClassifier faceDetector;
+cv::CascadeClassifier haarFaceDetector;
+cv::dnn::Net dnnFaceDetector;
 cv::VideoCapture vid;
 cv::Mat frame, gray, small;
+bool useHaar;

-void initCV() {
-	//TODO: switch to DNN face detection
-	faceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
+void initCV(bool haar) {
+	useHaar = haar;
+
+	haarFaceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
+	dnnFaceDetector = cv::dnn::readNetFromCaffe(
+			resolvePath("cvdata/deploy.prototxt"),
+			resolvePath("cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel") );

 	facemark = cv::face::FacemarkLBF::create();
 	facemark->loadModel (resolvePath("cvdata/lbfmodel.yaml"));
@ -21,24 +27,56 @@ void initCV() {
 	vid = cv::VideoCapture (0);
 }

+//Run the DNN face detector (dnnFaceDetector) on inFrame and append one
+//bounding box per accepted detection to *faces.
+//
+//inFrame: image to search. cvFrame passes the frame exactly as read from the
+//         capture device (presumably 8-bit BGR, which is what swapRB=false
+//         below expects - confirm).
+//faces:   output vector; boxes are appended, existing entries are kept.
+//         Boxes are in inFrame pixel coordinates and clipped to the frame.
+//         A null pointer or an empty frame makes the call a no-op.
+void dnnFaceDetect(cv::Mat inFrame, std::vector<cv::Rect>* faces) {
+	//network input geometry, per-channel mean and acceptance threshold
+	const cv::Size kInputSize(300, 300);
+	const cv::Scalar kChannelMean(104, 177, 123, 0);
+	const float kMinConfidence = 0.75f;
+	//each detection row is read up to column 6 below
+	const int kDetectionCols = 7;
+
+	if (faces == nullptr || inFrame.empty()) {
+		return;
+	}
+
+	cv::Mat inputBlob = cv::dnn::blobFromImage(inFrame, 1.0f, kInputSize, kChannelMean, false, false);
+
+	dnnFaceDetector.setInput(inputBlob, "data");
+	cv::Mat output = dnnFaceDetector.forward("detection_out");
+
+	//the code below indexes the blob as 4-D with one detection per size[2]
+	//entry; bail out instead of reading out of bounds if that does not hold
+	if (output.dims != 4 || output.size[3] < kDetectionCols) {
+		return;
+	}
+
+	//2-D view over the same buffer (no copy): one row per detection
+	cv::Mat detection(output.size[2], output.size[3], CV_32F, output.ptr<float>());
+
+	const cv::Rect frameBounds(0, 0, inFrame.cols, inFrame.rows);
+
+	for (int i = 0; i < detection.rows; i++) {
+		const float* row = detection.ptr<float>(i);
+		const float confidence = row[2];
+
+		//written as a negated ">" so a NaN confidence is rejected too
+		if (!(confidence > kMinConfidence)) {
+			continue;
+		}
+
+		//columns 3..6 hold the box corners as fractions of the frame size
+		const int x1 = static_cast<int>(row[3] * inFrame.cols);
+		const int y1 = static_cast<int>(row[4] * inFrame.rows);
+		const int x2 = static_cast<int>(row[5] * inFrame.cols);
+		const int y2 = static_cast<int>(row[6] * inFrame.rows);
+
+		//the fractions can fall outside [0, 1] for faces cut off by the
+		//frame edge, so clip the box to the image before handing it on
+		const cv::Rect face = cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)) & frameBounds;
+
+		//skip boxes that are degenerate or lie completely outside the frame
+		if (face.area() > 0) {
+			faces->push_back(face);
+		}
+	}
+}
+
 //process image and send controls to graphics
 void cvFrame() {
 	vid.read(frame);

 	cv::cvtColor (frame, gray, cv::COLOR_BGR2GRAY);
-	//downsample image for face detection, works too slow on full res
-	cv::pyrDown (gray, small);
-	cv::pyrDown (small, small);

 	std::vector<cv::Rect> faces;
-	faceDetector.detectMultiScale(small, faces);
+
+	if (useHaar) {
+		//downsample the image first: Haar cascade detection is too slow at full resolution
+		cv::pyrDown (gray, small);
+		cv::pyrDown (small, small);
+
+		haarFaceDetector.detectMultiScale(small, faces);
+	} else {
+		dnnFaceDetect(frame, &faces);
+	}

 	//get biggest face
 	int biggestFace = 0;
 	int biggestArea = 0;
 	for (int i = 0; i < faces.size(); i++) {
 		//convert face region to full res, because we perform facemark on full res
-		faces[i] = cv::Rect (faces[i].x * 4, faces[i].y * 4, faces[i].width * 4, faces[i].height * 4);
+		if (useHaar) {
+			faces[i] = cv::Rect (faces[i].x * 4, faces[i].y * 4, faces[i].width * 4, faces[i].height * 4);
+		}

 		int iArea = faces[i].area();
 		if (iArea > biggestArea) {
--- a/src/cv.hpp
+++ b/src/cv.hpp
@ -1,7 +1,7 @@
 #ifndef CV_HPP
 #define CV_HPP

-void initCV();
+void initCV(bool haar);

 void cvFrame();

--- a/src/main.cpp
+++ b/src/main.cpp
@ -3,8 +3,9 @@
 #include <paths.hpp>

 #include <iostream>
+#include <cstring>

-int main () {
+int main (int argc, char** argv) {
 	std::cout << "Facecam2D is starting..." << std::endl;

 	initPrefixes();
@ -12,7 +13,8 @@ int main () {
 	std::cout << "Default asset prefix: " << prefixDefault << std::endl;

 	initGraphics();
-	initCV();
+	//TODO: real argument parsing
+	initCV(argc > 1 && strcmp(argv[1], "--haar-cascade") == 0);

 	while (true) {
 		cvFrame();