Switch to DNN face detection

Haar Cascades can still be used by passing the "--haar-cascade" option.
2021-01-22 19:48:55 +08:00 · 2021-01-22 19:48:55 +08:00 · 625e9680d3
parent f72c81b79b
commit 625e9680d3
5 changed files with 1842 additions and 12 deletions
--- a/cvdata/deploy.prototxt
+++ b/cvdata/deploy.prototxt
--- a/cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel
+++ b/cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel
--- a/src/cv.cpp
+++ b/src/cv.cpp
@ -7,13 +7,19 @@
 #include <paths.hpp>
 cv::Ptr<cv::face::Facemark> facemark;
-cv::CascadeClassifier faceDetector;
+cv::CascadeClassifier haarFaceDetector;
 cv::dnn::Net dnnFaceDetector;
 cv::VideoCapture vid;
 cv::Mat frame, gray, small;
 bool useHaar;
-void initCV() {
+void initCV(bool haar) {
-	//TODO: switch to DNN face detection
+	useHaar = haar;
-	faceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
+
 	haarFaceDetector = cv::CascadeClassifier (resolvePath("cvdata/haarcascade_frontalface_alt2.xml"));
 	dnnFaceDetector = cv::dnn::readNetFromCaffe(
 			resolvePath("cvdata/deploy.prototxt"),
 			resolvePath("cvdata/res10_300x300_ssd_iter_140000_fp16.caffemodel") );
 	facemark = cv::face::FacemarkLBF::create();
 	facemark->loadModel (resolvePath("cvdata/lbfmodel.yaml"));
@ -21,24 +27,56 @@ void initCV() {
 	vid = cv::VideoCapture (0);
 }
 //Run the global dnnFaceDetector network on inFrame and append one cv::Rect
 //to *faces for every detection whose confidence exceeds 0.75.
 //  inFrame: image handed to cv::dnn::blobFromImage; its cols/rows are used
 //           to scale the detection coordinates into pixel units.
 //  faces:   output vector; results are appended with push_back (existing
 //           entries are kept). The pointer is dereferenced without a null check.
 void dnnFaceDetect(cv::Mat inFrame, std::vector<cv::Rect>* faces) {
 	//build the network input: scale factor 1.0, resized to 300x300, mean (104, 177, 123)
 	//subtracted; the two trailing false arguments are swapRB and crop in the
 	//OpenCV blobFromImage signature
 	cv::Mat inputBlob = cv::dnn::blobFromImage(inFrame, 1.0f, cv::Size(300, 300), cv::Scalar(104, 177, 123, 0), false, false);
 	//"data" and "detection_out" are presumably the input/output layer names
 	//from cvdata/deploy.prototxt - TODO confirm against the model file
 	dnnFaceDetector.setInput(inputBlob, "data");
 	cv::Mat output = dnnFaceDetector.forward("detection_out");
 	//view the output buffer as a 2D float matrix of size[2] rows by size[3] columns;
 	//this Mat wraps output's data pointer, so output must stay alive while it is used
 	cv::Mat detection(output.size[2], output.size[3], CV_32F, output.ptr<float>());
 	//each row is treated as one detection
 	for (int i = 0; i < detection.rows; i++) {
 		//column 2 is read as the confidence score
 		float confidence = detection.at<float>(i, 2);
 		if (confidence > 0.75f) {
 			//columns 3..6 are read as x1, y1, x2, y2 and multiplied by the frame size,
 			//i.e. they are treated as fractions of the frame (presumably in [0, 1]);
 			//assigning to int truncates the float product
 			int x1 = detection.at<float>(i, 3) * inFrame.cols;
 			int y1 = detection.at<float>(i, 4) * inFrame.rows;
 			int x2 = detection.at<float>(i, 5) * inFrame.cols;
 			int y2 = detection.at<float>(i, 6) * inFrame.rows;
 			cv::Point2f pt1(x1, y1);
 			cv::Point2f pt2(x2, y2);
 			//NOTE(review): the rectangle is built from the two corners as-is and is
 			//not clamped to the frame bounds - confirm downstream code tolerates that
 			faces->push_back(cv::Rect(pt1, pt2));
 		}
 	}
 }
 //process image and send controls to graphics
 void cvFrame() {
 	vid.read(frame);
 	cv::cvtColor (frame, gray, cv::COLOR_BGR2GRAY);
 	std::vector<cv::Rect> faces;
 	if (useHaar) {
 		//downsample image for face detection, works too slow on full res
 		cv::pyrDown (gray, small);
 		cv::pyrDown (small, small);
-	std::vector<cv::Rect> faces;
+		haarFaceDetector.detectMultiScale(small, faces);
-	faceDetector.detectMultiScale(small, faces);
+	} else {
 		dnnFaceDetect(frame, &faces);
 	}
 	//get biggest face
 	int biggestFace = 0;
 	int biggestArea = 0;
 	for (int i = 0; i < faces.size(); i++) {
 		//convert face region to full res, because we perform facemark on full res
 		if (useHaar) {
 			faces[i] = cv::Rect (faces[i].x * 4, faces[i].y * 4, faces[i].width * 4, faces[i].height * 4);
 		}
 		int iArea = faces[i].area();
 		if (iArea > biggestArea) {
--- a/src/cv.hpp
+++ b/src/cv.hpp
@ -1,7 +1,7 @@
 #ifndef CV_HPP
 #define CV_HPP
-void initCV();
+void initCV(bool haar);
 void cvFrame();
--- a/src/main.cpp
+++ b/src/main.cpp
@ -3,8 +3,9 @@
 #include <paths.hpp>
 #include <iostream>
 #include <cstring>
-int main () {
+int main (int argc, char** argv) {
 	std::cout << "Facecam2D is starting..." << std::endl;
 	initPrefixes();
@ -12,7 +13,8 @@ int main () {
 	std::cout << "Default asset prefix: " << prefixDefault << std::endl;
 	initGraphics();
-	initCV();
+	//TODO: real argument parsing
 	initCV(argc > 1 && strcmp(argv[1], "--haar-cascade") == 0);
 	while (true) {
 		cvFrame();