@@ -95,7 +95,8 @@ struct Bbox
9595 // Ensure valid dimensions
9696 if (bbox_width <= 0 || bbox_height <= 0 )
9797 {
98- throw std::invalid_argument (" Invalid bounding box dimensions" );
98+ fprintf (stderr, " Invalid bounding box dimensions\n " );
99+ return cv::Mat ();
99100 }
100101
101102 // Ensure coordinates are within image bounds
@@ -248,10 +249,14 @@ non_maximum_supression(const std::vector<Bbox>& bbox, float iou_thresh, bool cla
248249 return picked;
249250}
250251
251- static std::array <float , 3 > scale_wh (float w0, float h0, float w1, float h1)
252+ static std::vector <float > scale_wh (float w0, float h0, float w1, float h1)
252253{
253254 float r = std::min (w1 / w0, h1 / h0);
254- return {r, (float )std::round (w0 * r), (float )std::round (h0 * r)};
255+ std::vector<float > _scale_factor (3 );
256+ _scale_factor[0 ] = r;
257+ _scale_factor[1 ] = (float )std::round (w0 * r);
258+ _scale_factor[2 ] = (float )std::round (h0 * r);
259+ return _scale_factor;
255260}
256261
257262struct ImagePreProcessResults
@@ -279,7 +284,7 @@ static ImagePreProcessResults preprocess_yolo_kpts(cv::Mat& input_image, int inf
279284 int img_w = input_image.cols ;
280285 int img_h = input_image.rows ;
281286 float scale_factor, new_w, new_h;
282- std::array <float , 3 > _scale_factor = scale_wh (img_w, img_h, (float )infer_size, (float )infer_size);
287+ std::vector <float > _scale_factor = scale_wh (img_w, img_h, (float )infer_size, (float )infer_size);
283288 scale_factor = _scale_factor[0 ];
284289 new_w = _scale_factor[1 ];
285290 new_h = _scale_factor[2 ];
@@ -308,7 +313,14 @@ static DetectionResult parse_yolo_keypoints_results(ncnn::Mat& result,
308313 float iou_threshold,
309314 std::vector<std::string> class_names)
310315{
311- cv::Mat output = cv::Mat ((int )result.h , (int )result.w , CV_32F, result).t ();
316+ cv::Mat output ((int )result.w , (int )result.h , CV_32FC1);
317+ for (int i = 0 ; i < output.cols ; i++)
318+ {
319+ for (int j = 0 ; j < output.rows ; j++)
320+ {
321+ output.ptr <float >(j)[i] = result.row (i)[j];
322+ }
323+ }
312324 std::vector<Bbox> detections;
313325 std::vector<std::vector<float > > all_keypoints;
314326
@@ -318,7 +330,7 @@ static DetectionResult parse_yolo_keypoints_results(ncnn::Mat& result,
318330
319331 for (int i = 0 ; i < output.rows ; i++)
320332 {
321- const float * row_ptr = output.row (i). ptr <float >();
333+ const float * row_ptr = output.ptr <float >(i );
322334 const float * bboxes_ptr = row_ptr;
323335 const float * classes_ptr = row_ptr + 4 ;
324336 const float * max_s_ptr = std::max_element (classes_ptr, classes_ptr + num_classes);
@@ -350,9 +362,9 @@ static DetectionResult parse_yolo_keypoints_results(ncnn::Mat& result,
350362
351363 for (int k = 0 ; k < num_keypoints; k++)
352364 {
353- float kp_conf_raw = kp_ptr[k * kp_stride];
354- float kp_x = kp_ptr[k * kp_stride + 1 ];
355- float kp_y = kp_ptr[k * kp_stride + 2 ];
365+ float kp_x = kp_ptr[k * kp_stride];
366+ float kp_y = kp_ptr[k * kp_stride + 1 ];
367+ float kp_conf_raw = kp_ptr[k * kp_stride + 2 ];
356368
357369 // Apply sigmoid to convert logit to probability
358370 float kp_conf = 1 .0f / (1 .0f + expf (-kp_conf_raw));
@@ -438,7 +450,7 @@ static int estimate_norm(float* transform_matrix, const float* lmk, int image_si
438450 dst_points[i * 2 + 1 ] = ARCFACE_DST[i * 2 + 1 ] * ratio;
439451 }
440452
441- ncnn::get_affine_transform (src_points, dst_points , 5 , transform_matrix);
453+ ncnn::get_affine_transform (dst_points, src_points , 5 , transform_matrix);
442454
443455 return 0 ;
444456}
0 commit comments