28 #ifndef KERNEL_BP_STEREO_CPU_H
29 #define KERNEL_BP_STEREO_CPU_H
// Declaration fragment: kernel templated on the data type T, the compile-time
// disparity count DISP_VALS and the acceleration setting ACCELERATION.
// Visible parameters: the two input images (read-only float arrays), the two
// writable data-cost checkerboards, lambda_bp, data_k_bp and bp_settings_disp_vals.
// NOTE(review): the declarator line naming the function is absent from this listing;
// the parameter list matches the definition later in this file that calls
// beliefprop::InitializeBottomLevelDataPixel for each pixel.
55 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
58 const float* image_1_pixels_device,
const float* image_2_pixels_device,
59 T* data_cost_stereo_checkerboard_0, T* data_cost_stereo_checkerboard_1,
60 float lambda_bp,
float data_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment: kernel that reads two data-cost checkerboards (const T*)
// and writes data_cost_current_level; offset_num and bp_settings_disp_vals are
// passed through as unsigned values.
// NOTE(review): the declarator line is absent from this listing; the parameters match
// the definition later in this file that calls beliefprop::InitializeCurrentLevelDataPixel.
65 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
69 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
70 T* data_cost_current_level,
unsigned int offset_num,
unsigned int bp_settings_disp_vals,
// Declaration fragment: kernel taking the eight writable message arrays
// (u/d/l/r for checkerboard 0 and checkerboard 1) plus bp_settings_disp_vals.
// NOTE(review): the declarator line is absent from this listing; the parameters match the
// definition later in this file that calls beliefprop::InitializeMessageValsToDefaultKernelPixel.
75 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
78 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
79 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
80 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
81 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
82 unsigned int bp_settings_disp_vals,
// Declaration fragment: kernel that reads the two data-cost checkerboards and
// takes the eight message arrays as writable pointers, together with disc_k_bp.
// NOTE(review): the declarator line is absent from this listing.
// NOTE(review): the last visible parameter is spelled bp_settings_num_disp_vals here,
// while the neighbouring declarations use bp_settings_disp_vals.
89 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
93 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
94 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
95 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
96 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
97 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
98 float disc_k_bp,
unsigned int bp_settings_num_disp_vals,
// Declaration fragment: same visible parameter shape as the preceding declaration
// (read-only data costs, eight writable message arrays, disc_k_bp, disparity count).
// NOTE(review): the declarator line is absent from this listing, so which definition
// this declares cannot be confirmed from here.
101 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
105 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
106 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
107 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
108 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
109 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
110 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment: kernel taking sixteen message pointers - eight "prev"
// arrays followed by eight non-"prev" arrays (u/d/l/r for each checkerboard).
// NOTE(review): every message pointer is shown as const T* in this listing, including the
// non-"prev" set; confirm against the definition whether that set is meant to be written.
// NOTE(review): the declarator line is absent from this listing.
117 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
122 const T* message_u_prev_checkerboard_0,
const T* message_d_prev_checkerboard_0,
123 const T* message_l_prev_checkerboard_0,
const T* message_r_prev_checkerboard_0,
124 const T* message_u_prev_checkerboard_1,
const T* message_d_prev_checkerboard_1,
125 const T* message_l_prev_checkerboard_1,
const T* message_r_prev_checkerboard_1,
126 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
127 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
128 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
129 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1,
130 unsigned int bp_settings_disp_vals,
// Declaration fragment: kernel that reads the data costs and the eight "prev"
// message arrays (all const T*) and writes a float array,
// disparity_between_images_device.
// NOTE(review): the declarator line is absent from this listing.
135 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
138 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
139 const T* message_u_prev_checkerboard_0,
const T* message_d_prev_checkerboard_0,
140 const T* message_l_prev_checkerboard_0,
const T* message_r_prev_checkerboard_0,
141 const T* message_u_prev_checkerboard_1,
const T* message_d_prev_checkerboard_1,
142 const T* message_l_prev_checkerboard_1,
const T* message_r_prev_checkerboard_1,
143 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment: same visible parameters as the preceding declaration
// (read-only data costs and "prev" messages, writable float disparity output),
// but templated on four type parameters (T, U, V, W) plus DISP_VALS instead of
// an acceleration setting.
// NOTE(review): the declarator line is absent from this listing.
148 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W,
unsigned int DISP_VALS>
151 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
152 const T* message_u_prev_checkerboard_0,
const T* message_d_prev_checkerboard_0,
153 const T* message_l_prev_checkerboard_0,
const T* message_r_prev_checkerboard_0,
154 const T* message_u_prev_checkerboard_1,
const T* message_d_prev_checkerboard_1,
155 const T* message_l_prev_checkerboard_1,
const T* message_r_prev_checkerboard_1,
156 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float overload: read-only float data costs and "prev"
// messages for both checkerboards, writable float disparity output.
// NOTE(review): the declarator line is absent from this listing.
159 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
162 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
163 const float* message_u_prev_checkerboard_0,
const float* message_d_prev_checkerboard_0,
164 const float* message_l_prev_checkerboard_0,
const float* message_r_prev_checkerboard_0,
165 const float* message_u_prev_checkerboard_1,
const float* message_d_prev_checkerboard_1,
166 const float* message_l_prev_checkerboard_1,
const float* message_r_prev_checkerboard_1,
167 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, short overload: data costs and "prev" messages are
// read as const short*; the disparity output is still a float array.
// NOTE(review): the declarator line is absent from this listing.
170 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
173 const short* data_cost_checkerboard_0,
const short* data_cost_checkerboard_1,
174 const short* message_u_prev_checkerboard_0,
const short* message_d_prev_checkerboard_0,
175 const short* message_l_prev_checkerboard_0,
const short* message_r_prev_checkerboard_0,
176 const short* message_u_prev_checkerboard_1,
const short* message_d_prev_checkerboard_1,
177 const short* message_l_prev_checkerboard_1,
const short* message_r_prev_checkerboard_1,
178 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, _Float16 overload, opened under FLOAT16_VECTORIZATION:
// data costs and "prev" messages are const _Float16*; output is a float array.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
181 #if defined(FLOAT16_VECTORIZATION)
183 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
186 const _Float16* data_cost_checkerboard_0,
const _Float16* data_cost_checkerboard_1,
187 const _Float16* message_u_prev_checkerboard_0,
const _Float16* message_d_prev_checkerboard_0,
188 const _Float16* message_l_prev_checkerboard_0,
const _Float16* message_r_prev_checkerboard_0,
189 const _Float16* message_u_prev_checkerboard_1,
const _Float16* message_d_prev_checkerboard_1,
190 const _Float16* message_l_prev_checkerboard_1,
const _Float16* message_r_prev_checkerboard_1,
191 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: data costs and "prev" messages are
// const double*; the disparity output is a float array.
// NOTE(review): the declarator line is absent from this listing.
196 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
199 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
200 const double* message_u_prev_checkerboard_0,
const double* message_d_prev_checkerboard_0,
201 const double* message_l_prev_checkerboard_0,
const double* message_r_prev_checkerboard_0,
202 const double* message_u_prev_checkerboard_1,
const double* message_d_prev_checkerboard_1,
203 const double* message_l_prev_checkerboard_1,
const double* message_r_prev_checkerboard_1,
204 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float overload, opened under the condition that
// CPU_VECTORIZATION_DEFINE equals AVX_512_DEFINE or AVX_512_F16_DEFINE.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
207 #if ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE))
208 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
211 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
212 const float* message_u_prev_checkerboard_0,
const float* message_d_prev_checkerboard_0,
213 const float* message_l_prev_checkerboard_0,
const float* message_r_prev_checkerboard_0,
214 const float* message_u_prev_checkerboard_1,
const float* message_d_prev_checkerboard_1,
215 const float* message_l_prev_checkerboard_1,
const float* message_r_prev_checkerboard_1,
216 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, short overload: const short* data costs and "prev"
// messages, writable float disparity output.
// NOTE(review): the declarator line is absent from this listing; whether this still sits
// inside the preceding AVX-512 conditional cannot be confirmed because no #endif is shown.
219 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
222 const short* data_cost_checkerboard_0,
const short* data_cost_checkerboard_1,
223 const short* message_u_prev_checkerboard_0,
const short* message_d_prev_checkerboard_0,
224 const short* message_l_prev_checkerboard_0,
const short* message_r_prev_checkerboard_0,
225 const short* message_u_prev_checkerboard_1,
const short* message_d_prev_checkerboard_1,
226 const short* message_l_prev_checkerboard_1,
const short* message_r_prev_checkerboard_1,
227 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, _Float16 overload, opened under FLOAT16_VECTORIZATION:
// const _Float16* data costs and "prev" messages, writable float disparity output.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
230 #if defined(FLOAT16_VECTORIZATION)
232 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
235 const _Float16* data_cost_checkerboard_0,
const _Float16* data_cost_checkerboard_1,
236 const _Float16* message_u_prev_checkerboard_0,
const _Float16* message_d_prev_checkerboard_0,
237 const _Float16* message_l_prev_checkerboard_0,
const _Float16* message_r_prev_checkerboard_0,
238 const _Float16* message_u_prev_checkerboard_1,
const _Float16* message_d_prev_checkerboard_1,
239 const _Float16* message_l_prev_checkerboard_1,
const _Float16* message_r_prev_checkerboard_1,
240 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: const double* data costs and "prev"
// messages, writable float disparity output.
// NOTE(review): the declarator line is absent from this listing.
245 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
248 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
249 const double* message_u_prev_checkerboard_0,
const double* message_d_prev_checkerboard_0,
250 const double* message_l_prev_checkerboard_0,
const double* message_r_prev_checkerboard_0,
251 const double* message_u_prev_checkerboard_1,
const double* message_d_prev_checkerboard_1,
252 const double* message_l_prev_checkerboard_1,
const double* message_r_prev_checkerboard_1,
253 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float overload: const float* data costs and "prev"
// messages, writable float disparity output.
// NOTE(review): the declarator line is absent from this listing, so what distinguishes
// this from the earlier float overloads is not visible here.
257 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
260 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
261 const float* message_u_prev_checkerboard_0,
const float* message_d_prev_checkerboard_0,
262 const float* message_l_prev_checkerboard_0,
const float* message_r_prev_checkerboard_0,
263 const float* message_u_prev_checkerboard_1,
const float* message_d_prev_checkerboard_1,
264 const float* message_l_prev_checkerboard_1,
const float* message_r_prev_checkerboard_1,
265 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: const double* data costs and "prev"
// messages, writable float disparity output.
// NOTE(review): the declarator line is absent from this listing.
268 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
271 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
272 const double* message_u_prev_checkerboard_0,
const double* message_d_prev_checkerboard_0,
273 const double* message_l_prev_checkerboard_0,
const double* message_r_prev_checkerboard_0,
274 const double* message_u_prev_checkerboard_1,
const double* message_d_prev_checkerboard_1,
275 const double* message_l_prev_checkerboard_1,
const double* message_r_prev_checkerboard_1,
276 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float16_t overload, opened under COMPILING_FOR_ARM:
// const float16_t* data costs and "prev" messages, writable float disparity output.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
279 #if defined(COMPILING_FOR_ARM)
280 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
283 const float16_t* data_cost_checkerboard_0,
const float16_t* data_cost_checkerboard_1,
284 const float16_t* message_u_prev_checkerboard_0,
const float16_t* message_d_prev_checkerboard_0,
285 const float16_t* message_l_prev_checkerboard_0,
const float16_t* message_r_prev_checkerboard_0,
286 const float16_t* message_u_prev_checkerboard_1,
const float16_t* message_d_prev_checkerboard_1,
287 const float16_t* message_l_prev_checkerboard_1,
const float16_t* message_r_prev_checkerboard_1,
288 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Declaration fragment (compile-time disparity count): takes a start x position
// and a row, four incoming message arrays and a data array of DISP_VALS vector
// values (type U) each, four writable output message pointers (type T), the
// vector disc_k_bp_vect and an alignment flag.
// NOTE(review): the declarator line is absent from this listing; the parameter list matches
// the definition later in this file that issues four MsgStereoSIMD<T, U, DISP_VALS> calls.
294 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
296 unsigned int x_val_start_processing,
unsigned int y_val,
298 const U prev_u_message[DISP_VALS],
const U prev_d_message[DISP_VALS],
299 const U prev_l_message[DISP_VALS],
const U prev_r_message[DISP_VALS],
300 const U data_message[DISP_VALS],
301 T* current_u_message, T* current_d_message,
302 T* current_l_message, T* current_r_message,
303 const U disc_k_bp_vect,
bool data_aligned);
// Declaration fragment (run-time disparity count): same role as the preceding
// declaration, but the message and data arrays are plain const U* pointers and
// the disparity count is passed as bp_settings_disp_vals.
// NOTE(review): the declarator line is absent from this listing.
305 template<RunData_t T, RunDataVect_t U>
307 unsigned int x_val_start_processing,
unsigned int y_val,
309 const U* prev_u_message,
const U* prev_d_message,
310 const U* prev_l_message,
const U* prev_r_message,
311 const U* data_message,
312 T* current_u_message, T* current_d_message,
313 T* current_l_message, T* current_r_message,
314 const U disc_k_bp_vect,
bool data_aligned,
315 unsigned int bp_settings_disp_vals);
// Declaration fragment, float overload: const float* data costs, eight writable
// float message arrays, disc_k_bp and the disparity count.
// NOTE(review): the declarator line is absent from this listing.
317 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
321 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
322 float* message_u_checkerboard_0,
float* message_d_checkerboard_0,
323 float* message_l_checkerboard_0,
float* message_r_checkerboard_0,
324 float* message_u_checkerboard_1,
float* message_d_checkerboard_1,
325 float* message_l_checkerboard_1,
float* message_r_checkerboard_1,
326 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, short overload: const short* data costs, eight writable
// short message arrays; disc_k_bp remains a float.
// NOTE(review): the declarator line is absent from this listing.
329 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
333 const short* data_cost_checkerboard_0,
const short* data_cost_checkerboard_1,
334 short* message_u_checkerboard_0,
short* message_d_checkerboard_0,
335 short* message_l_checkerboard_0,
short* message_r_checkerboard_0,
336 short* message_u_checkerboard_1,
short* message_d_checkerboard_1,
337 short* message_l_checkerboard_1,
short* message_r_checkerboard_1,
338 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, _Float16 overload, opened under FLOAT16_VECTORIZATION:
// const _Float16* data costs, eight writable _Float16 message arrays, float disc_k_bp.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
341 #if defined(FLOAT16_VECTORIZATION)
343 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
347 const _Float16* data_cost_checkerboard_0,
const _Float16* data_cost_checkerboard_1,
348 _Float16* message_u_checkerboard_0, _Float16* message_d_checkerboard_0,
349 _Float16* message_l_checkerboard_0, _Float16* message_r_checkerboard_0,
350 _Float16* message_u_checkerboard_1, _Float16* message_d_checkerboard_1,
351 _Float16* message_l_checkerboard_1, _Float16* message_r_checkerboard_1,
352 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: const double* data costs, eight
// writable double message arrays; disc_k_bp remains a float.
// NOTE(review): the declarator line is absent from this listing.
357 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
361 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
362 double* message_u_checkerboard_0,
double* message_d_checkerboard_0,
363 double* message_l_checkerboard_0,
double* message_r_checkerboard_0,
364 double* message_u_checkerboard_1,
double* message_d_checkerboard_1,
365 double* message_l_checkerboard_1,
double* message_r_checkerboard_1,
366 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float overload, opened under the condition that
// CPU_VECTORIZATION_DEFINE equals AVX_512_DEFINE or AVX_512_F16_DEFINE.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
369 #if ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE))
370 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
374 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
375 float* message_u_checkerboard_0,
float* message_d_checkerboard_0,
376 float* message_l_checkerboard_0,
float* message_r_checkerboard_0,
377 float* message_u_checkerboard_1,
float* message_d_checkerboard_1,
378 float* message_l_checkerboard_1,
float* message_r_checkerboard_1,
379 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, short overload: const short* data costs, eight writable
// short message arrays, float disc_k_bp.
// NOTE(review): the declarator line is absent from this listing; whether this still sits
// inside the preceding AVX-512 conditional cannot be confirmed because no #endif is shown.
382 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
386 const short* data_cost_checkerboard_0,
const short* data_cost_checkerboard_1,
387 short* message_u_checkerboard_0,
short* message_d_checkerboard_0,
388 short* message_l_checkerboard_0,
short* message_r_checkerboard_0,
389 short* message_u_checkerboard_1,
short* message_d_checkerboard_1,
390 short* message_l_checkerboard_1,
short* message_r_checkerboard_1,
391 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, _Float16 overload, opened under FLOAT16_VECTORIZATION:
// const _Float16* data costs, eight writable _Float16 message arrays, float disc_k_bp.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
394 #if defined(FLOAT16_VECTORIZATION)
396 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
400 const _Float16* data_cost_checkerboard_0,
const _Float16* data_cost_checkerboard_1,
401 _Float16* message_u_checkerboard_0, _Float16* message_d_checkerboard_0,
402 _Float16* message_l_checkerboard_0, _Float16* message_r_checkerboard_0,
403 _Float16* message_u_checkerboard_1, _Float16* message_d_checkerboard_1,
404 _Float16* message_l_checkerboard_1, _Float16* message_r_checkerboard_1,
405 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: const double* data costs, eight
// writable double message arrays, float disc_k_bp.
// NOTE(review): the declarator line is absent from this listing.
410 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
414 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
415 double* message_u_checkerboard_0,
double* message_d_checkerboard_0,
416 double* message_l_checkerboard_0,
double* message_r_checkerboard_0,
417 double* message_u_checkerboard_1,
double* message_d_checkerboard_1,
418 double* message_l_checkerboard_1,
double* message_r_checkerboard_1,
419 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float overload: const float* data costs, eight writable
// float message arrays, float disc_k_bp.
// NOTE(review): the declarator line is absent from this listing, so what distinguishes
// this from the earlier float overloads is not visible here.
423 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
427 const float* data_cost_checkerboard_0,
const float* data_cost_checkerboard_1,
428 float* message_u_checkerboard_0,
float* message_d_checkerboard_0,
429 float* message_l_checkerboard_0,
float* message_r_checkerboard_0,
430 float* message_u_checkerboard_1,
float* message_d_checkerboard_1,
431 float* message_l_checkerboard_1,
float* message_r_checkerboard_1,
432 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, double overload: const double* data costs, eight
// writable double message arrays, float disc_k_bp.
// NOTE(review): the declarator line is absent from this listing.
435 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
439 const double* data_cost_checkerboard_0,
const double* data_cost_checkerboard_1,
440 double* message_u_checkerboard_0,
double* message_d_checkerboard_0,
441 double* message_l_checkerboard_0,
double* message_r_checkerboard_0,
442 double* message_u_checkerboard_1,
double* message_d_checkerboard_1,
443 double* message_l_checkerboard_1,
double* message_r_checkerboard_1,
444 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment, float16_t overload, opened under COMPILING_FOR_ARM:
// const float16_t* data costs, eight writable float16_t message arrays, float disc_k_bp.
// NOTE(review): the declarator line and the matching #endif are absent from this listing.
447 #if defined(COMPILING_FOR_ARM)
448 template<
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
452 const float16_t* data_cost_checkerboard_0,
const float16_t* data_cost_checkerboard_1,
453 float16_t* message_u_checkerboard_0, float16_t* message_d_checkerboard_0,
454 float16_t* message_l_checkerboard_0, float16_t* message_r_checkerboard_0,
455 float16_t* message_u_checkerboard_1, float16_t* message_d_checkerboard_1,
456 float16_t* message_l_checkerboard_1, float16_t* message_r_checkerboard_1,
457 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment: generic form templated on the scalar type T and the
// vector type U (plus DISP_VALS) rather than on a concrete element type.
// Visible parameters: const T* data costs, eight writable T message arrays,
// float disc_k_bp and the disparity count.
// NOTE(review): the declarator line is absent from this listing.
461 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
465 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
466 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
467 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
468 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
469 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
470 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Declaration fragment (compile-time disparity count): takes a position
// (x_val, y_val), three neighbour message arrays and a data-cost array of
// DISP_VALS vector values each, a writable destination pointer, disc_k_bp by
// const reference and an alignment flag.
// NOTE(review): the declarator line is absent from this listing; the parameter shape agrees
// with the MsgStereoSIMD<T, U, DISP_VALS>(...) calls made later in this file.
474 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
476 unsigned int x_val,
unsigned int y_val,
478 const U messages_neighbor_1[DISP_VALS],
const U messages_neighbor_2[DISP_VALS],
479 const U messages_neighbor_3[DISP_VALS],
const U data_costs[DISP_VALS],
480 T* dst_message_array,
const U& disc_k_bp,
bool data_aligned);
// Declaration fragment (run-time disparity count): same parameters as the
// preceding declaration, with pointer arguments instead of fixed-size arrays and
// an extra bp_settings_disp_vals argument.
// NOTE(review): the declarator line is absent from this listing; the parameter shape agrees
// with the MsgStereoSIMD<T, U>(...) calls made later in this file.
483 template<RunData_t T, RunDataVect_t U>
485 unsigned int x_val,
unsigned int y_val,
487 const U* messages_neighbor_1,
const U* messages_neighbor_2,
488 const U* messages_neighbor_3,
const U* data_costs,
489 T* dst_message_array,
const U& disc_k_bp,
bool data_aligned,
490 unsigned int bp_settings_disp_vals);
// Declaration fragment (run-time disparity count): same visible parameters as
// the two-type-parameter declaration above, but additionally templated on the
// processing types V (RunDataProcess_t) and W (RunDataVectProcess_t).
// NOTE(review): the declarator line is absent from this listing; how V and W are used
// is not visible here.
493 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W>
495 unsigned int x_val,
unsigned int y_val,
497 const U* messages_neighbor_1,
const U* messages_neighbor_2,
498 const U* messages_neighbor_3,
const U* data_costs,
499 T* dst_message_array,
const U& disc_k_bp,
bool data_aligned,
500 unsigned int bp_settings_disp_vals);
// Declaration fragment (compile-time disparity count): four type parameters
// (T, U, V, W) plus DISP_VALS; the neighbour and data-cost arguments are arrays
// of DISP_VALS vector values.
// NOTE(review): the declarator line is absent from this listing.
503 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W,
unsigned int DISP_VALS>
505 unsigned int x_val,
unsigned int y_val,
507 const U messages_neighbor_1[DISP_VALS],
const U messages_neighbor_2[DISP_VALS],
508 const U messages_neighbor_3[DISP_VALS],
const U data_costs[DISP_VALS],
509 T* dst_message_array,
const U& disc_k_bp,
bool data_aligned);
// NOTE(review): the first template header below has no declarator in this listing
// (presumably a compile-time-DISP_VALS counterpart of DtStereoSIMD - confirm).
513 template<RunDataProcess_t T, RunDataVectProcess_t U,
unsigned int DISP_VALS>
// DtStereoSIMD, run-time disparity count: operates on the array f of vector
// values (type U); f is a non-const pointer, so the callee may modify it.
518 template<RunDataProcess_t T, RunDataVectProcess_t U>
519 void DtStereoSIMD(U* f,
unsigned int bp_settings_disp_vals);
// Generic fallback for an unsupported vector type: the visible body only prints
// a "not supported" message to std::cout and does not touch best_disparities or
// best_vals.
// NOTE(review): the declarator line and closing brace are absent from this listing;
// type-specific versions are presumably provided elsewhere - confirm.
521 template<RunDataVectProcess_t T>
523 T& best_disparities, T& best_vals,
524 const T& current_disparity,
const T& val_at_disp) {
525 std::cout <<
"Data type not supported for updating best disparities and values" << std::endl;
// Declaration fragment: takes a position (x_val, y_val) and read-only pointers
// to the two data-cost checkerboards and all eight message arrays; nothing is
// passed as writable.
// NOTE(review): the declarator line is absent from this listing.
528 template<RunData_t T,
unsigned int DISP_VALS>
530 unsigned int x_val,
unsigned int y_val,
532 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
533 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
534 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
535 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
536 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1);
// Declaration fragment: identical visible parameter list to the preceding
// declaration (position plus read-only data costs and eight message arrays).
// NOTE(review): the declarator line is absent from this listing, so how the two
// functions differ is not visible here.
538 template<RunData_t T,
unsigned int DISP_VALS>
540 unsigned int x_val,
unsigned int y_val,
542 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
543 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
544 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
545 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
546 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1);
550 #if defined(COMPILING_FOR_ARM)
552 #if (CPU_VECTORIZATION_DEFINE == NEON_DEFINE)
558 #if ((CPU_VECTORIZATION_DEFINE == AVX_256_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_256_F16_DEFINE))
560 #elif ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE))
// Definition fragment: fills the bottom-level data costs by invoking
// beliefprop::InitializeBottomLevelDataPixel once per pixel of the current level.
// NOTE(review): the declarator line, braces and several preprocessor lines are absent
// from this listing, so the text below is not compilable as shown.
571 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
574 const float* image_1_pixels_device,
const float* image_2_pixels_device,
575 T* data_cost_stereo_checkerboard_0, T* data_cost_stereo_checkerboard_1,
576 float lambda_bp,
float data_k_bp,
unsigned int bp_settings_disp_vals,
// With SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU defined, the OpenMP loop carries an
// explicit num_threads(num_threads_kernel) clause; a plain "parallel for" pragma
// also appears (presumably the #else branch - the #else/#endif lines are not shown).
579 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
580 int num_threads_kernel{
583 #pragma omp parallel for num_threads(num_threads_kernel)
585 #pragma omp parallel for
// Two alternative loop headers over the flattened index range
// width_level_ * height_level_, differing only in the index type (int vs unsigned int).
// NOTE(review): the condition that selects between them is not visible here.
588 for (
int val = 0; val < (current_bp_level.width_level_*current_bp_level.height_level_); val++)
590 for (
unsigned int val = 0; val < (current_bp_level.width_level_*current_bp_level.height_level_); val++)
// Recover the row (quotient) and column (remainder) from the flattened index.
593 const unsigned int y_val = val / current_bp_level.width_level_;
594 const unsigned int x_val = val % current_bp_level.width_level_;
// Per-pixel work is delegated; all kernel arguments are forwarded unchanged.
596 beliefprop::InitializeBottomLevelDataPixel<T, DISP_VALS>(x_val, y_val, current_bp_level,
597 image_1_pixels_device, image_2_pixels_device,
598 data_cost_stereo_checkerboard_0, data_cost_stereo_checkerboard_1,
599 lambda_bp, data_k_bp, bp_settings_disp_vals);
// Definition fragment: computes the data costs of the current level by invoking
// beliefprop::InitializeCurrentLevelDataPixel once per checkerboard position.
// NOTE(review): the declarator line, braces and several preprocessor lines are absent
// from this listing, so the text below is not compilable as shown.
604 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
609 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
610 T* data_cost_current_level,
unsigned int offset_num,
unsigned int bp_settings_disp_vals,
// OpenMP parallel loop, with an explicit thread count when
// SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU is defined (the line defining
// num_threads_kernel is not shown here).
613 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
616 #pragma omp parallel for num_threads(num_threads_kernel)
618 #pragma omp parallel for
// Two alternative loop headers over width_checkerboard_level_ * height_level_,
// differing only in the index type; the selecting condition is not visible here.
621 for (
int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
623 for (
unsigned int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
// Recover the row and the column within the checkerboard from the flattened index.
626 const unsigned int y_val = val / current_bp_level.width_checkerboard_level_;
627 const unsigned int x_val = val % current_bp_level.width_checkerboard_level_;
// Compile-time branch: when T is a 2-byte type (and a further condition that is cut
// off in this listing holds) the per-pixel routine is instantiated with float as its
// second template argument; otherwise it is instantiated with T itself.
631 if constexpr ((
sizeof(T) == 2) &&
636 beliefprop::InitializeCurrentLevelDataPixel<T, float, DISP_VALS>(
637 x_val, y_val, checkerboard_part,
638 current_bp_level, prev_bp_level,
639 data_cost_checkerboard_0, data_cost_checkerboard_1,
640 data_cost_current_level, offset_num, bp_settings_disp_vals);
644 beliefprop::InitializeCurrentLevelDataPixel<T, T, DISP_VALS>(
645 x_val, y_val, checkerboard_part,
646 current_bp_level, prev_bp_level,
647 data_cost_checkerboard_0, data_cost_checkerboard_1,
648 data_cost_current_level, offset_num, bp_settings_disp_vals);
// Definition fragment: invokes beliefprop::InitializeMessageValsToDefaultKernelPixel
// once per checkerboard position, passing all eight message arrays.
// NOTE(review): the declarator line, braces and several preprocessor lines are absent
// from this listing, so the text below is not compilable as shown.
654 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
657 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
658 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
659 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
660 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
661 unsigned int bp_settings_disp_vals,
// OpenMP parallel loop, with an explicit thread count when
// SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU is defined.
664 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
665 int num_threads_kernel{
668 #pragma omp parallel for num_threads(num_threads_kernel)
670 #pragma omp parallel for
// Two alternative loop headers over width_checkerboard_level_ * height_level_,
// differing only in the index type; the selecting condition is not visible here.
673 for (
int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
675 for (
unsigned int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
// Recover the row and the column within the checkerboard from the flattened index.
678 const unsigned int y_val = val / current_bp_level.width_checkerboard_level_;
679 const unsigned int x_val_in_checkerboard = val % current_bp_level.width_checkerboard_level_;
681 beliefprop::InitializeMessageValsToDefaultKernelPixel<T, DISP_VALS>(
682 x_val_in_checkerboard, y_val, current_bp_level,
683 message_u_checkerboard_0, message_d_checkerboard_0,
684 message_l_checkerboard_0, message_r_checkerboard_0,
685 message_u_checkerboard_1, message_d_checkerboard_1,
686 message_l_checkerboard_1, message_r_checkerboard_1,
687 bp_settings_disp_vals);
// Definition fragment: runs one checkerboard update by invoking
// beliefprop::RunBPIterationUsingCheckerboardUpdatesKernel once per position,
// one position at a time (no vector loads appear in the visible lines).
// NOTE(review): the declarator line, braces and several preprocessor lines are absent
// from this listing, so the text below is not compilable as shown.
691 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
695 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
696 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
697 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
698 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
699 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
700 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Number of columns iterated per row: half of the level width (integer division).
703 const unsigned int width_checkerboard_run_processing = current_bp_level.
width_level_ / 2;
// Alignment flag obtained from beliefprop::MemoryAlignedAtDataStart<T> (its arguments
// are cut off in this listing); it is forwarded unchanged to every per-position call.
707 const bool data_aligned = beliefprop::MemoryAlignedAtDataStart<T>(
710 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
713 #pragma omp parallel for num_threads(num_threads_kernel)
715 #pragma omp parallel for
// Two alternative loop headers over width_checkerboard_run_processing * height_level_,
// differing only in the index type; the selecting condition is not visible here.
718 for (
int val = 0; val < (width_checkerboard_run_processing * current_bp_level.height_level_); val++)
720 for (
unsigned int val = 0; val < (width_checkerboard_run_processing * current_bp_level.height_level_); val++)
// Recover the row and column from the flattened index.
723 const unsigned int y_val = val / width_checkerboard_run_processing;
724 const unsigned int x_val = val % width_checkerboard_run_processing;
// Compile-time branch: for a 2-byte T (plus a further condition cut off in this
// listing) the kernel is instantiated with float as its second template argument;
// otherwise with T. Both calls pass the literal 0 after disc_k_bp - its meaning is
// defined by the callee and is not visible here.
728 if constexpr ((
sizeof(T) == 2) &&
733 beliefprop::RunBPIterationUsingCheckerboardUpdatesKernel<T, float, DISP_VALS>(
734 x_val, y_val, checkerboard_part_update, current_bp_level,
735 data_cost_checkerboard_0, data_cost_checkerboard_1,
736 message_u_checkerboard_0, message_d_checkerboard_0,
737 message_l_checkerboard_0, message_r_checkerboard_0,
738 message_u_checkerboard_1, message_d_checkerboard_1,
739 message_l_checkerboard_1, message_r_checkerboard_1,
740 disc_k_bp, 0, data_aligned, bp_settings_disp_vals);
745 beliefprop::RunBPIterationUsingCheckerboardUpdatesKernel<T, T, DISP_VALS>(
746 x_val, y_val, checkerboard_part_update, current_bp_level,
747 data_cost_checkerboard_0, data_cost_checkerboard_1,
748 message_u_checkerboard_0, message_d_checkerboard_0,
749 message_l_checkerboard_0, message_r_checkerboard_0,
750 message_u_checkerboard_1, message_d_checkerboard_1,
751 message_l_checkerboard_1, message_r_checkerboard_1,
752 disc_k_bp, 0, data_aligned, bp_settings_disp_vals);
// Definition fragment (compile-time disparity count): produces the four outgoing
// messages with four MsgStereoSIMD<T, U, DISP_VALS> calls. Each call receives
// three of the four incoming messages plus the data message, and writes one
// output array.
// NOTE(review): the declarator line, the current_bp_level parameter line and the braces
// are absent from this listing.
757 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
759 unsigned int x_val_start_processing,
unsigned int y_val,
761 const U prev_u_message[DISP_VALS],
const U prev_d_message[DISP_VALS],
762 const U prev_l_message[DISP_VALS],
const U prev_r_message[DISP_VALS],
763 const U data_message[DISP_VALS],
764 T* current_u_message, T* current_d_message,
765 T* current_l_message, T* current_r_message,
766 const U disc_k_bp_vect,
bool data_aligned)
// current_u_message <- prev u, l, r (prev d is left out).
768 MsgStereoSIMD<T, U, DISP_VALS>(x_val_start_processing, y_val, current_bp_level,
769 prev_u_message, prev_l_message, prev_r_message, data_message, current_u_message,
770 disc_k_bp_vect, data_aligned);
// current_d_message <- prev d, l, r (prev u is left out).
772 MsgStereoSIMD<T, U, DISP_VALS>(x_val_start_processing, y_val, current_bp_level,
773 prev_d_message, prev_l_message, prev_r_message, data_message, current_d_message,
774 disc_k_bp_vect, data_aligned);
// current_r_message <- prev u, d, r (prev l is left out).
776 MsgStereoSIMD<T, U, DISP_VALS>(x_val_start_processing, y_val, current_bp_level,
777 prev_u_message, prev_d_message, prev_r_message, data_message, current_r_message,
778 disc_k_bp_vect, data_aligned);
// current_l_message <- prev u, d, l (prev r is left out).
780 MsgStereoSIMD<T, U, DISP_VALS>(x_val_start_processing, y_val, current_bp_level,
781 prev_u_message, prev_d_message, prev_l_message, data_message, current_l_message,
782 disc_k_bp_vect, data_aligned);
// Definition fragment (run-time disparity count): produces the four outgoing
// messages with four MsgStereoSIMD<T, U> calls, each additionally passing
// bp_settings_disp_vals. Each call receives three of the four incoming messages
// plus the data message, and writes one output array.
// NOTE(review): the declarator line, the current_bp_level parameter line and the braces
// are absent from this listing.
785 template<RunData_t T, RunDataVect_t U>
787 unsigned int x_val_start_processing,
unsigned int y_val,
789 const U* prev_u_message,
const U* prev_d_message,
790 const U* prev_l_message,
const U* prev_r_message,
791 const U* data_message,
792 T* current_u_message, T* current_d_message,
793 T* current_l_message, T* current_r_message,
794 const U disc_k_bp_vect,
bool data_aligned,
unsigned int bp_settings_disp_vals)
// current_u_message <- prev u, l, r (prev d is left out).
796 MsgStereoSIMD<T, U>(x_val_start_processing, y_val, current_bp_level,
797 prev_u_message, prev_l_message, prev_r_message, data_message, current_u_message,
798 disc_k_bp_vect, data_aligned, bp_settings_disp_vals);
// current_d_message <- prev d, l, r (prev u is left out).
800 MsgStereoSIMD<T, U>(x_val_start_processing, y_val, current_bp_level,
801 prev_d_message, prev_l_message, prev_r_message, data_message, current_d_message,
802 disc_k_bp_vect, data_aligned, bp_settings_disp_vals);
// current_r_message <- prev u, d, r (prev l is left out).
804 MsgStereoSIMD<T, U>(x_val_start_processing, y_val, current_bp_level,
805 prev_u_message, prev_d_message, prev_r_message, data_message, current_r_message,
806 disc_k_bp_vect, data_aligned, bp_settings_disp_vals);
// current_l_message <- prev u, d, l (prev r is left out).
808 MsgStereoSIMD<T, U>(x_val_start_processing, y_val, current_bp_level,
809 prev_u_message, prev_d_message, prev_l_message, data_message, current_l_message,
810 disc_k_bp_vect, data_aligned, bp_settings_disp_vals);
// NOTE(review): this excerpt is missing several original lines (the line that
// carries the function name, brace-only lines, and the #else/#endif/else
// lines), so the comments below describe only what the visible lines show.
//
// Belief-propagation message update that processes several pixels at once
// using packed SIMD vectors.
//   T         - element type of the data-cost and message arrays.
//   U         - SIMD vector type; one U holds sizeof(U) / sizeof(T) values.
//   DISP_VALS - compile-time disparity count; when it is not > 0 the run-time
//               bp_settings_disp_vals is used instead (second half below).
// Parameters visible in this excerpt:
//   data_cost_checkerboard_0/1         - read-only data costs per checkerboard.
//   message_{u,d,l,r}_checkerboard_0/1 - message arrays per checkerboard; read
//                                        as neighbour input and handed to the
//                                        update routine as its target arrays.
//   disc_k_bp                          - scalar turned into a SIMD vector and
//                                        forwarded to the update routine.
//   bp_settings_disp_vals              - run-time number of disparity values.
813 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
817 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
818 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
819 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
820 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
821 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
822 float disc_k_bp,
unsigned int bp_settings_disp_vals,
// Number of T values that fit in one SIMD vector U.
825 constexpr
size_t simd_data_size{
sizeof(U) /
sizeof(T)};
// Half of the full level width.
826 const unsigned int width_checkerboard_run_processing = current_bp_level.
width_level_ / 2;
// SIMD vector built from the scalar disc_k_bp (createSIMDVectorSameData).
827 const U disc_k_bp_vect = simd_processing::createSIMDVectorSameData<U>(disc_k_bp);
// Path 1: disparity count known at compile time, so the per-disparity scratch
// vectors can be fixed-size local arrays.
829 if constexpr (DISP_VALS > 0) {
// Optional explicit thread count for this kernel; the plain "parallel for" is
// presumably the #else alternative (directive lines omitted from this excerpt).
830 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
833 #pragma omp parallel for num_threads(num_threads_kernel)
835 #pragma omp parallel for
// Rows are processed in parallel; the first and last rows are skipped.
// The two loop headers (int vs. unsigned index) are presumably alternatives
// chosen by a preprocessor conditional that is not shown here.
838 for (
int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
840 for (
unsigned int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
// checkerboard_adjustment (initializer omitted) determines the first column of
// this row: 0 when it equals 1, otherwise 1.
843 const unsigned int checkerboard_adjustment =
847 const unsigned int start_x = (checkerboard_adjustment == 1) ? 0 : 1;
848 const unsigned int end_final =
851 width_checkerboard_run_processing);
// end_final rounded down to a multiple of simd_data_size, minus one vector,
// clamped at 0.
852 const int end_x_simd_vect_start =
853 std::max(0, (
int)(end_final / simd_data_size) * (
int)simd_data_size - (
int)simd_data_size);
// Walk the row one SIMD vector at a time.
855 for (
unsigned int x_val = 0; x_val < end_final; x_val += simd_data_size) {
856 unsigned int x_val_process = x_val;
// Past end_x_simd_vect_start (and when the row is wider than one vector) the
// position is pulled back so the vector ends exactly at end_final; it may
// overlap columns already covered by the previous vector.
863 if (((
int)x_val_process > end_x_simd_vect_start) &&
864 (end_final > simd_data_size))
866 x_val_process = end_final - simd_data_size;
// Never start before the first column of this row.
871 x_val_process = std::max(start_x, x_val_process);
// Alignment flag for this position; selects aligned vs. unaligned loads below
// and is forwarded to the update routine.
875 const bool data_aligned_x_val =
876 beliefprop::MemoryAlignedAtDataStart<T>(
// Per-disparity scratch vectors: data cost plus the four neighbour messages.
883 U data_message[DISP_VALS], prev_u_message[DISP_VALS],
884 prev_d_message[DISP_VALS], prev_l_message[DISP_VALS],
885 prev_r_message[DISP_VALS];
888 if (data_aligned_x_val) {
889 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
// Data cost from checkerboard 0, neighbour messages from checkerboard 1.
// Up/down messages come from rows y+1 / y-1 at the same x (aligned loads);
// left/right messages are offset in x, so they use unaligned loads.
// (The if/else lines selecting between this group and the next are omitted.)
891 data_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
892 x_val_process, y_val, current_disparity, current_bp_level,
893 DISP_VALS, data_cost_checkerboard_0);
894 prev_u_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
895 x_val_process, y_val + 1, current_disparity, current_bp_level,
896 DISP_VALS, message_u_checkerboard_1);
897 prev_d_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
898 x_val_process, y_val - 1, current_disparity, current_bp_level,
899 DISP_VALS, message_d_checkerboard_1);
900 prev_l_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
901 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
902 DISP_VALS, message_l_checkerboard_1);
903 prev_r_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
904 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
905 DISP_VALS, message_r_checkerboard_1);
// Mirror case: data cost from checkerboard 1, messages from checkerboard 0.
909 data_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
910 x_val_process, y_val, current_disparity, current_bp_level,
911 DISP_VALS, data_cost_checkerboard_1);
912 prev_u_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
913 x_val_process, y_val + 1, current_disparity, current_bp_level,
914 DISP_VALS, message_u_checkerboard_0);
915 prev_d_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
916 x_val_process, y_val - 1, current_disparity, current_bp_level,
917 DISP_VALS, message_d_checkerboard_0);
918 prev_l_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
919 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
920 DISP_VALS, message_l_checkerboard_0);
921 prev_r_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
922 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
923 DISP_VALS, message_r_checkerboard_0);
// Same two groups of loads using unaligned loads throughout (presumably the
// else branch of the alignment test; the else line is omitted).
927 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
929 data_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
930 x_val_process, y_val, current_disparity, current_bp_level,
931 DISP_VALS, data_cost_checkerboard_0);
932 prev_u_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
933 x_val_process, y_val + 1, current_disparity, current_bp_level,
934 DISP_VALS, message_u_checkerboard_1);
935 prev_d_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
936 x_val_process, y_val - 1, current_disparity, current_bp_level,
937 DISP_VALS, message_d_checkerboard_1);
938 prev_l_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
939 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
940 DISP_VALS, message_l_checkerboard_1);
941 prev_r_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
942 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
943 DISP_VALS, message_r_checkerboard_1);
947 data_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
948 x_val_process, y_val, current_disparity, current_bp_level,
949 DISP_VALS, data_cost_checkerboard_1);
950 prev_u_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
951 x_val_process, y_val + 1, current_disparity, current_bp_level,
952 DISP_VALS, message_u_checkerboard_0);
953 prev_d_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
954 x_val_process, y_val - 1, current_disparity, current_bp_level,
955 DISP_VALS, message_d_checkerboard_0);
956 prev_l_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
957 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
958 DISP_VALS, message_l_checkerboard_0);
959 prev_r_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
960 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
961 DISP_VALS, message_r_checkerboard_0);
// Update call targeting checkerboard 0's message arrays.
967 RunBPIterationUpdateMsgValsUseSIMDVectors<T, U, DISP_VALS>(
968 x_val_process, y_val, current_bp_level,
969 prev_u_message, prev_d_message, prev_l_message, prev_r_message, data_message,
970 message_u_checkerboard_0, message_d_checkerboard_0,
971 message_l_checkerboard_0, message_r_checkerboard_0,
972 disc_k_bp_vect, data_aligned_x_val);
// Update call targeting checkerboard 1's message arrays.
975 RunBPIterationUpdateMsgValsUseSIMDVectors<T, U, DISP_VALS>(
976 x_val_process, y_val, current_bp_level,
977 prev_u_message, prev_d_message, prev_l_message, prev_r_message, data_message,
978 message_u_checkerboard_1, message_d_checkerboard_1,
979 message_l_checkerboard_1, message_r_checkerboard_1,
980 disc_k_bp_vect, data_aligned_x_val);
// Path 2 (presumably the else of "DISP_VALS > 0"; the else line is omitted):
// same algorithm driven by the run-time bp_settings_disp_vals.
986 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
987 int num_threads_kernel{
990 #pragma omp parallel for num_threads(num_threads_kernel)
992 #pragma omp parallel for
995 for (
int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
997 for (
unsigned int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
1000 const unsigned int checkerboard_adjustment =
1004 const unsigned int start_x = (checkerboard_adjustment == 1) ? 0 : 1;
1005 const unsigned int end_final =
1008 width_checkerboard_run_processing);
1009 const int end_x_simd_vect_start =
1010 std::max(0, (
int)(end_final / simd_data_size) * (
int)simd_data_size - (
int)simd_data_size);
1012 for (
unsigned int x_val = 0; x_val < end_final; x_val += simd_data_size) {
1013 unsigned int x_val_process = x_val;
// Same tail handling as in path 1: the last vector is pulled back to end at
// end_final and the start is clamped to start_x.
1020 if (((
int)x_val_process > end_x_simd_vect_start) &&
1021 (end_final > simd_data_size))
1023 x_val_process = end_final - simd_data_size;
1028 x_val_process = std::max(start_x, x_val_process);
1032 const bool data_aligned_x_val =
1033 beliefprop::MemoryAlignedAtDataStart<T>(
// Scratch vectors are heap-allocated here because the disparity count is only
// known at run time; they are released with delete[] further below.
// NOTE(review): this performs five allocations per inner-loop iteration and
// would leak if anything between new[] and delete[] throws; consider hoisting
// to one owning buffer per row/thread (e.g. std::vector) - confirm with profiling.
1038 U* data_message =
new U[bp_settings_disp_vals];
1039 U* prev_u_message =
new U[bp_settings_disp_vals];
1040 U* prev_d_message =
new U[bp_settings_disp_vals];
1041 U* prev_l_message =
new U[bp_settings_disp_vals];
1042 U* prev_r_message =
new U[bp_settings_disp_vals];
1045 if (data_aligned_x_val) {
1046 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
// Data cost from checkerboard 0, neighbour messages from checkerboard 1.
1048 data_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
1049 x_val_process, y_val,
1050 current_disparity, current_bp_level, bp_settings_disp_vals, data_cost_checkerboard_0);
1051 prev_u_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
1052 x_val_process, y_val + 1,
1053 current_disparity, current_bp_level, bp_settings_disp_vals, message_u_checkerboard_1);
1054 prev_d_message[current_disparity] = simd_processing::LoadPackedDataAligned<T, U>(
1055 x_val_process, y_val - 1,
1056 current_disparity, current_bp_level, bp_settings_disp_vals, message_d_checkerboard_1);
1057 prev_l_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
1058 x_val_process + checkerboard_adjustment, y_val,
1059 current_disparity, current_bp_level, bp_settings_disp_vals, message_l_checkerboard_1);
1060 prev_r_message[current_disparity] = simd_processing::LoadPackedDataUnaligned<T, U>(
1061 (x_val_process + checkerboard_adjustment) - 1, y_val,
1062 current_disparity, current_bp_level, bp_settings_disp_vals, message_r_checkerboard_1);
// Mirror case: data cost from checkerboard 1, messages from checkerboard 0.
1066 data_message[current_disparity] =
1067 simd_processing::LoadPackedDataAligned<T, U>(
1068 x_val_process, y_val, current_disparity, current_bp_level,
1069 bp_settings_disp_vals, data_cost_checkerboard_1);
1070 prev_u_message[current_disparity] =
1071 simd_processing::LoadPackedDataAligned<T, U>(
1072 x_val_process, y_val + 1, current_disparity, current_bp_level,
1073 bp_settings_disp_vals, message_u_checkerboard_0);
1074 prev_d_message[current_disparity] =
1075 simd_processing::LoadPackedDataAligned<T, U>(
1076 x_val_process, y_val - 1, current_disparity, current_bp_level,
1077 bp_settings_disp_vals, message_d_checkerboard_0);
1078 prev_l_message[current_disparity] =
1079 simd_processing::LoadPackedDataUnaligned<T, U>(
1080 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
1081 bp_settings_disp_vals, message_l_checkerboard_0);
1082 prev_r_message[current_disparity] =
1083 simd_processing::LoadPackedDataUnaligned<T, U>(
1084 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
1085 bp_settings_disp_vals, message_r_checkerboard_0);
// Unaligned variant of the same two groups of loads.
1090 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
1092 data_message[current_disparity] =
1093 simd_processing::LoadPackedDataUnaligned<T, U>(
1094 x_val_process, y_val, current_disparity, current_bp_level,
1095 bp_settings_disp_vals, data_cost_checkerboard_0);
1096 prev_u_message[current_disparity] =
1097 simd_processing::LoadPackedDataUnaligned<T, U>(
1098 x_val_process, y_val + 1, current_disparity, current_bp_level,
1099 bp_settings_disp_vals, message_u_checkerboard_1);
1100 prev_d_message[current_disparity] =
1101 simd_processing::LoadPackedDataUnaligned<T, U>(
1102 x_val_process, y_val - 1, current_disparity, current_bp_level,
1103 bp_settings_disp_vals, message_d_checkerboard_1);
1104 prev_l_message[current_disparity] =
1105 simd_processing::LoadPackedDataUnaligned<T, U>(
1106 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
1107 bp_settings_disp_vals, message_l_checkerboard_1);
1108 prev_r_message[current_disparity] =
1109 simd_processing::LoadPackedDataUnaligned<T, U>(
1110 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
1111 bp_settings_disp_vals, message_r_checkerboard_1);
1115 data_message[current_disparity] =
1116 simd_processing::LoadPackedDataUnaligned<T, U>(
1117 x_val_process, y_val, current_disparity, current_bp_level,
1118 bp_settings_disp_vals, data_cost_checkerboard_1);
1119 prev_u_message[current_disparity] =
1120 simd_processing::LoadPackedDataUnaligned<T, U>(
1121 x_val_process, y_val + 1, current_disparity, current_bp_level,
1122 bp_settings_disp_vals, message_u_checkerboard_0);
1123 prev_d_message[current_disparity] =
1124 simd_processing::LoadPackedDataUnaligned<T, U>(
1125 x_val_process, y_val - 1, current_disparity, current_bp_level,
1126 bp_settings_disp_vals, message_d_checkerboard_0);
1127 prev_l_message[current_disparity] =
1128 simd_processing::LoadPackedDataUnaligned<T, U>(
1129 x_val_process + checkerboard_adjustment, y_val, current_disparity, current_bp_level,
1130 bp_settings_disp_vals, message_l_checkerboard_0);
1131 prev_r_message[current_disparity] =
1132 simd_processing::LoadPackedDataUnaligned<T, U>(
1133 (x_val_process + checkerboard_adjustment) - 1, y_val, current_disparity, current_bp_level,
1134 bp_settings_disp_vals, message_r_checkerboard_0);
// Update call targeting checkerboard 0's message arrays (run-time disparity
// count passed as the last argument).
1140 RunBPIterationUpdateMsgValsUseSIMDVectors<T, U>(
1141 x_val_process, y_val, current_bp_level,
1142 prev_u_message, prev_d_message, prev_l_message, prev_r_message, data_message,
1143 message_u_checkerboard_0, message_d_checkerboard_0,
1144 message_l_checkerboard_0, message_r_checkerboard_0,
1145 disc_k_bp_vect, data_aligned_x_val, bp_settings_disp_vals);
// Update call targeting checkerboard 1's message arrays.
1148 RunBPIterationUpdateMsgValsUseSIMDVectors<T, U>(
1149 x_val_process, y_val, current_bp_level,
1150 prev_u_message, prev_d_message, prev_l_message, prev_r_message, data_message,
1151 message_u_checkerboard_1, message_d_checkerboard_1,
1152 message_l_checkerboard_1, message_r_checkerboard_1,
1153 disc_k_bp_vect, data_aligned_x_val, bp_settings_disp_vals);
// Release the per-position scratch arrays allocated above.
1156 delete [] data_message;
1157 delete [] prev_u_message;
1158 delete [] prev_d_message;
1159 delete [] prev_l_message;
1160 delete [] prev_r_message;
// NOTE(review): this excerpt is missing several original lines (the line that
// carries the function name, brace-only lines, parts of the selection
// conditions, and #else/#endif lines); comments cover only what is visible.
//
// Forwards one checkerboard BP iteration to an implementation chosen by the
// build configuration: a NEON variant when COMPILING_FOR_ARM is defined,
// AVX-256 / AVX-512 variants depending on CPU_VECTORIZATION_DEFINE, and
// RunBPIterationUsingCheckerboardUpdatesNoPackedInstructions (presumably the
// fallback when no SIMD variant is selected). Every call receives the same
// argument list unchanged.
1170 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
1174 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
1175 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
1176 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
1177 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
1178 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
1179 float disc_k_bp,
unsigned int bp_settings_num_disp_vals,
// ARM builds: NEON variant. kNEONSimdBytes is 128 bits expressed in bytes (16).
// Only the sizeof(T) == 2 term of the selection condition is visible here.
1182 #if defined(COMPILING_FOR_ARM)
1191 constexpr
size_t kNEONSimdBytes{128 / 8};
1193 ((
sizeof(T) == 2) &&
1196 RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsNEON<DISP_VALS, ACCELERATION>(
1197 checkerboard_to_update, current_bp_level,
1198 data_cost_checkerboard_0, data_cost_checkerboard_1,
1199 message_u_checkerboard_0, message_d_checkerboard_0,
1200 message_l_checkerboard_0, message_r_checkerboard_0,
1201 message_u_checkerboard_1, message_d_checkerboard_1,
1202 message_l_checkerboard_1, message_r_checkerboard_1,
1203 disc_k_bp, bp_settings_num_disp_vals, opt_cpu_params);
// Builds with any AVX-256 or AVX-512 setting: AVX-256 variant.
// kAVX256SimdBytes is 256 bits expressed in bytes (32).
1210 #if (((CPU_VECTORIZATION_DEFINE == AVX_256_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_256_F16_DEFINE)) || ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE)))
1220 constexpr
size_t kAVX256SimdBytes{256 / 8};
1225 RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsAVX256<DISP_VALS, ACCELERATION>(
1226 checkerboard_to_update, current_bp_level,
1227 data_cost_checkerboard_0, data_cost_checkerboard_1,
1228 message_u_checkerboard_0, message_d_checkerboard_0,
1229 message_l_checkerboard_0, message_r_checkerboard_0,
1230 message_u_checkerboard_1, message_d_checkerboard_1,
1231 message_l_checkerboard_1, message_r_checkerboard_1,
1232 disc_k_bp, bp_settings_num_disp_vals, opt_cpu_params);
// Builds with an AVX-512 setting: AVX-512 variant.
// kAVX512SimdBytes is 512 bits expressed in bytes (64).
1239 #if ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE))
1249 constexpr
size_t kAVX512SimdBytes{512 / 8};
1254 RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsAVX512<DISP_VALS, ACCELERATION>(
1255 checkerboard_to_update, current_bp_level,
1256 data_cost_checkerboard_0, data_cost_checkerboard_1,
1257 message_u_checkerboard_0, message_d_checkerboard_0,
1258 message_l_checkerboard_0, message_r_checkerboard_0,
1259 message_u_checkerboard_1, message_d_checkerboard_1,
1260 message_l_checkerboard_1, message_r_checkerboard_1,
1261 disc_k_bp, bp_settings_num_disp_vals, opt_cpu_params);
// Variant that does not use packed SIMD instructions.
1271 RunBPIterationUsingCheckerboardUpdatesNoPackedInstructions<T, DISP_VALS, ACCELERATION>(
1272 checkerboard_to_update, current_bp_level,
1273 data_cost_checkerboard_0, data_cost_checkerboard_1,
1274 message_u_checkerboard_0, message_d_checkerboard_0,
1275 message_l_checkerboard_0, message_r_checkerboard_0,
1276 message_u_checkerboard_1, message_d_checkerboard_1,
1277 message_l_checkerboard_1, message_r_checkerboard_1,
1278 disc_k_bp, bp_settings_num_disp_vals, opt_cpu_params);
// NOTE(review): this excerpt is missing several original lines (the line that
// carries the function name, brace-only lines, and #else/#endif lines);
// comments cover only what is visible.
//
// Calls beliefprop::CopyMsgDataToNextLevelPixel once for every (x, y) position
// of the current level's checkerboard, passing the read-only "prev" message
// arrays (message_*_prev_checkerboard_0/1) and the writable message arrays
// (message_*_checkerboard_0/1) for both checkerboards, together with
// current_bp_level, next_bp_level and bp_settings_disp_vals.
1283 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
1288 const T* message_u_prev_checkerboard_0,
const T* message_d_prev_checkerboard_0,
1289 const T* message_l_prev_checkerboard_0,
const T* message_r_prev_checkerboard_0,
1290 const T* message_u_prev_checkerboard_1,
const T* message_d_prev_checkerboard_1,
1291 const T* message_l_prev_checkerboard_1,
const T* message_r_prev_checkerboard_1,
1292 T* message_u_checkerboard_0, T* message_d_checkerboard_0,
1293 T* message_l_checkerboard_0, T* message_r_checkerboard_0,
1294 T* message_u_checkerboard_1, T* message_d_checkerboard_1,
1295 T* message_l_checkerboard_1, T* message_r_checkerboard_1,
1296 unsigned int bp_settings_disp_vals,
// Optional explicit thread count for this kernel; the plain "parallel for" is
// presumably the #else alternative (directive lines omitted from this excerpt).
1299 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
1302 #pragma omp parallel for num_threads(num_threads_kernel)
1304 #pragma omp parallel for
// A single flattened index covers width_checkerboard_level_ * height_level_
// positions, so one parallel loop spans both dimensions. The two loop headers
// (int vs. unsigned index) are presumably alternatives chosen by a
// preprocessor conditional that is not shown here.
1307 for (
int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
1309 for (
unsigned int val = 0; val < (current_bp_level.width_checkerboard_level_*current_bp_level.height_level_); val++)
// Recover the row (quotient) and column (remainder) from the flattened index.
1312 const unsigned int y_val = val / current_bp_level.width_checkerboard_level_;
1313 const unsigned int x_val = val % current_bp_level.width_checkerboard_level_;
// Per-position copy; checkerboard_part comes from a parameter line that is not
// visible in this excerpt.
1315 beliefprop::CopyMsgDataToNextLevelPixel<T, DISP_VALS>(
1316 x_val, y_val, checkerboard_part,
1317 current_bp_level, next_bp_level,
1318 message_u_prev_checkerboard_0, message_d_prev_checkerboard_0,
1319 message_l_prev_checkerboard_0, message_r_prev_checkerboard_0,
1320 message_u_prev_checkerboard_1, message_d_prev_checkerboard_1,
1321 message_l_prev_checkerboard_1, message_r_prev_checkerboard_1,
1322 message_u_checkerboard_0, message_d_checkerboard_0,
1323 message_l_checkerboard_0, message_r_checkerboard_0,
1324 message_u_checkerboard_1, message_d_checkerboard_1,
1325 message_l_checkerboard_1, message_r_checkerboard_1,
1326 bp_settings_disp_vals);
// NOTE(review): this excerpt is missing several original lines (the line that
// carries the function name, the loop headers of the per-pixel path,
// brace-only lines, parts of conditions, and #else/#endif lines); comments
// cover only what is visible.
//
// Produces the output disparities in disparity_between_images_device from the
// read-only data costs and messages of both checkerboards. The visible calls
// are a per-pixel routine (beliefprop::RetrieveOutputDisparityPixel) and
// architecture-specific SIMD variants for NEON, AVX-512 and AVX-256; the
// condition choosing between the per-pixel and SIMD paths is not visible.
1330 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
1333 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
1334 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
1335 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
1336 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
1337 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1,
1338 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Per-pixel path, run under an OpenMP parallel loop (loop headers omitted).
1342 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
1343 int num_threads_kernel{
1346 #pragma omp parallel for num_threads(num_threads_kernel)
1348 #pragma omp parallel for
// When T is a 2-byte type (and a further condition that is omitted here holds)
// the per-pixel routine is instantiated with float as its second template
// argument; the second call below uses T for that argument instead.
1361 if constexpr ((
sizeof(T) == 2) &&
1366 beliefprop::RetrieveOutputDisparityPixel<T, float, DISP_VALS>(
1367 x_val, y_val, current_bp_level,
1368 data_cost_checkerboard_0, data_cost_checkerboard_1,
1369 message_u_checkerboard_0, message_d_checkerboard_0,
1370 message_l_checkerboard_0, message_r_checkerboard_0,
1371 message_u_checkerboard_1, message_d_checkerboard_1,
1372 message_l_checkerboard_1, message_r_checkerboard_1,
1373 disparity_between_images_device, bp_settings_disp_vals);
1377 beliefprop::RetrieveOutputDisparityPixel<T, T, DISP_VALS>(
1378 x_val, y_val, current_bp_level,
1379 data_cost_checkerboard_0, data_cost_checkerboard_1,
1380 message_u_checkerboard_0, message_d_checkerboard_0,
1381 message_l_checkerboard_0, message_r_checkerboard_0,
1382 message_u_checkerboard_1, message_d_checkerboard_1,
1383 message_l_checkerboard_1, message_r_checkerboard_1,
1384 disparity_between_images_device, bp_settings_disp_vals);
// SIMD path on ARM builds: NEON variant.
1389 #if defined(COMPILING_FOR_ARM)
1390 RetrieveOutputDisparityUseSIMDVectorsNEON<DISP_VALS, ACCELERATION>(
1392 data_cost_checkerboard_0, data_cost_checkerboard_1,
1393 message_u_checkerboard_0, message_d_checkerboard_0,
1394 message_l_checkerboard_0, message_r_checkerboard_0,
1395 message_u_checkerboard_1, message_d_checkerboard_1,
1396 message_l_checkerboard_1, message_r_checkerboard_1,
1397 disparity_between_images_device, bp_settings_disp_vals, opt_cpu_params);
// SIMD path on builds with an AVX-512 setting: AVX-512 variant.
1403 #if ((CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE) || (CPU_VECTORIZATION_DEFINE == AVX_512_F16_DEFINE))
1404 RetrieveOutputDisparityUseSIMDVectorsAVX512<DISP_VALS, ACCELERATION>(
1406 data_cost_checkerboard_0, data_cost_checkerboard_1,
1407 message_u_checkerboard_0, message_d_checkerboard_0,
1408 message_l_checkerboard_0, message_r_checkerboard_0,
1409 message_u_checkerboard_1, message_d_checkerboard_1,
1410 message_l_checkerboard_1, message_r_checkerboard_1,
1411 disparity_between_images_device, bp_settings_disp_vals, opt_cpu_params);
// AVX-256 variant (the directive guarding it is omitted from this excerpt).
1416 RetrieveOutputDisparityUseSIMDVectorsAVX256<DISP_VALS, ACCELERATION>(
1418 data_cost_checkerboard_0, data_cost_checkerboard_1,
1419 message_u_checkerboard_0, message_d_checkerboard_0,
1420 message_l_checkerboard_0, message_r_checkerboard_0,
1421 message_u_checkerboard_1, message_d_checkerboard_1,
1422 message_l_checkerboard_1, message_r_checkerboard_1,
1423 disparity_between_images_device, bp_settings_disp_vals, opt_cpu_params);
// NOTE(review): this excerpt is missing several original lines (the line that
// carries the function name, brace-only lines, else lines, parts of calls and
// initializers, and #else/#endif lines); comments cover only what is visible.
//
// SIMD computation of the output disparity map.
//   T - element type of the data-cost and message arrays.
//   U - SIMD vector type used for loads of T.
//   V - element type of the temporary disparity buffer.
//   W - SIMD vector type used for the sums and results; one W holds
//       sizeof(W) / sizeof(V) values.
//   DISP_VALS - compile-time disparity count; the second half of the inner
//               loop body uses the run-time bp_settings_disp_vals instead.
// Pass 1 fills a temporary buffer with the best disparity per position for
// both checkerboards; pass 2 interleaves that buffer into
// disparity_between_images_device, writing 0 on the border.
1430 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W,
unsigned int DISP_VALS>
1433 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
1434 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
1435 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
1436 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
1437 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1,
1438 float* disparity_between_images_device,
unsigned int bp_settings_disp_vals,
// Number of V values per SIMD vector W.
1441 constexpr
size_t simd_data_size{
sizeof(W) /
sizeof(V)};
// Half of the full level width.
1442 const unsigned int width_checkerboard_run_processing = current_bp_level.
width_level_ / 2;
// Row stride of the temporary buffer (initializer omitted from this excerpt)
// and the element count of one checkerboard's section of it.
1446 unsigned int width_disp_checkerboard =
1451 const unsigned int num_data_disp_checkerboard = width_disp_checkerboard * current_bp_level.
height_level_;
// Temporary buffer sized for two sections of num_data_disp_checkerboard values
// of V. Two allocation variants are visible (_aligned_malloc and
// std::aligned_alloc), presumably selected by a platform conditional that is
// not shown here.
1453 V* disparity_checkerboard_0 =
1455 _aligned_malloc(2 * num_data_disp_checkerboard *
sizeof(V), current_bp_level.
bytes_align_memory_));
1457 V* disparity_checkerboard_0 =
1458 static_cast<V*
>(std::aligned_alloc(
// Pass 1: rows are processed in parallel; the first and last rows are skipped.
1465 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
1466 int num_threads_kernel{
1469 #pragma omp parallel for num_threads(num_threads_kernel)
1471 #pragma omp parallel for
1474 for (
int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
1476 for (
unsigned int y_val = 1; y_val < current_bp_level.
height_level_ - 1; y_val++) {
// checkerboard_adjustment (initializer omitted) determines the first column of
// this row: 0 when it equals 1, otherwise 1.
1479 const unsigned int checkerboard_adjustment =
1483 const unsigned int start_x = (checkerboard_adjustment == 1) ? 0 : 1;
1484 const unsigned int end_final =
1487 width_checkerboard_run_processing);
// end_final rounded down to a multiple of simd_data_size, minus one vector,
// clamped at 0.
1488 const int end_x_simd_vect_start =
1489 std::max(0, (
int)(end_final / simd_data_size) * (
int)simd_data_size - (
int)simd_data_size);
// Walk the row one SIMD vector at a time.
1491 for (
unsigned int x_val = 0; x_val < end_final; x_val += simd_data_size) {
1492 unsigned int x_val_process = x_val;
// Past end_x_simd_vect_start (and when the row is wider than one vector) the
// position is pulled back so the vector ends exactly at end_final.
1498 if (((
int)x_val_process > end_x_simd_vect_start) &&
1499 (end_final > simd_data_size))
1501 x_val_process = end_final - simd_data_size;
// Never start before the first column of this row.
1506 x_val_process = std::max(start_x, x_val_process);
// Offset of this vector within one section of the temporary buffer.
1509 const unsigned int index_output = (y_val * width_disp_checkerboard) + x_val_process;
// Alignment flag for this position; selects aligned vs. unaligned loads/stores.
1512 const bool data_aligned_x_val =
1513 beliefprop::MemoryAlignedAtDataStart<T>(
// Best value so far, the disparity it belongs to, and the value at the
// disparity currently being examined.
1523 W best_vals, best_disparities, val_at_disp;
// Compile-time disparity count.
1526 if constexpr (DISP_VALS > 0) {
1527 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
// val_at_disp = message_u at (x, y+1) + message_d at (x, y-1)
//             + message_l at (x + adjustment, y)
//             + message_r at (x + adjustment - 1, y) + data cost at (x, y).
// This group reads messages from checkerboard 1 and data cost from
// checkerboard 0 (the lines selecting between groups are omitted).
1529 if (data_aligned_x_val) {
1531 val_at_disp = simd_processing::AddVals<U, U, W>(
1532 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val + 1,
1533 current_disparity, current_bp_level, DISP_VALS, message_u_checkerboard_1),
1534 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val - 1,
1535 current_disparity, current_bp_level, DISP_VALS, message_d_checkerboard_1));
1536 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1537 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1538 current_disparity, current_bp_level, DISP_VALS, message_l_checkerboard_1));
1539 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1540 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1541 current_disparity, current_bp_level, DISP_VALS, message_r_checkerboard_1));
1542 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1543 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val,
1544 current_disparity, current_bp_level, DISP_VALS, data_cost_checkerboard_0));
// Same sum using unaligned loads throughout.
1548 val_at_disp = simd_processing::AddVals<U, U, W>(
1549 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val + 1,
1550 current_disparity, current_bp_level, DISP_VALS, message_u_checkerboard_1),
1551 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val - 1,
1552 current_disparity, current_bp_level, DISP_VALS, message_d_checkerboard_1));
1553 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1554 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1555 current_disparity, current_bp_level, DISP_VALS, message_l_checkerboard_1));
1556 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1557 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1558 current_disparity, current_bp_level, DISP_VALS, message_r_checkerboard_1));
1559 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1560 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val,
1561 current_disparity, current_bp_level, DISP_VALS, data_cost_checkerboard_0));
// Mirror case: messages from checkerboard 0, data cost from checkerboard 1.
1566 if (data_aligned_x_val) {
1568 val_at_disp = simd_processing::AddVals<U, U, W>(
1569 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val + 1,
1570 current_disparity, current_bp_level, DISP_VALS, message_u_checkerboard_0),
1571 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val - 1,
1572 current_disparity, current_bp_level, DISP_VALS, message_d_checkerboard_0));
1573 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1574 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1575 current_disparity, current_bp_level, DISP_VALS, message_l_checkerboard_0));
1576 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1577 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1578 current_disparity, current_bp_level, DISP_VALS, message_r_checkerboard_0));
1579 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1580 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val,
1581 current_disparity, current_bp_level, DISP_VALS, data_cost_checkerboard_1));
1585 val_at_disp = simd_processing::AddVals<U, U, W>(
1586 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val + 1,
1587 current_disparity, current_bp_level, DISP_VALS, message_u_checkerboard_0),
1588 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val - 1,
1589 current_disparity, current_bp_level, DISP_VALS, message_d_checkerboard_0));
1590 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1591 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1592 current_disparity, current_bp_level, DISP_VALS, message_l_checkerboard_0));
1593 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1594 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1595 current_disparity, current_bp_level, DISP_VALS, message_r_checkerboard_0));
1596 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1597 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val,
1598 current_disparity, current_bp_level, DISP_VALS, data_cost_checkerboard_1));
// Disparity 0 initializes the running best; for later disparities an update
// call (its first line is omitted) receives the current disparity as a SIMD
// vector together with val_at_disp.
1601 if (current_disparity == 0) {
1602 best_vals = val_at_disp;
1604 best_disparities = simd_processing::createSIMDVectorSameData<W>(0.0f);
1611 simd_processing::createSIMDVectorSameData<W>((
float)current_disparity), val_at_disp);
// Store the best disparities at index_output (first section of the buffer) or
// at num_data_disp_checkerboard + index_output (second section), using an
// aligned or unaligned store according to data_aligned_x_val.
1614 if (data_aligned_x_val) {
1616 simd_processing::StorePackedDataAligned<V, W>(
1617 index_output, disparity_checkerboard_0, best_disparities);
1621 simd_processing::StorePackedDataAligned<V, W>(
1622 num_data_disp_checkerboard + index_output, disparity_checkerboard_0, best_disparities);
1627 simd_processing::StorePackedDataUnaligned<V, W>(
1628 index_output, disparity_checkerboard_0, best_disparities);
1632 simd_processing::StorePackedDataUnaligned<V, W>(
1633 num_data_disp_checkerboard + index_output, disparity_checkerboard_0, best_disparities);
// Run-time disparity count: same computation using bp_settings_disp_vals.
1638 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
// Messages from checkerboard 1, data cost from checkerboard 0.
1640 if (data_aligned_x_val) {
1642 val_at_disp = simd_processing::AddVals<U, U, W>(
1643 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val + 1,
1644 current_disparity, current_bp_level, bp_settings_disp_vals, message_u_checkerboard_1),
1645 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val - 1,
1646 current_disparity, current_bp_level, bp_settings_disp_vals, message_d_checkerboard_1));
1647 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1648 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1649 current_disparity, current_bp_level, bp_settings_disp_vals, message_l_checkerboard_1));
1650 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1651 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1652 current_disparity, current_bp_level, bp_settings_disp_vals, message_r_checkerboard_1));
1653 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1654 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val,
1655 current_disparity, current_bp_level, bp_settings_disp_vals, data_cost_checkerboard_0));
1659 val_at_disp = simd_processing::AddVals<U, U, W>(
1660 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val + 1,
1661 current_disparity, current_bp_level, bp_settings_disp_vals, message_u_checkerboard_1),
1662 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val - 1,
1663 current_disparity, current_bp_level, bp_settings_disp_vals, message_d_checkerboard_1));
1664 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1665 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1666 current_disparity, current_bp_level, bp_settings_disp_vals, message_l_checkerboard_1));
1667 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1668 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1669 current_disparity, current_bp_level, bp_settings_disp_vals, message_r_checkerboard_1));
1670 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1671 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val,
1672 current_disparity, current_bp_level, bp_settings_disp_vals, data_cost_checkerboard_0));
// Mirror case: messages from checkerboard 0, data cost from checkerboard 1.
1677 if (data_aligned_x_val) {
1679 val_at_disp = simd_processing::AddVals<U, U, W>(
1680 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val + 1,
1681 current_disparity, current_bp_level, bp_settings_disp_vals, message_u_checkerboard_0),
1682 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val - 1,
1683 current_disparity, current_bp_level, bp_settings_disp_vals, message_d_checkerboard_0));
1684 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1685 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1686 current_disparity, current_bp_level, bp_settings_disp_vals, message_l_checkerboard_0));
1687 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1688 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1689 current_disparity, current_bp_level, bp_settings_disp_vals, message_r_checkerboard_0));
1690 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1691 simd_processing::LoadPackedDataAligned<T, U>(x_val_process, y_val,
1692 current_disparity, current_bp_level, bp_settings_disp_vals, data_cost_checkerboard_1));
1696 val_at_disp = simd_processing::AddVals<U, U, W>(
1697 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val + 1,
1698 current_disparity, current_bp_level, bp_settings_disp_vals, message_u_checkerboard_0),
1699 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val - 1,
1700 current_disparity, current_bp_level, bp_settings_disp_vals, message_d_checkerboard_0));
1701 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1702 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process + checkerboard_adjustment, y_val,
1703 current_disparity, current_bp_level, bp_settings_disp_vals, message_l_checkerboard_0));
1704 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1705 simd_processing::LoadPackedDataUnaligned<T, U>((x_val_process + checkerboard_adjustment) - 1, y_val,
1706 current_disparity, current_bp_level, bp_settings_disp_vals, message_r_checkerboard_0));
1707 val_at_disp = simd_processing::AddVals<W, U, W>(val_at_disp,
1708 simd_processing::LoadPackedDataUnaligned<T, U>(x_val_process, y_val,
1709 current_disparity, current_bp_level, bp_settings_disp_vals, data_cost_checkerboard_1));
// Disparity 0 initializes the running best; later disparities go through the
// partially omitted update call.
1712 if (current_disparity == 0) {
1713 best_vals = val_at_disp;
1715 best_disparities = simd_processing::createSIMDVectorSameData<W>(0.0f);
1724 simd_processing::createSIMDVectorSameData<W>((
float)current_disparity),
// Store the best disparities into the first or second section of the buffer.
1729 if (data_aligned_x_val) {
1731 simd_processing::StorePackedDataAligned<V, W>(
1732 index_output, disparity_checkerboard_0, best_disparities);
1736 simd_processing::StorePackedDataAligned<V, W>(
1737 num_data_disp_checkerboard + index_output, disparity_checkerboard_0, best_disparities);
1742 simd_processing::StorePackedDataUnaligned<V, W>(
1743 index_output, disparity_checkerboard_0, best_disparities);
1747 simd_processing::StorePackedDataUnaligned<V, W>(
1748 num_data_disp_checkerboard + index_output, disparity_checkerboard_0, best_disparities);
// Pass 2: interleave the two sections of the temporary buffer into the
// full-width output, one row per parallel iteration, two columns per step.
1759 #if defined(SET_THREAD_COUNT_INDIVIDUAL_KERNELS_CPU)
1760 int num_threads_kernel{
1763 #pragma omp parallel for num_threads(num_threads_kernel)
1765 #pragma omp parallel for
1768 for (
int y=0; y < current_bp_level.height_level_; y++)
1770 for (
unsigned int y=0; y < current_bp_level.height_level_; y++)
// On even rows column x comes from the first section and column x + 1 from the
// second; on odd rows the two sections are swapped.
1773 const bool start_checkerboard_0 = ((y%2) == 0);
1774 unsigned int checkerboard_index = y * width_disp_checkerboard;
1775 for (
unsigned int x=0; x < (current_bp_level.width_level_); x += 2) {
// First and last rows are written as 0.
// NOTE(review): (x + 1) is written here without the (x + 1) < width_level_
// guard used in the branches below; when width_level_ is odd the last step
// writes one element past the row, which on the last row is past
// width_level_ * height_level_ elements of the output - confirm and guard.
1776 if ((y == 0) || (y == (current_bp_level.height_level_ - 1))) {
1777 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 0)] = 0;
1778 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 1)] = 0;
// Interior rows: first and last columns are written as 0, other columns take
// the stored disparity converted to float.
1781 if (start_checkerboard_0) {
1782 if ((x == 0) || (x == (current_bp_level.width_level_ - 1))) {
1783 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 0)] = 0;
1786 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 0)] =
1787 (
float)disparity_checkerboard_0[checkerboard_index];
1789 if ((x + 1) == (current_bp_level.width_level_ - 1)) {
1790 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 1)] = 0;
1792 else if ((x + 1) < current_bp_level.width_level_) {
1793 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 1)] =
1794 (
float)disparity_checkerboard_0[num_data_disp_checkerboard + checkerboard_index];
1798 if ((x == 0) || (x == (current_bp_level.width_level_ - 1))) {
1799 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 0)] = 0;
1802 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 0)] =
1803 (
float)disparity_checkerboard_0[num_data_disp_checkerboard + checkerboard_index];
1805 if ((x + 1) == (current_bp_level.width_level_ - 1)) {
1806 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 1)] = 0;
1808 else if ((x + 1) < current_bp_level.width_level_) {
1809 disparity_between_images_device[y * current_bp_level.width_level_ + (x + 1)] =
1810 (
float)disparity_checkerboard_0[checkerboard_index];
// One temporary-buffer element is consumed per pair of output columns.
1814 checkerboard_index++;
// Release the temporary buffer.
// NOTE(review): one visible allocation variant uses _aligned_malloc, whose
// memory must be released with _aligned_free rather than free - confirm that
// the omitted lines handle that variant.
1820 free(disparity_checkerboard_0);
// Two-pass sweep over the DISP_VALS entries of f, where DISP_VALS is a
// compile-time template parameter.
//   T - scalar type handed to ConvertValToDatatype to build the constant below
//   U - vector type of each entry of f
// NOTE(review): this listing omits some original lines; the function signature,
// the braces, and the declaration of `prev` are not visible here. A call site in
// this file invokes it as DtStereoSIMD<V, W, DISP_VALS>(dst).
1825 template<RunDataProcess_t T, RunDataVectProcess_t U,
unsigned int DISP_VALS>
// The value 1.0f converted to type U; added to a neighboring entry in both passes.
1829 const U vector_all_one_val = simd_processing::ConvertValToDatatype<U, T>(1.0f);
// Forward pass over indices 1 .. DISP_VALS-1:
//   f[d] = GetMinByElement(f[d-1] + 1, f[d])
// (GetMinByElement is presumably an element-wise minimum - confirm in simd_processing.)
1830 for (
unsigned int current_disparity = 1; current_disparity < DISP_VALS; current_disparity++)
1833 prev = simd_processing::AddVals<U, U, U>(f[current_disparity - 1], vector_all_one_val);
1837 f[current_disparity] = simd_processing::GetMinByElement<U>(prev, f[current_disparity]);
// Backward pass over indices DISP_VALS-2 .. 0:
//   f[d] = GetMinByElement(f[d+1] + 1, f[d])
// The loop index is a signed int so that the `>= 0` test can end the loop.
1840 for (
int current_disparity = (
int)DISP_VALS-2; current_disparity >= 0; current_disparity--)
1843 prev = simd_processing::AddVals<U, U, U>(f[current_disparity + 1], vector_all_one_val);
1847 f[current_disparity] = simd_processing::GetMinByElement<U>(prev, f[current_disparity]);
// Builds one outgoing message from three neighbor messages plus the data cost,
// for a compile-time number of disparity values (DISP_VALS), and stores it into
// dst_message_array.
//   T / U - element and vector types of the input/stored data
//   V / W - scalar and vector types used for the intermediate values
//   x_val, y_val            - point being processed
//   messages_neighbor_1..3  - incoming messages, one vector per disparity
//   data_costs              - data cost, one vector per disparity
//   dst_message_array       - destination of the stored message
//   disc_k_bp               - vector passed in by the caller (its use is on a line
//                             not visible here)
//   data_aligned            - flag passed in by the caller (its use is on a line
//                             not visible here; presumably selects aligned stores)
// NOTE(review): this listing omits some original lines - the function signature,
// the declaration of `dst`, several assignment targets and call arguments, and the
// branch statements are not visible. Comments below describe only what is shown.
1852 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W,
unsigned int DISP_VALS>
1854 unsigned int x_val,
unsigned int y_val,
1856 const U messages_neighbor_1[DISP_VALS],
const U messages_neighbor_2[DISP_VALS],
1857 const U messages_neighbor_3[DISP_VALS],
const U data_costs[DISP_VALS],
1858 T* dst_message_array,
const U& disc_k_bp,
bool data_aligned)
// Start from beliefprop::kHighValBp<V> converted to vector type W.
1862 W minimum = simd_processing::ConvertValToDatatype<W, V>(beliefprop::kHighValBp<V>);
// Per disparity: dst[d] = neighbor_1[d] + neighbor_2[d] + neighbor_3[d] + data_costs[d].
// The first add takes two U operands and yields W; later adds accumulate in W.
1865 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++)
1869 dst[current_disparity] =
1870 simd_processing::AddVals<U, U, W>(
1871 messages_neighbor_1[current_disparity],
1872 messages_neighbor_2[current_disparity]);
1873 dst[current_disparity] =
1874 simd_processing::AddVals<W, U, W>(
1875 dst[current_disparity],
1876 messages_neighbor_3[current_disparity]);
1877 dst[current_disparity] =
1878 simd_processing::AddVals<W, U, W>(
1879 dst[current_disparity],
1880 data_costs[current_disparity]);
// GetMinByElement against dst[d]; target and first operand are not visible
// (presumably this keeps a running value in `minimum` - TODO confirm).
1885 simd_processing::GetMinByElement<W>(
1887 dst[current_disparity]);
// Two-pass sweep over dst (see DtStereoSIMD).
1892 DtStereoSIMD<V, W, DISP_VALS>(dst);
// AddVals with a W and a U operand; operands are not visible here
// (presumably minimum + disc_k_bp - TODO confirm).
1897 simd_processing::AddVals<W, U, W>(
// Accumulator initialised to zero (literal 0.0 converted to W).
1903 W val_to_normalize = simd_processing::ConvertValToDatatype<W, V>(0.0);
// Per disparity: replace dst[d] with GetMinByElement(<not visible>, dst[d]) and
// feed dst[d] into an AddVals call (presumably accumulating val_to_normalize).
1905 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++)
1910 dst[current_disparity] =
1911 simd_processing::GetMinByElement<W>(
1913 dst[current_disparity]);
1917 simd_processing::AddVals<W, W, W>(
1919 dst[current_disparity]);
// Division by DISP_VALS converted to W; dividend and target are not visible
// (presumably val_to_normalize becomes the mean over disparities - TODO confirm).
1924 simd_processing::divideVals<W, W, W>(
1926 simd_processing::ConvertValToDatatype<W, V>((
double)DISP_VALS));
// Starting index into dst_message_array; the initialising expression is not visible.
1928 unsigned int dest_message_array_index =
// Per disparity: subtract a value (second operand not visible) from dst[d], store
// dst[d] at dest_message_array_index, then advance the index.
1933 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++)
1936 dst[current_disparity] =
1937 simd_processing::SubtractVals<W, W, W>(
1938 dst[current_disparity],
// Aligned and unaligned store variants; the condition choosing between them is
// not visible (presumably `data_aligned`).
1942 simd_processing::StorePackedDataAligned<T, W>(
1943 dest_message_array_index,
1945 dst[current_disparity]);
1948 simd_processing::StorePackedDataUnaligned<T, W>(
1949 dest_message_array_index,
1951 dst[current_disparity]);
// Two alternative index advances (`+= <not visible>` or `++`); the condition
// selecting between them is not visible in this listing.
1955 dest_message_array_index +=
1959 dest_message_array_index++;
// Two-pass sweep over the first bp_settings_disp_vals entries of f, for a
// disparity count known only at run time.
//   T - scalar type handed to ConvertValToDatatype to build the constant below
//   U - vector type of each entry of f
// NOTE(review): this listing omits some original lines; the function signature,
// braces, the target of each AddVals result and the first GetMinByElement operand
// are not visible (presumably `prev`, as in the DISP_VALS variant in this file).
// A call site in this file invokes it as DtStereoSIMD<V, W>(dst, bp_settings_disp_vals).
1966 template<RunDataProcess_t T, RunDataVectProcess_t U>
// The value 1.0f converted to type U; added to a neighboring entry in both passes.
1970 const U vector_all_one_val = simd_processing::ConvertValToDatatype<U, T>(1.0f);
// Forward pass over indices 1 .. bp_settings_disp_vals-1, using f[d-1] + 1.
1971 for (
unsigned int current_disparity = 1; current_disparity < bp_settings_disp_vals; current_disparity++)
1975 simd_processing::AddVals<U, U, U>(
1976 f[current_disparity - 1],
1977 vector_all_one_val);
1981 f[current_disparity] =
1982 simd_processing::GetMinByElement<U>(
1984 f[current_disparity]);
// Backward pass over indices bp_settings_disp_vals-2 .. 0, using f[d+1] + 1.
// The loop index is a signed int so that the `>= 0` test can end the loop.
1987 for (
int current_disparity = (
int)bp_settings_disp_vals-2; current_disparity >= 0; current_disparity--)
1991 simd_processing::AddVals<U, U, U>(
1992 f[current_disparity + 1],
1993 vector_all_one_val);
1997 f[current_disparity] =
1998 simd_processing::GetMinByElement<U>(
2000 f[current_disparity]);
// Builds one outgoing message from three neighbor messages plus the data cost,
// for a run-time number of disparity values (bp_settings_disp_vals), and stores
// it into dst_message_array.
//   T / U - element and vector types of the input/stored data
//   V / W - scalar and vector types used for the intermediate values
//   x_val, y_val            - point being processed
//   messages_neighbor_1..3  - incoming messages, one vector per disparity
//   data_costs              - data cost, one vector per disparity
//   dst_message_array       - destination of the stored message
//   disc_k_bp               - vector passed in by the caller (its use is on a line
//                             not visible here)
//   data_aligned            - flag passed in by the caller (its use is on a line
//                             not visible here; presumably selects aligned stores)
//   bp_settings_disp_vals   - number of disparity values to process
// NOTE(review): this listing omits some original lines - the function signature,
// several assignment targets and call arguments, and the branch statements are
// not visible. Comments below describe only what is shown.
2005 template<RunData_t T, RunDataVect_t U, RunDataProcess_t V, RunDataVectProcess_t W>
2007 unsigned int x_val,
unsigned int y_val,
2009 const U* messages_neighbor_1,
const U* messages_neighbor_2,
2010 const U* messages_neighbor_3,
const U* data_costs,
2011 T* dst_message_array,
2012 const U& disc_k_bp,
bool data_aligned,
2013 unsigned int bp_settings_disp_vals)
// Start from beliefprop::kHighValBp<V> converted to vector type W.
2017 W minimum = simd_processing::ConvertValToDatatype<W, V>(beliefprop::kHighValBp<V>);
// Scratch array allocated with new[] on every call.
// NOTE(review): the matching delete[] is not visible in this listing - confirm it
// exists on every exit path (or consider an owning container).
2018 W* dst =
new W[bp_settings_disp_vals];
// Per disparity: dst[d] = neighbor_1[d] + neighbor_2[d] + neighbor_3[d] + data_costs[d].
// The first add takes two U operands and yields W; later adds accumulate in W.
2020 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++)
2024 dst[current_disparity] =
2025 simd_processing::AddVals<U, U, W>(
2026 messages_neighbor_1[current_disparity],
2027 messages_neighbor_2[current_disparity]);
2028 dst[current_disparity] =
2029 simd_processing::AddVals<W, U, W>(
2030 dst[current_disparity],
2031 messages_neighbor_3[current_disparity]);
2032 dst[current_disparity] =
2033 simd_processing::AddVals<W, U, W>(
2034 dst[current_disparity],
2035 data_costs[current_disparity]);
// GetMinByElement against dst[d]; target and first operand are not visible
// (presumably this keeps a running value in `minimum` - TODO confirm).
2040 simd_processing::GetMinByElement<W>(
2042 dst[current_disparity]);
// Two-pass sweep over dst (see the run-time DtStereoSIMD variant).
2047 DtStereoSIMD<V, W>(dst, bp_settings_disp_vals);
// AddVals with a W and a U operand; operands are not visible here
// (presumably minimum + disc_k_bp - TODO confirm).
2052 simd_processing::AddVals<W, U, W>(
// Accumulator initialised to zero (literal 0.0f converted to W).
2058 W val_to_normalize = simd_processing::ConvertValToDatatype<W, V>(0.0f);
// Per disparity: replace dst[d] with GetMinByElement(<not visible>, dst[d]) and
// feed dst[d] into an AddVals call (presumably accumulating val_to_normalize).
2060 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++)
2065 dst[current_disparity] =
2066 simd_processing::GetMinByElement<W>(
2068 dst[current_disparity]);
2072 simd_processing::AddVals<W, W, W>(
2074 dst[current_disparity]);
// Division by bp_settings_disp_vals (cast to float, converted to W); dividend and
// target are not visible (presumably val_to_normalize becomes the mean - TODO confirm).
2079 simd_processing::divideVals<W, W, W>(
2081 simd_processing::ConvertValToDatatype<W, V>((
float)bp_settings_disp_vals));
// Starting index into dst_message_array; the initialising expression is not visible.
2083 unsigned int dest_message_array_index =
// Per disparity: subtract a value (second operand not visible) from dst[d], store
// dst[d] at dest_message_array_index, then advance the index.
2088 for (
unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++)
2091 dst[current_disparity] =
2092 simd_processing::SubtractVals<W, W, W>(
2093 dst[current_disparity],
// Aligned and unaligned store variants; the condition choosing between them is
// not visible (presumably `data_aligned`).
2097 simd_processing::StorePackedDataAligned<T, W>(
2098 dest_message_array_index,
2100 dst[current_disparity]);
2103 simd_processing::StorePackedDataUnaligned<T, W>(
2104 dest_message_array_index,
2106 dst[current_disparity]);
// Two alternative index advances (`+= <not visible>` or `++`); the condition
// selecting between them is not visible in this listing.
2110 dest_message_array_index +=
2114 dest_message_array_index++;
// Thin wrapper for a compile-time disparity count: forwards every argument to
// MsgStereoSIMDProcessing<T, U, T, U, DISP_VALS>, i.e. with that template's third
// and fourth type parameters set to T and U.
// NOTE(review): the signature line naming this function and the
// `current_bp_level` parameter line are not visible in this listing; the
// signature index at the end of the listing shows a MsgStereoSIMD entry with this
// parameter list.
2122 template<RunData_t T, RunDataVect_t U,
unsigned int DISP_VALS>
2124 unsigned int x_val,
unsigned int y_val,
2126 const U messages_neighbor_1[DISP_VALS],
const U messages_neighbor_2[DISP_VALS],
2127 const U messages_neighbor_3[DISP_VALS],
const U data_costs[DISP_VALS],
2128 T* dst_message_array,
2129 const U& disc_k_bp,
bool data_aligned)
// Delegate; no additional work is visible in this wrapper.
2131 MsgStereoSIMDProcessing<T, U, T, U, DISP_VALS>(x_val, y_val,
2132 current_bp_level, messages_neighbor_1, messages_neighbor_2,
2133 messages_neighbor_3, data_costs, dst_message_array, disc_k_bp, data_aligned);
// Thin wrapper for a run-time disparity count: forwards every argument, including
// bp_settings_disp_vals, to MsgStereoSIMDProcessing<T, U, T, U>, i.e. with that
// template's third and fourth type parameters set to T and U.
// NOTE(review): the signature line naming this function and the
// `current_bp_level` parameter line are not visible in this listing; presumably
// this is the run-time overload of MsgStereoSIMD - confirm against the full file.
2137 template<RunData_t T, RunDataVect_t U>
2139 unsigned int x_val,
unsigned int y_val,
2141 const U* messages_neighbor_1,
const U* messages_neighbor_2,
2142 const U* messages_neighbor_3,
const U* data_costs,
2143 T* dst_message_array,
2144 const U& disc_k_bp,
bool data_aligned,
unsigned int bp_settings_disp_vals)
// Delegate; no additional work is visible in this wrapper.
2146 MsgStereoSIMDProcessing<T, U, T, U>(
2147 x_val, y_val, current_bp_level,
2148 messages_neighbor_1, messages_neighbor_2,
2149 messages_neighbor_3, data_costs,
2150 dst_message_array, disc_k_bp, data_aligned,
2151 bp_settings_disp_vals);
// Debug helper: prints x_val, y_val and, for every disparity in [0, DISP_VALS),
// one value each labelled as the U/D/L/R message and the data cost, using printf.
// Two groups of prints exist; the first runs when (x_val + y_val) is even.
// NOTE(review): this listing omits some original lines - the signature line
// naming the function, the `current_bp_level` parameter, the array/index
// expressions passed to the "%f" prints, and the closing braces/else are not
// visible, so which checkerboard arrays each group reads cannot be stated here.
// Presumably this is PrintDataAndMessageValsAtPointKernel from the signature
// index at the end of the listing - confirm against the full file.
2154 template<RunData_t T,
unsigned int DISP_VALS>
2156 unsigned int x_val,
unsigned int y_val,
2158 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
2159 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
2160 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
2161 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
2162 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1)
// First group: taken when the coordinate sum is even.
2164 if (((x_val + y_val) % 2) == 0) {
2165 printf(
"x_val: %u\n", x_val);
2166 printf(
"y_val: %u\n", y_val);
// One block of five values per disparity; only the tail of each index
// expression (current_disparity, DISP_VALS) is visible.
2167 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
2168 printf(
"DISP: %u\n", current_disparity);
2169 printf(
"messageUPrevStereoCheckerboard: %f \n",
2172 current_disparity, DISP_VALS)]);
2173 printf(
"messageDPrevStereoCheckerboard: %f \n",
2176 current_disparity, DISP_VALS)]);
2177 printf(
"messageLPrevStereoCheckerboard: %f \n",
2180 current_disparity, DISP_VALS)]);
2181 printf(
"messageRPrevStereoCheckerboard: %f \n",
2184 current_disparity, DISP_VALS)]);
2185 printf(
"dataCostStereoCheckerboard: %f \n",
2188 current_disparity, DISP_VALS)]);
// Second group: same labels; presumably the else branch for an odd coordinate
// sum (the branch line itself is not visible).
2191 printf(
"x_val: %u\n", x_val);
2192 printf(
"y_val: %u\n", y_val);
2193 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
2194 printf(
"DISP: %u\n", current_disparity);
2195 printf(
"messageUPrevStereoCheckerboard: %f \n",
2198 current_disparity, DISP_VALS)]);
2199 printf(
"messageDPrevStereoCheckerboard: %f \n",
2202 current_disparity, DISP_VALS)]);
2203 printf(
"messageLPrevStereoCheckerboard: %f \n",
2206 current_disparity, DISP_VALS)]);
2207 printf(
"messageRPrevStereoCheckerboard: %f \n",
2210 current_disparity, DISP_VALS)]);
2211 printf(
"dataCostStereoCheckerboard: %f \n",
2214 current_disparity, DISP_VALS)]);
// Debug helper: prints x_val, y_val and, for every disparity in [0, DISP_VALS),
// one value each labelled as the U/D/L/R message and the data cost, using printf.
// Two groups of prints exist; the first runs when (x_val + y_val) is even.
// NOTE(review): this listing omits some original lines - the signature line
// naming the function, the `current_bp_level` parameter, the array/index
// expressions passed to the "%f" prints, and the closing braces/else are not
// visible, so which checkerboard arrays and coordinates each group reads cannot
// be stated here. Presumably this is PrintDataAndMessageValsToPointKernel from
// the signature index at the end of the listing - confirm against the full file.
2219 template<RunData_t T,
unsigned int DISP_VALS>
2221 unsigned int x_val,
unsigned int y_val,
2223 const T* data_cost_checkerboard_0,
const T* data_cost_checkerboard_1,
2224 const T* message_u_checkerboard_0,
const T* message_d_checkerboard_0,
2225 const T* message_l_checkerboard_0,
const T* message_r_checkerboard_0,
2226 const T* message_u_checkerboard_1,
const T* message_d_checkerboard_1,
2227 const T* message_l_checkerboard_1,
const T* message_r_checkerboard_1)
// 0 or 1: y_val % 2 when the coordinate sum is even, otherwise (y_val + 1) % 2.
// Its uses are on lines not visible in this listing (presumably an x offset when
// indexing neighboring entries - TODO confirm).
2229 const unsigned int checkerboard_adjustment = (((x_val + y_val) % 2) == 0) ? ((y_val)%2) : ((y_val+1)%2);
// First group: taken when the coordinate sum is even.
2230 if (((x_val + y_val) % 2) == 0) {
2233 printf(
"x_val: %u\n", x_val);
2234 printf(
"y_val: %u\n", y_val);
// One block of five values per disparity; only the tail of each index
// expression (current_disparity, DISP_VALS) is visible.
2235 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
2236 printf(
"DISP: %u\n", current_disparity);
2237 printf(
"messageUPrevStereoCheckerboard: %f \n",
2240 current_disparity, DISP_VALS)]);
2241 printf(
"messageDPrevStereoCheckerboard: %f \n",
2244 current_disparity, DISP_VALS)]);
2245 printf(
"messageLPrevStereoCheckerboard: %f \n",
2248 current_disparity, DISP_VALS)]);
2249 printf(
"messageRPrevStereoCheckerboard: %f \n",
2252 current_disparity, DISP_VALS)]);
2253 printf(
"dataCostStereoCheckerboard: %f \n",
2256 current_disparity, DISP_VALS)]);
// Second group: same labels; presumably the else branch for an odd coordinate
// sum (the branch line itself is not visible).
2260 printf(
"x_val: %u\n", x_val);
2261 printf(
"y_val: %u\n", y_val);
2262 for (
unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
2263 printf(
"DISP: %u\n", current_disparity);
2264 printf(
"messageUPrevStereoCheckerboard: %f \n",
2267 current_disparity, DISP_VALS)]);
2268 printf(
"messageDPrevStereoCheckerboard: %f \n",
2271 current_disparity, DISP_VALS)]);
2272 printf(
"messageLPrevStereoCheckerboard: %f \n",
2275 current_disparity, DISP_VALS)]);
2276 printf(
"messageRPrevStereoCheckerboard: %f \n",
2279 current_disparity, DISP_VALS)]);
2280 printf(
"dataCostStereoCheckerboard: %f \n",
2283 current_disparity, DISP_VALS)]);
File with namespace for enums, constants, structures, and functions specific to belief propagation pr...
Header file that contains information about the stereo sets used for evaluation of the bp implementat...
Defines functions used in processing belief propagation that are specific to implementation with AVX2...
Defines functions used in processing belief propagation that are specific to implementation with AVX5...
Defines functions used in processing belief propagation that are specific to implementation with NEON...
Declares child class of ParallelParams to store and process parallelization parameters to use in each...
Contains namespace with CPU run defaults and constants.
Define constraints for data type in processing.
Contains general functions for processing using SIMD vector data types on CPU.
Functions for belief propagation processing that are used in both optimized CPU and CUDA implementati...
Contains namespace with utility functions for implementation.
Abstract class for holding and processing parallelization parameters. Child class(es) specific to im...
virtual std::array< unsigned int, 2 > OptParamsForKernel(const std::array< unsigned int, 2 > &kernel_location) const =0
Get optimized parallel parameters for parallel processing kernel for kernel that is indexed as an arr...
Namespace to define global kernel functions for optimized belief propagation processing on the CPU us...
void RunBPIterationUsingCheckerboardUpdates(beliefprop::CheckerboardPart checkerboard_to_update, const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, T *message_u_checkerboard_0, T *message_d_checkerboard_0, T *message_l_checkerboard_0, T *message_r_checkerboard_0, T *message_u_checkerboard_1, T *message_d_checkerboard_1, T *message_l_checkerboard_1, T *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_num_disp_vals, const ParallelParams &opt_cpu_params)
void RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsAVX512(beliefprop::CheckerboardPart checkerboard_to_update, const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, float *message_u_checkerboard_0, float *message_d_checkerboard_0, float *message_l_checkerboard_0, float *message_r_checkerboard_0, float *message_u_checkerboard_1, float *message_d_checkerboard_1, float *message_l_checkerboard_1, float *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void DtStereoSIMD(U f[DISP_VALS])
void RetrieveOutputDisparityUseSIMDVectorsAVX256(const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, const float *message_u_prev_checkerboard_0, const float *message_d_prev_checkerboard_0, const float *message_l_prev_checkerboard_0, const float *message_r_prev_checkerboard_0, const float *message_u_prev_checkerboard_1, const float *message_d_prev_checkerboard_1, const float *message_l_prev_checkerboard_1, const float *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsAVX256(beliefprop::CheckerboardPart checkerboard_to_update, const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, float *message_u_checkerboard_0, float *message_d_checkerboard_0, float *message_l_checkerboard_0, float *message_r_checkerboard_0, float *message_u_checkerboard_1, float *message_d_checkerboard_1, float *message_l_checkerboard_1, float *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void MsgStereoSIMD(unsigned int x_val, unsigned int y_val, const beliefprop::BpLevelProperties &current_bp_level, const U messages_neighbor_1[DISP_VALS], const U messages_neighbor_2[DISP_VALS], const U messages_neighbor_3[DISP_VALS], const U data_costs[DISP_VALS], T *dst_message_array, const U &disc_k_bp, bool data_aligned)
void InitializeBottomLevelData(const beliefprop::BpLevelProperties &current_bp_level, const float *image_1_pixels_device, const float *image_2_pixels_device, T *data_cost_stereo_checkerboard_0, T *data_cost_stereo_checkerboard_1, float lambda_bp, float data_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RetrieveOutputDisparityUseSIMDVectorsNEON(const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, const float *message_u_prev_checkerboard_0, const float *message_d_prev_checkerboard_0, const float *message_l_prev_checkerboard_0, const float *message_r_prev_checkerboard_0, const float *message_u_prev_checkerboard_1, const float *message_d_prev_checkerboard_1, const float *message_l_prev_checkerboard_1, const float *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RunBPIterationUpdateMsgValsUseSIMDVectors(unsigned int x_val_start_processing, unsigned int y_val, const beliefprop::BpLevelProperties &current_bp_level, const U prev_u_message[DISP_VALS], const U prev_d_message[DISP_VALS], const U prev_l_message[DISP_VALS], const U prev_r_message[DISP_VALS], const U data_message[DISP_VALS], T *current_u_message, T *current_d_message, T *current_l_message, T *current_r_message, const U disc_k_bp_vect, bool data_aligned)
void PrintDataAndMessageValsAtPointKernel(unsigned int x_val, unsigned int y_val, const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, const T *message_u_checkerboard_0, const T *message_d_checkerboard_0, const T *message_l_checkerboard_0, const T *message_r_checkerboard_0, const T *message_u_checkerboard_1, const T *message_d_checkerboard_1, const T *message_l_checkerboard_1, const T *message_r_checkerboard_1)
void RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsProcess(beliefprop::CheckerboardPart checkerboard_to_update, const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, T *message_u_checkerboard_0, T *message_d_checkerboard_0, T *message_l_checkerboard_0, T *message_r_checkerboard_0, T *message_u_checkerboard_1, T *message_d_checkerboard_1, T *message_l_checkerboard_1, T *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void PrintDataAndMessageValsToPointKernel(unsigned int x_val, unsigned int y_val, const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, const T *message_u_checkerboard_0, const T *message_d_checkerboard_0, const T *message_l_checkerboard_0, const T *message_r_checkerboard_0, const T *message_u_checkerboard_1, const T *message_d_checkerboard_1, const T *message_l_checkerboard_1, const T *message_r_checkerboard_1)
void RetrieveOutputDisparityUseSIMDVectors(const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, const T *message_u_prev_checkerboard_0, const T *message_d_prev_checkerboard_0, const T *message_l_prev_checkerboard_0, const T *message_r_prev_checkerboard_0, const T *message_u_prev_checkerboard_1, const T *message_d_prev_checkerboard_1, const T *message_l_prev_checkerboard_1, const T *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void MsgStereoSIMDProcessing(unsigned int x_val, unsigned int y_val, const beliefprop::BpLevelProperties &current_bp_level, const U *messages_neighbor_1, const U *messages_neighbor_2, const U *messages_neighbor_3, const U *data_costs, T *dst_message_array, const U &disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals)
void InitializeCurrentLevelData(beliefprop::CheckerboardPart checkerboard_part, const beliefprop::BpLevelProperties &current_bp_level, const beliefprop::BpLevelProperties &prev_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, T *data_cost_current_level, unsigned int offset_num, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RunBPIterationUsingCheckerboardUpdatesNoPackedInstructions(beliefprop::CheckerboardPart checkerboard_part_update, const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, T *message_u_checkerboard_0, T *message_d_checkerboard_0, T *message_l_checkerboard_0, T *message_r_checkerboard_0, T *message_u_checkerboard_1, T *message_d_checkerboard_1, T *message_l_checkerboard_1, T *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RunBPIterationUsingCheckerboardUpdatesUseSIMDVectorsNEON(beliefprop::CheckerboardPart checkerboard_to_update, const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, float *message_u_checkerboard_0, float *message_d_checkerboard_0, float *message_l_checkerboard_0, float *message_r_checkerboard_0, float *message_u_checkerboard_1, float *message_d_checkerboard_1, float *message_l_checkerboard_1, float *message_r_checkerboard_1, float disc_k_bp, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RetrieveOutputDisparityUseSIMDVectorsAVX512(const beliefprop::BpLevelProperties &current_bp_level, const float *data_cost_checkerboard_0, const float *data_cost_checkerboard_1, const float *message_u_prev_checkerboard_0, const float *message_d_prev_checkerboard_0, const float *message_l_prev_checkerboard_0, const float *message_r_prev_checkerboard_0, const float *message_u_prev_checkerboard_1, const float *message_d_prev_checkerboard_1, const float *message_l_prev_checkerboard_1, const float *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void InitializeMessageValsToDefaultKernel(const beliefprop::BpLevelProperties &current_bp_level, T *message_u_checkerboard_0, T *message_d_checkerboard_0, T *message_l_checkerboard_0, T *message_r_checkerboard_0, T *message_u_checkerboard_1, T *message_d_checkerboard_1, T *message_l_checkerboard_1, T *message_r_checkerboard_1, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void RetrieveOutputDisparity(const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, const T *message_u_prev_checkerboard_0, const T *message_d_prev_checkerboard_0, const T *message_l_prev_checkerboard_0, const T *message_r_prev_checkerboard_0, const T *message_u_prev_checkerboard_1, const T *message_d_prev_checkerboard_1, const T *message_l_prev_checkerboard_1, const T *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
void UpdateBestDispBestVals(T &best_disparities, T &best_vals, const T &current_disparity, const T &val_at_disp)
void CopyMsgDataToNextLevel(beliefprop::CheckerboardPart checkerboard_part, const beliefprop::BpLevelProperties &current_bp_level, const beliefprop::BpLevelProperties &next_bp_level, const T *message_u_prev_checkerboard_0, const T *message_d_prev_checkerboard_0, const T *message_l_prev_checkerboard_0, const T *message_r_prev_checkerboard_0, const T *message_u_prev_checkerboard_1, const T *message_d_prev_checkerboard_1, const T *message_l_prev_checkerboard_1, const T *message_r_prev_checkerboard_1, const T *message_u_checkerboard_0, const T *message_d_checkerboard_0, const T *message_l_checkerboard_0, const T *message_r_checkerboard_0, const T *message_u_checkerboard_1, const T *message_d_checkerboard_1, const T *message_l_checkerboard_1, const T *message_r_checkerboard_1, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
constexpr bool kOptimizedIndexingSetting
CheckerboardPart
Define the two checkerboard "parts" that the image is divided into.
ARCHITECTURE_ADDITION unsigned int RetrieveIndexInDataAndMessage(unsigned int x_val, unsigned int y_val, unsigned int width, unsigned int height, unsigned int current_disparity, unsigned int total_num_disp_vals, unsigned int offset_data=0u)
Retrieve the current 1-D index value of the given point at the given disparity in the data cost and m...
POD struct to store bp level data. Struct can be passed to global CUDA kernel so needs to take restr...
unsigned int height_level_
unsigned int width_checkerboard_level_
unsigned int bytes_align_memory_
unsigned int width_level_
unsigned int padded_width_checkerboard_level_