34 #include <unordered_map>
55 using timingType = std::chrono::time_point<std::chrono::system_clock>;
66 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
74 const char *file =
"",
76 bool abort =
false)
const
95 std::optional<std::pair<float*, DetailedTimings<beliefprop::Runtime_Type>>>
operator()(
96 const std::array<float*, 2>& images_target_device,
98 const std::array<unsigned int, 2>& width_height_images,
99 T* allocated_mem_bp_processing, T* allocated_memory,
118 const std::array<float*, 2>& images_target_device,
136 unsigned int bp_settings_num_disp_vals)
const = 0;
149 unsigned int bp_settings_num_disp_vals)
const = 0;
166 T* allocated_memory)
const = 0;
183 unsigned int bp_settings_num_disp_vals)
const = 0;
195 virtual float* RetrieveOutputDisparity(
199 unsigned int bp_settings_num_disp_vals)
const = 0;
207 virtual void FreeCheckerboardMessagesMemory(
211 std::ranges::for_each(checkerboard_messages_to_free,
212 [&mem_management_bp_run](
auto& checkerboard_messages_set) {
213 std::ranges::for_each(checkerboard_messages_set,
214 [&mem_management_bp_run](
auto& checkerboard_messages) {
215 mem_management_bp_run->FreeAlignedMemoryOnDevice(checkerboard_messages);
229 std::size_t num_data_allocate_per_message,
233 std::ranges::for_each(
234 output_checkerboard_messages,
235 [
this, num_data_allocate_per_message, &mem_management_bp_run](
236 auto& checkerboard_messages_set)
238 std::ranges::for_each(checkerboard_messages_set,
239 [
this, num_data_allocate_per_message, &mem_management_bp_run](
240 auto& checkerboard_messages)
242 checkerboard_messages =
243 mem_management_bp_run->AllocateAlignedMemoryOnDevice(
244 num_data_allocate_per_message,
250 return output_checkerboard_messages;
264 std::size_t offset_into_messages)
const
267 for (
const auto checkerboard_num :
271 for (
unsigned int i = 0;
272 i < output_checkerboard_messages[checkerboard_num].size();
275 output_checkerboard_messages[checkerboard_num][i] =
276 &((all_checkerboard_messages[checkerboard_num][i])[offset_into_messages]);
280 return output_checkerboard_messages;
289 virtual void FreeDataCostsMemory(
293 mem_management_bp_run->FreeAlignedMemoryOnDevice(data_costs_to_free[0]);
294 mem_management_bp_run->FreeAlignedMemoryOnDevice(data_costs_to_free[1]);
306 std::size_t num_data_costs_checkerboards,
309 return {mem_management_bp_run->AllocateAlignedMemoryOnDevice(
310 num_data_costs_checkerboards,
312 mem_management_bp_run->AllocateAlignedMemoryOnDevice(
313 num_data_costs_checkerboards,
327 AllocateAndOrganizeDataCostsAndMessageDataAllLevels(
328 std::size_t num_data_allocate_per_data_costs_message_data_array,
331 T* data_all_levels = mem_management_bp_run->AllocateAlignedMemoryOnDevice(
332 10*num_data_allocate_per_data_costs_message_data_array, ACCELERATION);
333 return OrganizeDataCostsAndMessageDataAllLevels(
334 data_all_levels, num_data_allocate_per_data_costs_message_data_array);
346 OrganizeDataCostsAndMessageDataAllLevels(
347 T* data_all_levels, std::size_t num_data_allocate_per_data_costs_message_data_array)
const
350 data_costs_device_checkerboard_all_levels[0] = data_all_levels;
351 data_costs_device_checkerboard_all_levels[1] =
352 &(data_costs_device_checkerboard_all_levels[0][1 * (num_data_allocate_per_data_costs_message_data_array)]);
355 for (
const auto checkerboard_num :
359 for (
unsigned int i = 0;
360 i < messages_device_all_levels[checkerboard_num].size();
363 messages_device_all_levels[checkerboard_num][i] =
364 &(data_costs_device_checkerboard_all_levels[0][
365 ((checkerboard_num * messages_device_all_levels[checkerboard_num].size()) + (i + 2)) *
366 (num_data_allocate_per_data_costs_message_data_array)]);
370 return {data_costs_device_checkerboard_all_levels, messages_device_all_levels};
380 virtual void FreeDataCostsAllDataInSingleArray(
384 mem_management_bp_run->FreeAlignedMemoryOnDevice(data_costs_to_free[0]);
397 std::size_t offset)
const
399 return {&(all_data_costs[0][offset]),
400 &(all_data_costs[1][offset])};
408 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
409 std::optional<std::pair<float*, DetailedTimings<beliefprop::Runtime_Type>>>
411 const std::array<float*, 2> & images_target_device,
413 const std::array<unsigned int, 2>& width_height_images,
414 T* allocated_mem_bp_processing,
420 std::unordered_map<beliefprop::Runtime_Type, std::array<timingType, 2>> start_end_times;
421 std::vector<std::array<timingType, 2>> data_costs_timings(alg_settings.
num_levels);
422 std::vector<std::array<timingType, 2>> bp_timings(alg_settings.
num_levels);
423 std::vector<std::array<timingType, 2>> data_copy_timings(alg_settings.
num_levels);
424 std::chrono::duration<double> total_time_bp_iters{0};
425 std::chrono::duration<double> total_time_copy_data{0};
426 std::chrono::duration<double> total_time_copy_data_kernel{0};
430 std::vector<BpLevel<T>> bp_levels;
435 bp_levels.push_back(
BpLevel<T>(width_height_images, 0, 0, ACCELERATION));
439 for (
unsigned int level_num = 1;
445 bp_levels[level_num-1].NextBpLevel(alg_settings.
num_disp_vals));
449 std::chrono::system_clock::now();
461 const std::size_t data_all_levels_each_data_message_arr =
462 bp_levels[alg_settings.
num_levels-1].LevelProperties().offset_into_arrays_ +
468 std::tie(data_costs_device_all_levels, messages_device_all_levels) =
469 OrganizeDataCostsAndMessageDataAllLevels(
470 allocated_mem_bp_processing, data_all_levels_each_data_message_arr);
475 std::tie(data_costs_device_all_levels, messages_device_all_levels) =
476 AllocateAndOrganizeDataCostsAndMessageDataAllLevels(
477 data_all_levels_each_data_message_arr, mem_management_bp_run);
481 data_costs_device_all_levels =
482 AllocateMemoryForDataCosts(
483 data_all_levels_each_data_message_arr,
484 mem_management_bp_run);
487 auto curr_time = std::chrono::system_clock::now();
489 data_costs_timings[0][0] = curr_time;
496 images_target_device,
497 data_costs_device_all_levels);
500 curr_time = std::chrono::system_clock::now();
501 data_costs_timings[0][1] = curr_time;
505 for (
unsigned int level_num = 1u; level_num < alg_settings.
num_levels; level_num++)
507 data_costs_timings[level_num][0] = std::chrono::system_clock::now();
509 InitializeDataCurrentLevel(
510 bp_levels[level_num],
511 bp_levels[level_num - 1],
512 RetrieveLevelDataCosts(
513 data_costs_device_all_levels,
514 bp_levels[level_num - 1u].LevelProperties().offset_into_arrays_),
515 RetrieveLevelDataCosts(
516 data_costs_device_all_levels,
517 bp_levels[level_num].LevelProperties().offset_into_arrays_),
521 data_costs_timings[level_num][1] = std::chrono::system_clock::now();
524 curr_time = data_costs_timings[alg_settings.
num_levels-1][1];
530 RetrieveLevelDataCosts(
531 data_costs_device_all_levels,
532 bp_levels[alg_settings.
num_levels - 1u].LevelProperties().offset_into_arrays_);
538 std::array<beliefprop::CheckerboardMessages<T*>, 2> messages_curr_next_level;
542 unsigned int current_level_messages_idx{0};
543 unsigned int next_level_messages_idx{(current_level_messages_idx + 1) % 2};
547 messages_curr_next_level[current_level_messages_idx] =
548 RetrieveLevelMessageData(
549 messages_device_all_levels,
550 bp_levels[alg_settings.
num_levels - 1u].LevelProperties().offset_into_arrays_);
555 messages_curr_next_level[current_level_messages_idx] =
556 AllocateMemoryForCheckerboardMessages(
558 mem_management_bp_run);
562 std::chrono::system_clock::now();
566 InitializeMessageValsToDefault(
568 messages_curr_next_level[current_level_messages_idx],
572 curr_time = std::chrono::system_clock::now();
580 for (
int level_num = (
int)alg_settings.
num_levels - 1;
584 const auto bp_iter_start_time = std::chrono::system_clock::now();
592 bp_levels[(
unsigned int)level_num],
593 data_costs_device_current_level,
594 messages_curr_next_level[current_level_messages_idx],
599 const auto bp_iter_end_time = std::chrono::system_clock::now();
600 total_time_bp_iters += bp_iter_end_time - bp_iter_start_time;
601 const auto copy_message_values_start_time = std::chrono::system_clock::now();
608 data_costs_device_current_level =
609 RetrieveLevelDataCosts(
610 data_costs_device_all_levels,
611 bp_levels[level_num - 1].LevelProperties().offset_into_arrays_);
615 messages_curr_next_level[next_level_messages_idx] =
616 RetrieveLevelMessageData(
617 messages_device_all_levels,
618 bp_levels[level_num - 1].LevelProperties().offset_into_arrays_);
622 messages_curr_next_level[next_level_messages_idx] =
623 AllocateMemoryForCheckerboardMessages(
624 bp_levels[level_num - 1].NumDataInBpArrays(alg_settings.
num_disp_vals));
627 const auto copy_message_values_kernel_start_time = std::chrono::system_clock::now();
633 CopyMessageValuesToNextLevelDown(
634 bp_levels[level_num],
635 bp_levels[level_num - 1],
636 messages_curr_next_level[current_level_messages_idx],
637 messages_curr_next_level[next_level_messages_idx],
642 const auto copy_message_values_kernel_end_time = std::chrono::system_clock::now();
643 total_time_copy_data_kernel +=
644 copy_message_values_kernel_end_time - copy_message_values_kernel_start_time;
645 data_copy_timings[level_num][0] = copy_message_values_kernel_start_time;
646 data_copy_timings[level_num][1] = copy_message_values_kernel_end_time;
650 FreeCheckerboardMessagesMemory(
651 messages_curr_next_level[current_level_messages_idx],
652 mem_management_bp_run);
659 current_level_messages_idx = (current_level_messages_idx + 1) % 2;
660 next_level_messages_idx = (current_level_messages_idx + 1) % 2;
664 const auto copy_message_values_end_time = std::chrono::system_clock::now();
665 total_time_copy_data +=
666 copy_message_values_end_time - copy_message_values_start_time;
667 bp_timings[level_num][0] = bp_iter_start_time;
668 bp_timings[level_num][1] = bp_iter_end_time;
672 std::chrono::system_clock::now();
675 float* result_disp_map_device =
678 data_costs_device_current_level,
679 messages_curr_next_level[current_level_messages_idx],
681 if (result_disp_map_device ==
nullptr) {
return {}; }
683 curr_time = std::chrono::system_clock::now();
694 FreeDataCostsAllDataInSingleArray(
695 data_costs_device_all_levels,
696 mem_management_bp_run);
702 FreeCheckerboardMessagesMemory(
703 messages_curr_next_level[current_level_messages_idx],
704 mem_management_bp_run);
708 data_costs_device_all_levels,
709 mem_management_bp_run);
713 std::chrono::system_clock::now();
720 if (bp_timings.size() > 1) {
725 if (bp_timings.size() > 2) {
730 if (bp_timings.size() > 3) {
735 if (bp_timings.size() > 4) {
740 if (bp_timings.size() > 5) {
745 if (bp_timings.size() > 6) {
750 if (bp_timings.size() > 7) {
755 if (bp_timings.size() > 8) {
760 if (bp_timings.size() > 9) {
768 std::ranges::for_each(start_end_times,
769 [&segment_timings](
const auto& current_runtime_name_timing) {
771 current_runtime_name_timing.first,
772 current_runtime_name_timing.second[1] - current_runtime_name_timing.second[0]);
780 const auto total_time =
785 total_time_bp_iters +
786 total_time_copy_data +
792 return std::pair<float*, DetailedTimings<beliefprop::Runtime_Type>>{
793 result_disp_map_device,
File with namespace for enums, constants, structures, and functions specific to belief propagation pr...
Header file that contains information about the stereo sets used for evaluation of the bp implementat...
Declares class to store and retrieve properties of a bp processing level.
File with namespace for enums, constants, structures, and functions specific to belief propagation pr...
Declares structure to store the belief propagation settings including the number of levels and iterat...
Constants for timing belief propagation implementation.
Declares class to store timings of one or more segments taken during the run(s) of an implementation ...
Declares class for memory management with functions defined for standard memory allocation using CPU ...
Declares child class of ParallelParams to store and process parallelization parameters to use in each...
std::chrono::time_point< std::chrono::system_clock > timingType
Alias for time point for start and end time for each timing segment.
Contains namespace with enums and constants for implementation run evaluation.
Declares and defines structure that stores settings for current implementation run as well as functio...
Define constraints for data type in processing.
Class to store and retrieve properties of a bp processing level including a data type specified as a ...
Class to store timings of one or more segments taken during the run(s) of an implementation or across...
std::chrono::duration< double > MedianTiming(const T run_segment_index) const
Get median timing for a specified segment that may have been run multiple times.
void AddTiming(const T timing_segment, const std::chrono::duration< double > &segment_time)
Add timing by segment index.
Class for memory management with functions defined for standard memory allocation using CPU....
Abstract class for holding and processing parallelization parameters. Child class(es) specific to im...
Abstract class to run belief propagation on target device. Some of the class functions need to be ove...
std::optional< std::pair< float *, DetailedTimings< beliefprop::Runtime_Type > > > operator()(const std::array< float *, 2 > &images_target_device, const beliefprop::BpSettings &alg_settings, const std::array< unsigned int, 2 > &width_height_images, T *allocated_mem_bp_processing, T *allocated_memory, const std::unique_ptr< MemoryManagement< T >> &mem_management_bp_run) const
Run belief propagation implementation on a set of stereo images to generate a disparity map....
ProcessBp(const ParallelParams &parallel_params)
const ParallelParams & parallel_params_
virtual run_eval::Status ErrorCheck(const char *file="", int line=0, bool abort=false) const
void RetrieveOutputDisparity(const beliefprop::BpLevelProperties &current_bp_level, const T *data_cost_checkerboard_0, const T *data_cost_checkerboard_1, const T *message_u_prev_checkerboard_0, const T *message_d_prev_checkerboard_0, const T *message_l_prev_checkerboard_0, const T *message_r_prev_checkerboard_0, const T *message_u_prev_checkerboard_1, const T *message_d_prev_checkerboard_1, const T *message_l_prev_checkerboard_1, const T *message_r_prev_checkerboard_1, float *disparity_between_images_device, unsigned int bp_settings_disp_vals, const ParallelParams &opt_cpu_params)
constexpr bool kAllocateFreeBpMemoryOutsideRuns
std::array< T, kNumCheckerboardParts > DataCostsCheckerboards
Define alias for two-element array with data costs for each bp processing checkerboard....
const std::unordered_map< Runtime_Type, std::string_view > kTimingNames
Mapping of runtime segment enum to header describing timing of the segment.
constexpr bool kUseOptMemManagement
std::array< std::array< T, kNumMessageArrays >, kNumCheckerboardParts > CheckerboardMessages
Define alias for array with message costs for each bp processing checkerboard. Each checkerboard mes...
Status
Enum for status to indicate if error or no error.
Structure to store the belief propagation settings including the number of levels and iterations.
unsigned int num_disp_vals
Number of disparity values must be set for each stereo set.