35 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
37 const char *file,
int line,
bool abort)
const
39 const auto code = cudaPeekAtLastError();
40 if (code != cudaSuccess) {
41 std::cout <<
"CUDA ERROR: " << cudaGetErrorString(code) <<
" " << file <<
" " << line << std::endl;
44 cudaDeviceSynchronize();
57 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
63 T* allocated_memory)
const
66 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
67 const auto kernel_thread_block_dims =
68 this->parallel_params_.OptParamsForKernel(
71 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
79 const bool data_aligned{
80 beliefprop::MemoryAlignedAtDataStart<T>(
85 for (
unsigned int iteration_num = 0; iteration_num < alg_settings.
num_iterations; iteration_num++)
88 ((iteration_num % 2) == 0) ?
91 cudaDeviceSynchronize();
94 if constexpr (DISP_VALS > 0) {
95 beliefprop_cuda::RunBPIterationUsingCheckerboardUpdates<T, DISP_VALS> <<<grid, threads>>> (
97 data_costs_device[0], data_costs_device[1],
109 beliefprop_cuda::RunBPIterationUsingCheckerboardUpdates<T, DISP_VALS> <<<grid, threads>>> (
111 data_costs_device[0], data_costs_device[1],
123 cudaDeviceSynchronize();
135 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
141 unsigned int bp_settings_num_disp_vals)
const
143 const auto kernel_thread_block_dims =
144 this->parallel_params_.OptParamsForKernel(
147 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
152 cudaDeviceSynchronize();
163 beliefprop_cuda::CopyMsgDataToNextLevel<T, DISP_VALS> <<< grid, threads >>> (
181 bp_settings_num_disp_vals);
183 cudaDeviceSynchronize();
192 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
196 const std::array<float*, 2>& images_target_device,
204 cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
208 const auto kernel_thread_block_dims =
209 this->parallel_params_.OptParamsForKernel(
212 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
219 beliefprop_cuda::InitializeBottomLevelData<T, DISP_VALS> <<<grid, threads>>> (
221 images_target_device[0], images_target_device[1],
222 data_costs_device[0], data_costs_device[1],
224 cudaDeviceSynchronize();
234 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
238 unsigned int bp_settings_num_disp_vals)
const
240 const auto kernel_thread_block_dims =
241 this->parallel_params_.OptParamsForKernel(
243 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
250 beliefprop_cuda::InitializeMessageValsToDefaultKernel<T, DISP_VALS> <<< grid, threads >>> (
260 bp_settings_num_disp_vals);
261 cudaDeviceSynchronize();
270 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
275 unsigned int bp_settings_num_disp_vals)
const
277 const auto kernel_thread_block_dims =
278 this->parallel_params_.OptParamsForKernel(
281 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
293 const size_t offset_num{0};
294 for (
const auto& [checkerboard_part, data_costs_write] : {
298 beliefprop_cuda::InitializeCurrentLevelData<T, DISP_VALS> <<<grid, threads>>> (
301 data_costs_device[0], data_costs_device[1],
302 data_costs_write, ((
unsigned int) offset_num /
sizeof(
float)),
303 bp_settings_num_disp_vals);
304 cudaDeviceSynchronize();
312 template<RunData_t T,
unsigned int DISP_VALS, run_environment::AccSetting ACCELERATION>
317 unsigned int bp_settings_num_disp_vals)
const
319 float* result_disp_map_device;
321 (
void**)&result_disp_map_device,
324 const auto kernel_thread_block_dims =
325 this->parallel_params_.OptParamsForKernel(
328 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
334 beliefprop_cuda::RetrieveOutputDisparity<T, DISP_VALS> <<<grid, threads>>> (
336 data_costs_device[0], data_costs_device[1],
345 result_disp_map_device, bp_settings_num_disp_vals);
346 cudaDeviceSynchronize();
351 return result_disp_map_device;
File with namespace for enums, constants, structures, and functions specific to belief propagation pr...
This file defines the methods to perform belief propagation for disparity map estimation from stereo ...
Contains namespace with enums and constants for implementation run evaluation.
Contains namespace with utility functions for implementation.
Class to store and retrieve properties of a bp processing level including a data type specified as a ...
const beliefprop::BpLevelProperties & LevelProperties() const
Return level properties as a const reference to avoid copying and to prevent them from being modified.
Child class of ProcessBp that defines functions used in processing bp in the CUDA implementation.
Namespace for enums, constants, structures, and functions specific to belief propagation processing.
std::array< T, kNumCheckerboardParts > DataCostsCheckerboards
Define alias for two-element array with data costs for each bp processing checkerboard....
CheckerboardPart
Define the two checkerboard "parts" that the image is divided into.
std::array< std::array< T, kNumMessageArrays >, kNumCheckerboardParts > CheckerboardMessages
Define alias for array with message costs for each bp processing checkerboard. Each checkerboard mes...
constexpr std::array< BpStereoSet, 8 > kStereoSetsToProcess
Declare stereo sets to process with name, num disparity values, and scale factor currently conesFullS...
Status
Enum for status to indicate whether or not an error occurred.
unsigned int height_level_
unsigned int width_checkerboard_level_
unsigned int bytes_align_memory_
unsigned int width_level_
unsigned int padded_width_checkerboard_level_
Structure to store the belief propagation settings including the number of levels and iterations.
unsigned int num_iterations
unsigned int num_disp_vals
Number of disparity values must be set for each stereo set.
float disc_k_bp
Discontinuity cost cap set to high value by default but is expected to be dependent on number of disp...