Optimized Belief Propagation (CPU and GPU)
KernelBpStereoHalf.cu
Go to the documentation of this file.
1 /*
2 Copyright (C) 2024 Scott Grauer-Gray
3 
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
31 //set constexpr unsigned int values for number of disparity values for each stereo set used
32 constexpr unsigned int kDispVals0{beliefprop::kStereoSetsToProcess[0].num_disp_vals};
33 constexpr unsigned int kDispVals1{beliefprop::kStereoSetsToProcess[1].num_disp_vals};
34 constexpr unsigned int kDispVals2{beliefprop::kStereoSetsToProcess[2].num_disp_vals};
35 constexpr unsigned int kDispVals3{beliefprop::kStereoSetsToProcess[3].num_disp_vals};
36 constexpr unsigned int kDispVals4{beliefprop::kStereoSetsToProcess[4].num_disp_vals};
37 constexpr unsigned int kDispVals5{beliefprop::kStereoSetsToProcess[5].num_disp_vals};
38 constexpr unsigned int kDispVals6{beliefprop::kStereoSetsToProcess[6].num_disp_vals};
39 
40 //device function to process messages using half precision with number of disparity values
41 //given in template parameter
42 template <unsigned int DISP_VALS>
43 __device__ inline void MsgStereoHalf(unsigned int x_val, unsigned int y_val,
44  const BpLevel<T>& current_bp_level, half messages_neighbor_1[DISP_VALS],
45  half messages_neighbor_2[DISP_VALS], half messages_neighbor_3[DISP_VALS],
46  half data_costs[DISP_VALS], half* dst_message_array, half disc_k_bp, bool data_aligned)
47 {
48  // aggregate and find min
49  half minimum = beliefprop::kHighValBp<half>;
50  half dst[DISP_VALS];
51 
52  for (unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
53  dst[current_disparity] = messages_neighbor_1[current_disparity] +
54  messages_neighbor_2[current_disparity] +
55  messages_neighbor_3[current_disparity] +
56  data_costs[current_disparity];
57  if (dst[current_disparity] < minimum) {
58  minimum = dst[current_disparity];
59  }
60  }
61 
62  //retrieve the minimum value at each disparity in O(n) time using Felzenszwalb's method (see "Efficient Belief Propagation for Early Vision")
63  DtStereo<half, DISP_VALS>(dst);
64 
65  // truncate
66  minimum += disc_k_bp;
67 
68  // normalize
69  half val_to_normalize = 0;
70 
71  for (unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++)
72  {
73  if (minimum < dst[current_disparity]) {
74  dst[current_disparity] = minimum;
75  }
76  val_to_normalize += dst[current_disparity];
77  }
78 
79  //if val_to_normalize is infinite or NaN (observed when using more than 5 computation levels with half-precision),
80  //set destination vector to 0 for all disparities
81  //note that may cause results to differ a little from ideal
82  if (__hisnan(val_to_normalize) || ((__hisinf(val_to_normalize)) != 0)) {
83  unsigned int dest_message_array_index = beliefprop::RetrieveIndexInDataAndMessage(x_val, y_val,
85  current_bp_level.LevelProperties().height_level_, 0,
86  DISP_VALS);
87 
88  for (unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++) {
89  dst_message_array[dest_message_array_index] = (half) 0.0;
91  dest_message_array_index += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
92  }
93  else {
94  dest_message_array_index++;
95  }
96  }
97  }
98  else
99  {
100  val_to_normalize /= DISP_VALS;
101 
102  unsigned int dest_message_array_index = beliefprop::RetrieveIndexInDataAndMessage(x_val, y_val,
104  current_bp_level.LevelProperties().height_level_, 0,
105  DISP_VALS);
106 
107  for (unsigned int current_disparity = 0; current_disparity < DISP_VALS; current_disparity++)
108  {
109  dst[current_disparity] -= val_to_normalize;
110  dst_message_array[dest_message_array_index] = dst[current_disparity];
112  dest_message_array_index += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
113  }
114  else {
115  dest_message_array_index++;
116  }
117  }
118  }
119 }
120 
121 //template BP message processing when number of disparity values is given
122 //as an input parameter and not as a template
123 template <beliefprop::MessageComp M>
124 __device__ inline void MsgStereoHalf(unsigned int x_val, unsigned int y_val,
125  const BpLevel<T>& current_bp_level,
126  half* prev_u_messageArray, half* prev_d_messageArray,
127  half* prev_l_messageArray, half* prev_r_messageArray,
128  half* data_message_array, half* dst_message_array,
129  half disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals,
130  half* dst_processing, unsigned int checkerboard_adjustment,
131  unsigned int offset_data)
132 {
133  // aggregate and find min
134  half minimum{beliefprop::kHighValBp<half>};
135  unsigned int proc_array_idx_disp_0 = beliefprop::RetrieveIndexInDataAndMessage(x_val, y_val,
137  current_bp_level.LevelProperties().height_level_, 0,
138  bp_settings_disp_vals);
139  unsigned int proc_array_idx{proc_array_idx_disp_0};
140 
141  for (unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++)
142  {
143  //set initial dst processing array value corresponding to disparity for M message type
144  beliefprop::SetInitDstProcessing<half, half, M>(x_val, y_val, current_bp_level, prev_u_messageArray, prev_d_messageArray,
145  prev_l_messageArray, prev_r_messageArray, data_message_array, dst_message_array,
146  disc_k_bp, data_aligned, bp_settings_disp_vals, dst_processing, checkerboard_adjustment,
147  offset_data, current_disparity, proc_array_idx);
148 
149  if (dst_processing[proc_array_idx] < minimum)
150  minimum = dst_processing[proc_array_idx];
151 
153  proc_array_idx += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
154  }
155  else {
156  proc_array_idx++;
157  }
158  }
159 
160  //retrieve the minimum value at each disparity in O(n) time using Felzenszwalb's method
161  //(see "Efficient Belief Propagation for Early Vision")
162  DtStereo<half>(dst_processing, bp_settings_disp_vals, x_val, y_val, current_bp_level);
163 
164  // truncate
165  minimum += disc_k_bp;
166 
167  // normalize
168  half val_to_normalize{(half)0.0};
169 
170  proc_array_idx = proc_array_idx_disp_0;
171  for (unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
172  if (minimum < dst_processing[proc_array_idx]) {
173  dst_processing[proc_array_idx] = minimum;
174  }
175 
176  val_to_normalize += dst_processing[proc_array_idx];
177 
179  proc_array_idx += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
180  }
181  else {
182  proc_array_idx++;
183  }
184  }
185 
186  //if val_to_normalize is infinite or NaN (observed when using more than 5 computation levels with half-precision),
187  //set destination vector to 0 for all disparities
188  //note that may cause results to differ a little from ideal
189  if (__hisnan(val_to_normalize) || ((__hisinf(val_to_normalize)) != 0)) {
190  //dst processing index and message array index are the same for each disparity value in this processing
191  proc_array_idx = proc_array_idx_disp_0;
192 
193  for (unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
194  dst_message_array[proc_array_idx] = (half)0.0;
196  proc_array_idx += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
197  }
198  else {
199  proc_array_idx++;
200  }
201  }
202  }
203  else
204  {
205  val_to_normalize /= ((half)bp_settings_disp_vals);
206 
207  //dst processing index and message array index are the same for each disparity value in this processing
208  proc_array_idx = proc_array_idx_disp_0;
209 
210  for (unsigned int current_disparity = 0; current_disparity < bp_settings_disp_vals; current_disparity++) {
211  dst_processing[proc_array_idx] -= val_to_normalize;
212  dst_message_array[proc_array_idx] = ConvertValToDifferentDataTypeIfNeeded<half, half>(dst_processing[proc_array_idx]);
214  proc_array_idx += current_bp_level.LevelProperties().padded_width_checkerboard_level_;
215  }
216  else {
217  proc_array_idx++;
218  }
219  }
220  }
221 }
222 
223 template<>
224 __device__ inline void MsgStereo<half, half, beliefprop::MessageComp::kUMessage>(
225  unsigned int x_val, unsigned int y_val,
226  const BpLevel<T>& current_bp_level,
227  half* prev_u_messageArray, half* prev_d_messageArray,
228  half* prev_l_messageArray, half* prev_r_messageArray,
229  half* data_message_array, half* dst_message_array,
230  half disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals,
231  half* dst_processing, unsigned int checkerboard_adjustment,
232  unsigned int offset_data)
233 {
234  MsgStereoHalf<beliefprop::MessageComp::kUMessage>(x_val, y_val, current_bp_level, prev_u_messageArray, prev_d_messageArray,
235  prev_l_messageArray, prev_r_messageArray, data_message_array, dst_message_array, disc_k_bp, data_aligned, bp_settings_disp_vals,
236  dst_processing, checkerboard_adjustment, offset_data);
237 }
238 
239 template<>
240 __device__ inline void MsgStereo<half, half, beliefprop::MessageComp::kDMessage>(
241  unsigned int x_val, unsigned int y_val,
242  const BpLevel<T>& current_bp_level,
243  half* prev_u_messageArray, half* prev_d_messageArray,
244  half* prev_l_messageArray, half* prev_r_messageArray,
245  half* data_message_array, half* dst_message_array,
246  half disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals,
247  half* dst_processing, unsigned int checkerboard_adjustment,
248  unsigned int offset_data)
249 {
250  MsgStereoHalf<beliefprop::MessageComp::kDMessage>(x_val, y_val, current_bp_level, prev_u_messageArray, prev_d_messageArray,
251  prev_l_messageArray, prev_r_messageArray, data_message_array, dst_message_array, disc_k_bp, data_aligned, bp_settings_disp_vals,
252  dst_processing, checkerboard_adjustment, offset_data);
253 }
254 
255 template<>
256 __device__ inline void MsgStereo<half, half, beliefprop::MessageComp::kLMessage>(
257  unsigned int x_val, unsigned int y_val,
258  const BpLevel<T>& current_bp_level,
259  half* prev_u_messageArray, half* prev_d_messageArray,
260  half* prev_l_messageArray, half* prev_r_messageArray,
261  half* data_message_array, half* dst_message_array,
262  half disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals,
263  half* dst_processing, unsigned int checkerboard_adjustment,
264  unsigned int offset_data)
265 {
266  MsgStereoHalf<beliefprop::MessageComp::kLMessage>(x_val, y_val, current_bp_level, prev_u_messageArray, prev_d_messageArray,
267  prev_l_messageArray, prev_r_messageArray, data_message_array, dst_message_array, disc_k_bp, data_aligned, bp_settings_disp_vals,
268  dst_processing, checkerboard_adjustment, offset_data);
269 }
270 
271 template<>
272 __device__ inline void MsgStereo<half, half, beliefprop::MessageComp::kRMessage>(
273  unsigned int x_val, unsigned int y_val,
274  const BpLevel<T>& current_bp_level,
275  half* prev_u_messageArray, half* prev_d_messageArray,
276  half* prev_l_messageArray, half* prev_r_messageArray,
277  half* data_message_array, half* dst_message_array,
278  half disc_k_bp, bool data_aligned, unsigned int bp_settings_disp_vals,
279  half* dst_processing, unsigned int checkerboard_adjustment,
280  unsigned int offset_data)
281 {
282  MsgStereoHalf<beliefprop::MessageComp::kRMessage>(x_val, y_val, current_bp_level, prev_u_messageArray, prev_d_messageArray,
283  prev_l_messageArray, prev_r_messageArray, data_message_array, dst_message_array, disc_k_bp, data_aligned, bp_settings_disp_vals,
284  dst_processing, checkerboard_adjustment, offset_data);
285 }
286 
287 template<>
288 __device__ inline void MsgStereo<half, half, kDispVals0>(
289  unsigned int x_val, unsigned int y_val,
290  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals0],
291  half messages_neighbor_2[kDispVals0], half messages_neighbor_3[kDispVals0],
292  half data_costs[kDispVals0], half* dst_message_array, half disc_k_bp, bool data_aligned)
293 {
294  MsgStereoHalf<kDispVals0>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
295  data_costs, dst_message_array, disc_k_bp, data_aligned);
296 }
297 
298 template<>
299 __device__ inline void MsgStereo<half, half, kDispVals1>(
300  unsigned int x_val, unsigned int y_val,
301  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals1],
302  half messages_neighbor_2[kDispVals1], half messages_neighbor_3[kDispVals1],
303  half data_costs[kDispVals1], half* dst_message_array, half disc_k_bp, bool data_aligned)
304 {
305  MsgStereoHalf<kDispVals1>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
306  data_costs, dst_message_array, disc_k_bp, data_aligned);
307 }
308 
309 template<>
310 __device__ inline void MsgStereo<half, half, kDispVals2>(
311  unsigned int x_val, unsigned int y_val,
312  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals2],
313  half messages_neighbor_2[kDispVals2], half messages_neighbor_3[kDispVals2],
314  half data_costs[kDispVals2], half* dst_message_array, half disc_k_bp, bool data_aligned)
315 {
316  MsgStereoHalf<kDispVals2>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
317  data_costs, dst_message_array, disc_k_bp, data_aligned);
318 }
319 
320 template<>
321 __device__ inline void MsgStereo<half, half, kDispVals3>(
322  unsigned int x_val, unsigned int y_val,
323  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals3],
324  half messages_neighbor_2[kDispVals3], half messages_neighbor_3[kDispVals3],
325  half data_costs[kDispVals3], half* dst_message_array, half disc_k_bp, bool data_aligned)
326 {
327  MsgStereoHalf<kDispVals3>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
328  data_costs, dst_message_array, disc_k_bp, data_aligned);
329 }
330 
331 template<>
332 __device__ inline void MsgStereo<half, half, kDispVals4>(
333  unsigned int x_val, unsigned int y_val,
334  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals4],
335  half messages_neighbor_2[kDispVals4], half messages_neighbor_3[kDispVals4],
336  half data_costs[kDispVals4], half* dst_message_array, half disc_k_bp, bool data_aligned)
337 {
338  MsgStereoHalf<kDispVals4>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
339  data_costs, dst_message_array, disc_k_bp, data_aligned);
340 }
341 
342 template<>
343 __device__ inline void MsgStereo<half, half, kDispVals5>(
344  unsigned int x_val, unsigned int y_val,
345  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals5],
346  half messages_neighbor_2[kDispVals5], half messages_neighbor_3[kDispVals5],
347  half data_costs[kDispVals5], half* dst_message_array, half disc_k_bp, bool data_aligned)
348 {
349  MsgStereoHalf<kDispVals5>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
350  data_costs, dst_message_array, disc_k_bp, data_aligned);
351 }
352 
353 template<>
354 __device__ inline void MsgStereo<half, half, kDispVals6>(
355  unsigned int x_val, unsigned int y_val,
356  const BpLevel<T>& current_bp_level, half messages_neighbor_1[kDispVals6],
357  half messages_neighbor_2[kDispVals6], half messages_neighbor_3[kDispVals6],
358  half data_costs[kDispVals6], half* dst_message_array, half disc_k_bp, bool data_aligned)
359 {
360  MsgStereoHalf<kDispVals6>(x_val, y_val, current_bp_level, messages_neighbor_1, messages_neighbor_2, messages_neighbor_3,
361  data_costs, dst_message_array, disc_k_bp, data_aligned);
362 }
constexpr unsigned int kDispVals6
__device__ void MsgStereo< half, half, kDispVals2 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals2], half messages_neighbor_2[kDispVals2], half messages_neighbor_3[kDispVals2], half data_costs[kDispVals2], half *dst_message_array, half disc_k_bp, bool data_aligned)
__device__ void MsgStereoHalf(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[DISP_VALS], half messages_neighbor_2[DISP_VALS], half messages_neighbor_3[DISP_VALS], half data_costs[DISP_VALS], half *dst_message_array, half disc_k_bp, bool data_aligned)
__device__ void MsgStereo< half, half, kDispVals4 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals4], half messages_neighbor_2[kDispVals4], half messages_neighbor_3[kDispVals4], half data_costs[kDispVals4], half *dst_message_array, half disc_k_bp, bool data_aligned)
__device__ void MsgStereo< half, half, kDispVals6 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals6], half messages_neighbor_2[kDispVals6], half messages_neighbor_3[kDispVals6], half data_costs[kDispVals6], half *dst_message_array, half disc_k_bp, bool data_aligned)
constexpr unsigned int kDispVals2
constexpr unsigned int kDispVals0
constexpr unsigned int kDispVals1
__device__ void MsgStereo< half, half, kDispVals5 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals5], half messages_neighbor_2[kDispVals5], half messages_neighbor_3[kDispVals5], half data_costs[kDispVals5], half *dst_message_array, half disc_k_bp, bool data_aligned)
constexpr unsigned int kDispVals4
__device__ void MsgStereo< half, half, kDispVals3 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals3], half messages_neighbor_2[kDispVals3], half messages_neighbor_3[kDispVals3], half data_costs[kDispVals3], half *dst_message_array, half disc_k_bp, bool data_aligned)
constexpr unsigned int kDispVals3
__device__ void MsgStereo< half, half, kDispVals0 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals0], half messages_neighbor_2[kDispVals0], half messages_neighbor_3[kDispVals0], half data_costs[kDispVals0], half *dst_message_array, half disc_k_bp, bool data_aligned)
__device__ void MsgStereo< half, half, kDispVals1 >(unsigned int x_val, unsigned int y_val, const BpLevel< T > &current_bp_level, half messages_neighbor_1[kDispVals1], half messages_neighbor_2[kDispVals1], half messages_neighbor_3[kDispVals1], half data_costs[kDispVals1], half *dst_message_array, half disc_k_bp, bool data_aligned)
constexpr unsigned int kDispVals5
Class to store and retrieve properties of a bp processing level including a data type specified as a ...
Definition: BpLevel.h:60
const beliefprop::BpLevelProperties & LevelProperties() const
Return level properties as const reference to avoid copying and not allow it to be modified.
Definition: BpLevel.h:165
constexpr bool kOptimizedIndexingSetting
Definition: BpRunUtils.h:114
ARCHITECTURE_ADDITION unsigned int RetrieveIndexInDataAndMessage(unsigned int x_val, unsigned int y_val, unsigned int width, unsigned int height, unsigned int current_disparity, unsigned int total_num_disp_vals, unsigned int offset_data=0u)
Retrieve the current 1-D index value of the given point at the given disparity in the data cost and m...
constexpr std::array< BpStereoSet, 8 > kStereoSetsToProcess
Declare stereo sets to process with name, num disparity values, and scale factor currently conesFullS...
unsigned int height_level_
Definition: BpLevel.h:44
unsigned int padded_width_checkerboard_level_
Definition: BpLevel.h:47