Optimized Belief Propagation (CPU and GPU)
ParallelParamsBp.cpp
Go to the documentation of this file.
1 /*
2 Copyright (C) 2024 Scott Grauer-Gray
3 
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
27 #include <ranges>
30 #include "ParallelParamsBp.h"
31 
// NOTE(review): the line that opens this definition (listing line 33) is
// missing from this extract; judging by the member summary at the end of this
// listing it is presumably `ParallelParamsBp::ParallelParamsBp(` -- confirm
// against the original source file.
32 //constructor to set parallel parameters with default dimensions for each kernel
// Stores the optimization setting and the number of levels, applies
// default_parallel_dims to every kernel via SetParallelDims(), and then sizes
// the containers that map parallel parameters to measured run time.
34  run_environment::OptParallelParamsSetting opt_parallel_params_setting,
35  unsigned int num_levels,
36  const std::array<unsigned int, 2>& default_parallel_dims) :
37  opt_parallel_params_setting_{opt_parallel_params_setting},
38  num_levels_{num_levels}
39 {
// SetParallelDims() must run first: the loop below reads the sizes of the
// per-kernel vectors in parallel_dims_each_kernel_ that it fills in.
40  SetParallelDims(default_parallel_dims);
41  //set up mapping of parallel parameters to runtime for each kernel at each
42  //level and total runtime
43  for (unsigned int i=0; i < beliefprop::kNumKernels; i++) {
44  //set to vector length for each kernel to corresponding vector length of
45  //kernel in parallel_params.parallel_dims_each_kernel_
46  p_params_to_run_time_each_kernel_[i] =
47  std::vector<std::map<std::array<unsigned int, 2>, double>>(
48  parallel_dims_each_kernel_[i].size());
49  }
// index kNumKernels (the slot after the per-kernel entries set in the loop
// above) gets a single map; it is the slot used for total runtime
50  p_params_to_run_time_each_kernel_[beliefprop::kNumKernels] =
51  std::vector<std::map<std::array<unsigned int, 2>, double>>(1);
52 }
53 
// NOTE(review): the line that opens this definition (listing line 55) is
// missing from this extract; judging by the member summary at the end of this
// listing it is presumably `void ParallelParamsBp::SetParallelDims(` --
// confirm against the original source file.
54 //set parallel parameters for each kernel to the same input dimensions
// kBlurImages, kInitMessageVals and kOutputDisp each receive a one-element
// vector holding parallel_dims; kDataCostsAtLevel, kBpAtLevel and kCopyAtLevel
// each receive num_levels_ copies of parallel_dims (one entry per level).
// Any dimensions previously stored for a kernel are replaced.
56  const std::array<unsigned int, 2>& parallel_dims)
57 {
58  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBlurImages)] =
59  {parallel_dims};
60  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kDataCostsAtLevel)] =
61  std::vector<std::array<unsigned int, 2>>(num_levels_, parallel_dims);
62  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kInitMessageVals)] =
63  {parallel_dims};
64  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBpAtLevel)] =
65  std::vector<std::array<unsigned int, 2>>(num_levels_, parallel_dims);
66  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kCopyAtLevel)] =
67  std::vector<std::array<unsigned int, 2>>(num_levels_, parallel_dims);
68  parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kOutputDisp)] =
69  {parallel_dims};
70 }
71 
// NOTE(review): the signature line (listing line 73) is missing from this
// extract; judging by the member summary at the end of this listing it is
// presumably `RunData ParallelParamsBp::AsRunData() const` -- confirm against
// the original source file.
72 //get current parallel parameters as RunData object
// Every dimensions entry is written as the string "<dims[0]> x <dims[1]>".
// Kernels stored with one entry are reported under a single header; kernels
// stored per level are reported once per level under the header
// "<kLevelText> <level> <kernel header>".
74 {
75  //initialize RunData object
76  RunData curr_run_data;
77 
78  //add parallel parameters setting
// NOTE(review): the header and value arguments of this call (listing lines
// 80-81) are missing from this extract, so they are not documented here.
79  curr_run_data.AddDataWHeader(
82  opt_parallel_params_setting_))));
83 
84  //add parallel parameters for each kernel
// single-entry kernels: only element [0] of the kernel's vector is reported
85  curr_run_data.AddDataWHeader(std::string(beliefprop::kBlurImagesPDimsHeader),
86  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBlurImages)][0][0]) + " x " +
87  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBlurImages)][0][1]));
88  curr_run_data.AddDataWHeader(std::string(beliefprop::kInitMValsPDimsHeader),
89  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kInitMessageVals)][0][0]) + " x " +
90  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kInitMessageVals)][0][1]));
// per-level kernels: one entry for every element currently stored for the kernel
91  for (unsigned int level=0; level < parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kDataCostsAtLevel)].size(); level++) {
92  curr_run_data.AddDataWHeader(
93  std::string(beliefprop::kLevelText) + " " + std::to_string(level) + " " + std::string(beliefprop::kDataCostsPDimsHeader),
94  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kDataCostsAtLevel)][level][0]) + " x " +
95  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kDataCostsAtLevel)][level][1]));
96  }
97  for (unsigned int level=0; level < parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBpAtLevel)].size(); level++) {
98  curr_run_data.AddDataWHeader(
99  std::string(beliefprop::kLevelText) + " " + std::to_string(level) + " " + std::string(beliefprop::kBpItersPDimsHeader),
100  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBpAtLevel)][level][0]) + " x " +
101  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBpAtLevel)][level][1]));
102  }
103  for (unsigned int level=0; level < parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kCopyAtLevel)].size(); level++) {
104  curr_run_data.AddDataWHeader(
105  std::string(beliefprop::kLevelText) + " " + std::to_string(level) + " " + std::string(beliefprop::kCopyToNextLevelPDimsHeader),
106  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kCopyAtLevel)][level][0]) + " x " +
107  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kCopyAtLevel)][level][1]));
108  }
109  curr_run_data.AddDataWHeader(std::string(beliefprop::kCompOutputDispPDimsHeader),
110  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kOutputDisp)][0][0]) + " x " +
111  std::to_string(parallel_dims_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kOutputDisp)][0][1]));
112 
113  return curr_run_data;
114 }
115 
116 //add results from run with same specified parallel parameters used for every parallel component
// p_params_curr_run: parallel dimensions that were used for the run; used as
//   the key under which each run time is stored
// curr_run_data: results of that run; run times are looked up in it by header
//
// NOTE(review): listing lines 120, 135, 138, 141 and 145 are missing from this
// extract (the value compared against opt_parallel_params_setting_, the
// headers used for the blur-images / init-message-values / output-disparity
// timings, and the expression assigned as total runtime), so those details
// are not documented here.
// NOTE(review): GetDataAsDouble() returns std::optional<double> and every
// result below is dereferenced with `*` without a has_value() check;
// dereferencing an empty optional is undefined behavior -- confirm each header
// is always present as a double in curr_run_data.
117 void ParallelParamsBp::AddTestResultsForParallelParams(const std::array<unsigned int, 2>& p_params_curr_run, const RunData& curr_run_data)
118 {
119  if (opt_parallel_params_setting_ ==
121  {
// per-level timings: column 0 of kLevelDCostBpTimeCTimeNames[level] is stored
// for kDataCostsAtLevel, column 1 for kBpAtLevel, column 2 for kCopyAtLevel;
// each header is suffixed with " " + kMedianOfTestRunsDesc
// NOTE(review): kLevelDCostBpTimeCTimeNames is declared with 10 rows, so this
// loop assumes num_levels_ <= 10 -- confirm that bound is enforced elsewhere.
122  for (unsigned int level=0; level < num_levels_; level++) {
123  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kDataCostsAtLevel)][level][p_params_curr_run] =
124  *curr_run_data.GetDataAsDouble(
125  std::string(beliefprop::kLevelDCostBpTimeCTimeNames[level][0]) + " " + std::string(run_eval::kMedianOfTestRunsDesc));
126  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBpAtLevel)][level][p_params_curr_run] =
127  *curr_run_data.GetDataAsDouble(
128  std::string(beliefprop::kLevelDCostBpTimeCTimeNames[level][1]) + " " + std::string(run_eval::kMedianOfTestRunsDesc));
129  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kCopyAtLevel)][level][p_params_curr_run] =
130  *curr_run_data.GetDataAsDouble(
131  std::string(beliefprop::kLevelDCostBpTimeCTimeNames[level][2]) + " " + std::string(run_eval::kMedianOfTestRunsDesc));
132  }
// kernels without a per-level breakdown: timing stored in element [0]
133  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kBlurImages)][0][p_params_curr_run] =
134  *curr_run_data.GetDataAsDouble(
136  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kInitMessageVals)][0][p_params_curr_run] =
137  *curr_run_data.GetDataAsDouble(
139  p_params_to_run_time_each_kernel_[static_cast<unsigned int>(beliefprop::BpKernel::kOutputDisp)][0][p_params_curr_run] =
140  *curr_run_data.GetDataAsDouble(
142  }
// total runtime is recorded on every call, outside the conditional block
// above, in the slot at index kNumKernels
143  //get total runtime
144  p_params_to_run_time_each_kernel_[beliefprop::kNumKernels][0][p_params_curr_run] =
146 }
147 
// NOTE(review): the signature line (listing line 151) and the value compared
// against opt_parallel_params_setting_ (listing line 153) are missing from
// this extract; judging by the member summary at the end of this listing the
// signature is presumably `void ParallelParamsBp::SetOptimizedParams() {` --
// confirm against the original source file.
148 //retrieve optimized parameters from results across multiple runs with different
149 //parallel parameters and set current parameters to retrieved optimized
150 //parameters
// NOTE(review): both branches dereference the iterator returned by
// std::ranges::min_element; if a run-time map is empty that iterator is end()
// and the dereference is invalid, so this presumably relies on
// AddTestResultsForParallelParams() having recorded at least one result for
// every map consulted -- confirm with callers.
152  if (opt_parallel_params_setting_ ==
154  {
155  for (unsigned int num_kernel_set = 0;
156  num_kernel_set < parallel_dims_each_kernel_.size();
157  num_kernel_set++)
158  {
159  //retrieve and set optimized parallel parameters for each kernel at each
160  //level for optimized run by finding and setting the parallel parameters
161  //with the lowest runtime for each kernel at each level from test runs
162  //with each possible parallel parameter setting
163  //std::ranges::min_element used to retrieve parallel parameters corresponding to
164  //lowest runtime for each kernel at each level across test runs
// transform writes one result per element of the run-time vector, starting at
// begin() of the dims vector for the same kernel, overwriting it in place; the
// constructor sizes each run-time vector to match its dims vector
165  std::ranges::transform(p_params_to_run_time_each_kernel_[num_kernel_set],
166  parallel_dims_each_kernel_[num_kernel_set].begin(),
167  [](const auto& p_params_to_runtime_kernel_at_level) {
168  return (std::ranges::min_element(
169  p_params_to_runtime_kernel_at_level,
170  {},
// projection: compare map entries by run time (.second); the selected entry's
// key (.first) is the parallel dimensions returned
171  [](const auto& p_params_w_runtime) {
172  return p_params_w_runtime.second;
173  }))->first;
174  });
175  }
176  }
177  else {
178  //set optimized parallel parameters for all kernels to parallel parameters
179  //that got the lowest runtime across all kernels in test runs where each
180  //possible parallel parameter setting was used
181  //seems like setting different parallel parameters for different kernels on
182  //GPU decreases runtime but increases runtime on CPU
// the map at index kNumKernels, element 0, is the total-runtime map
183  const auto best_parallel_params = std::ranges::min_element(
184  p_params_to_run_time_each_kernel_[beliefprop::kNumKernels][0],
185  {},
186  [](const auto& p_params_w_runtime) {
187  return p_params_w_runtime.second;
188  })->first;
189  SetParallelDims(best_parallel_params);
190  }
191 }
File with namespace for enums, constants, structures, and functions specific to belief propagation pr...
Constants for timing belief propagation implementation.
Declares child class of ParallelParams to store and process parallelization parameters to use in each...
void AddTestResultsForParallelParams(const std::array< unsigned int, 2 > &p_params_curr_run, const RunData &curr_run_data)
Add results from run with same specified parallel parameters used for every parallel component.
void SetParallelDims(const std::array< unsigned int, 2 > &parallel_dims) override
Set parallel parameters for each kernel to the same input dimensions.
RunData AsRunData() const override
Retrieve current parallel parameters as RunData object.
void SetOptimizedParams() override
Retrieve optimized parameters from results across multiple runs with different parallel parameters an...
ParallelParamsBp(run_environment::OptParallelParamsSetting opt_parallel_params_setting, unsigned int num_levels, const std::array< unsigned int, 2 > &default_parallel_dims)
Constructor to set parallel parameters with default dimensions for each kernel.
Class to store headers with data corresponding to current program run and evaluation.
Definition: RunData.h:42
void AddDataWHeader(const std::string &header, const std::string &data)
Add string data with header describing added data.
Definition: RunData.cpp:49
std::optional< double > GetDataAsDouble(const std::string_view header) const
Get data corresponding to header as double. Return null if data corresponds to a different data type.
Definition: RunData.cpp:142
constexpr std::string_view kCompOutputDispPDimsHeader
constexpr std::string_view kInitMValsPDimsHeader
constexpr unsigned int kNumKernels
const std::unordered_map< Runtime_Type, std::string_view > kTimingNames
Mapping of runtime segment enum to header describing timing of the segment.
constexpr std::string_view kDataCostsPDimsHeader
constexpr std::string_view kLevelText
constexpr std::string_view kCopyToNextLevelPDimsHeader
constexpr std::string_view kBlurImagesPDimsHeader
constexpr std::array< std::array< std::string_view, 3 >, 10 > kLevelDCostBpTimeCTimeNames
Constant consisting of arrays of string views for headers corresponding to timing of computation of d...
constexpr std::string_view kBpItersPDimsHeader
const std::map< OptParallelParamsSetting, std::string_view > kOptPParmsSettingToDesc
constexpr std::string_view kPParamsPerKernelSettingHeader
OptParallelParamsSetting
Enum to specify if optimizing parallel parameters per kernel or using same parallel parameters across...
constexpr std::string_view kMedianOfTestRunsDesc
Constant describing timing as the median across evaluation runs.
constexpr std::string_view kOptimizedRuntimeHeader