Optimized Belief Propagation (CPU and GPU)
SmoothImageCUDA.cpp
Go to the documentation of this file.
1 /*
2 Copyright (C) 2024 Scott Grauer-Gray
3 
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
#include <iostream>

#include "SmoothImageCUDA.h"
#include "KernelFilter.cu"
29 
namespace {

//reports a failed CUDA runtime call on stderr
//returns true if status is cudaSuccess and false otherwise
bool SmoothImageCudaCallOk(const cudaError_t status, const char* action)
{
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "SmoothImageCUDA: " << action << " failed: "
            << cudaGetErrorString(status) << std::endl;
  return false;
}

//checks the most recent kernel launch for errors and waits for it to complete
//returns true if both the launch and the execution succeeded
bool SmoothImageCudaKernelOk(const char* kernel_name)
{
  //a kernel launch has no return value; launch errors are retrieved with cudaGetLastError
  if (!SmoothImageCudaCallOk(cudaGetLastError(), kernel_name)) {
    return false;
  }
  //errors that occur while the kernel is running are returned by the synchronizing call
  return SmoothImageCudaCallOk(cudaDeviceSynchronize(), kernel_name);
}

//owns a single device allocation and releases it with cudaFree on scope exit
//so that every return path of the smoothing function frees its temporary buffers
template <typename T>
class SmoothImageDeviceArray
{
public:
  SmoothImageDeviceArray() = default;
  SmoothImageDeviceArray(const SmoothImageDeviceArray&) = delete;
  SmoothImageDeviceArray& operator=(const SmoothImageDeviceArray&) = delete;

  //cudaFree performs no operation when given a null pointer
  ~SmoothImageDeviceArray() { cudaFree(data_); }

  //allocates space for num_elements values of type T on the device
  //returns false (after reporting the error) if the allocation fails
  bool Allocate(size_t num_elements, const char* description)
  {
    if (!SmoothImageCudaCallOk(
          cudaMalloc((void**)&data_, num_elements * sizeof(T)), description))
    {
      data_ = nullptr;
      return false;
    }
    return true;
  }

  T* Get() const { return data_; }

private:
  T* data_{nullptr};
};

} //namespace

//for the CUDA smoothing, the input image is on the host and the output image is on the device (GPU)
//
//in_image: host image of unsigned int pixels to smooth
//sigma: sigma of the Gaussian filter; if less than kMinSigmaValSmooth, the pixels are only
//       converted to float without any smoothing
//smoothed_image: device pointer that receives the float output; presumably must have room
//                for Width()*Height() floats (allocation is done by the caller - TODO confirm)
//
//if any CUDA call fails, the error is reported on stderr, all temporary device memory is
//released, and the function returns without completing the output in smoothed_image
void SmoothImageCUDA::operator()(const BpImage<unsigned int>& in_image, float sigma, float* smoothed_image) const
{
  const unsigned int width = in_image.Width();
  const unsigned int height = in_image.Height();

  //empty image: nothing to process (and a grid with a zero dimension is not a valid launch)
  if ((width == 0) || (height == 0)) {
    return;
  }

  // setup execution parameters
  const auto kernel_thread_block_dims = this->parallel_params_.OptParamsForKernel(
    {static_cast<unsigned int>(beliefprop::BpKernel::kBlurImages), 0});
  const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
  if ((threads.x == 0) || (threads.y == 0)) {
    std::cerr << "SmoothImageCUDA: thread block dimensions must be non-zero" << std::endl;
    return;
  }

  //integer ceiling division so that the grid covers every pixel; written as
  //quotient plus remainder check so that the computation can't overflow
  const dim3 grid{width / threads.x + ((width % threads.x) != 0 ? 1u : 0u),
                  height / threads.y + ((height % threads.y) != 0 ? 1u : 0u)};

  //compute the pixel count in size_t so the product can't wrap in 32-bit arithmetic
  const size_t num_pixels = static_cast<size_t>(width) * static_cast<size_t>(height);

  //both processing paths start with the original unsigned int image on the device
  SmoothImageDeviceArray<unsigned int> original_image_device;
  if (!original_image_device.Allocate(num_pixels, "allocating device memory for input image")) {
    return;
  }
  if (!SmoothImageCudaCallOk(
        cudaMemcpy(original_image_device.Get(), in_image.PointerToPixelsStart(),
                   num_pixels * sizeof(unsigned int), cudaMemcpyHostToDevice),
        "copying input image to device"))
  {
    return;
  }

  //if sigma < kMinSigmaValSmooth, don't smooth image...just convert the input image
  //of unsigned ints to an output image of float values
  if (sigma < kMinSigmaValSmooth)
  {
    //call kernel to convert input unsigned int pixels to output float pixels on the device
    beliefprop_cuda::convertUnsignedIntImageToFloat <<< grid, threads >>> (
      original_image_device.Get(), smoothed_image, width, height);
    SmoothImageCudaKernelOk("convertUnsignedIntImageToFloat kernel");
    return;
  }

  //apply a Gaussian filter to the image
  //retrieve the filter coefficients for the given sigma
  const auto filter = this->MakeFilter(sigma);

  //copy the image filter to the GPU
  SmoothImageDeviceArray<float> filter_device;
  if (!filter_device.Allocate(filter.size(), "allocating device memory for filter")) {
    return;
  }
  if (!SmoothImageCudaCallOk(
        cudaMemcpy(filter_device.Get(), filter.data(),
                   filter.size() * sizeof(float), cudaMemcpyHostToDevice),
        "copying filter to device"))
  {
    return;
  }

  //allocate the GPU global memory for the intermediate image
  //(image that is filtered horizontally but not yet vertically)
  SmoothImageDeviceArray<float> intermediate_image_device;
  if (!intermediate_image_device.Allocate(num_pixels, "allocating device memory for intermediate image")) {
    return;
  }

  //first filter the image horizontally, then vertically
  //the result is applying a 2D gaussian filter with the given sigma value to the image
  beliefprop_cuda::FilterImageAcross<unsigned int> <<< grid, threads >>> (
    original_image_device.Get(), intermediate_image_device.Get(),
    width, height,
    filter_device.Get(), filter.size());
  if (!SmoothImageCudaKernelOk("FilterImageAcross kernel")) {
    return;
  }

  //now use the vertical filter to complete the smoothing of image on the device
  beliefprop_cuda::FilterImageVertical<float> <<< grid, threads >>> (
    intermediate_image_device.Get(), smoothed_image,
    width, height,
    filter_device.Get(), filter.size());
  SmoothImageCudaKernelOk("FilterImageVertical kernel");

  //the device memory for the original image, intermediate image, and filter
  //is freed when the SmoothImageDeviceArray objects go out of scope
}
This kernel is used to filter the image with the given filter in the vertical and horizontal directions.
constexpr float kMinSigmaValSmooth
Definition: SmoothImage.h:39
Declares child class of SmoothImage for smoothing images in the CUDA implementation.
Class to define images that are used in bp processing.
Definition: BpImage.h:56
unsigned int Height() const
Definition: BpImage.h:106
unsigned int Width() const
Definition: BpImage.h:105
T * PointerToPixelsStart() const
Definition: BpImage.h:85
virtual std::array< unsigned int, 2 > OptParamsForKernel(const std::array< unsigned int, 2 > &kernel_location) const =0
Get optimized parallel parameters for parallel processing kernel for kernel that is indexed as an arr...
void operator()(const BpImage< unsigned int > &in_image, float sigma, float *smoothed_image) const override
For the CUDA smoothing, the input image is on the host and the output image is on the device (GPU)
const ParallelParams & parallel_params_
Parallel parameters to use for parallel operations (number of threads on CPU / thread block config in CUDA).
Definition: SmoothImage.h:86
std::vector< float > MakeFilter(float sigma) const
Create a Gaussian filter from a sigma value.
Definition: SmoothImage.cpp:30