36 const dim3 threads{kernel_thread_block_dims[0], kernel_thread_block_dims[1]};
37 const dim3 grid{(
unsigned int)(ceil((
float)in_image.
Width() / (float)threads.x)),
38 (
unsigned int)(ceil((
float)in_image.
Height() / (
float)threads.y))};
45 unsigned int* original_image_device;
46 cudaMalloc((
void**) &original_image_device, (in_image.
Width()*in_image.
Height()*
sizeof(
unsigned int)));
50 (in_image.
Width()*in_image.
Height()*
sizeof(
unsigned int)), cudaMemcpyHostToDevice);
53 beliefprop_cuda::convertUnsignedIntImageToFloat <<< grid, threads >>> (
54 original_image_device, smoothed_image, in_image.
Width(), in_image.
Height());
55 cudaDeviceSynchronize();
58 cudaFree(original_image_device);
68 cudaMalloc((
void**)&filter_device, filter.size()*
sizeof(
float));
69 cudaMemcpy(filter_device, filter.data(),
70 filter.size()*
sizeof(
float), cudaMemcpyHostToDevice);
74 unsigned int* original_image_device;
75 float* intermediate_image_device;
78 cudaMalloc((
void**)&original_image_device, (in_image.
Width()*in_image.
Height()*
sizeof(
unsigned int)));
79 cudaMalloc((
void**)&intermediate_image_device, (in_image.
Width()*in_image.
Height()*
sizeof(
float)));
83 in_image.
Width()*in_image.
Height()*
sizeof(
unsigned int),
84 cudaMemcpyHostToDevice);
88 beliefprop_cuda::FilterImageAcross<unsigned int> <<< grid, threads >>> (
89 original_image_device, intermediate_image_device,
91 filter_device, filter.size());
92 cudaDeviceSynchronize();
95 beliefprop_cuda::FilterImageVertical<float> <<< grid, threads >>> (
96 intermediate_image_device, smoothed_image,
98 filter_device, filter.size());
99 cudaDeviceSynchronize();
102 cudaFree(original_image_device);
103 cudaFree(intermediate_image_device);
104 cudaFree(filter_device);
This kernel is used to filter the image with the given filter in the vertical and horizontal directions.
constexpr float kMinSigmaValSmooth
Declares child class of SmoothImage for smoothing images in the CUDA implementation.
Class to define images that are used in bp processing.
unsigned int Height() const
unsigned int Width() const
T * PointerToPixelsStart() const
virtual std::array< unsigned int, 2 > OptParamsForKernel(const std::array< unsigned int, 2 > &kernel_location) const =0
Get optimized parallel parameters for the parallel processing kernel that is indexed as an array of two unsigned integers.
void operator()(const BpImage< unsigned int > &in_image, float sigma, float *smoothed_image) const override
For the CUDA smoothing, the input image is on the host and the output image is on the device (GPU)
const ParallelParams & parallel_params_
Parallel parameters to use parallel operations (number of threads on CPU / thread block config in CUDA)
std::vector< float > MakeFilter(float sigma) const
Create a Gaussian filter from a sigma value.