Optimized Belief Propagation (CPU and GPU)
SIMDProcessing.h
Go to the documentation of this file.
1 /*
2 Copyright (C) 2024 Scott Grauer-Gray
3 
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
28 #ifndef SIMD_PROCESSING_H_
29 #define SIMD_PROCESSING_H_
30 
31 #include <math.h>
32 #include <omp.h>
33 #include <algorithm>
34 #include <iostream>
37 
42 namespace simd_processing
43 {
60  template<RunData_t T, RunDataVect_t U>
61  U LoadPackedDataAligned(unsigned int x, unsigned int y, unsigned int current_disparity,
62  const beliefprop::BpLevelProperties& current_bp_level, unsigned int numDispVals,
63  const T* inData)
64  {
65  std::cout << "Data type not supported for loading aligned data" << std::endl;
66  }
67 
83  template<RunData_t T, RunDataVect_t U>
84  U LoadPackedDataUnaligned(unsigned int x, unsigned int y, unsigned int current_disparity,
85  const beliefprop::BpLevelProperties& current_bp_level, unsigned int numDispVals,
86  const T* inData)
87  {
88  std::cout << "Data type not supported for loading unaligned data" << std::endl;
89  }
90 
99  template<RunDataVect_t T>
100  T createSIMDVectorSameData(float data) {
101  std::cout << "Data type not supported for creating simd vector" << std::endl;
102  }
103 
104  //
116  template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
117  V AddVals(const T& val1, const U& val2) { return (val1 + val2); }
118 
130  template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
131  V SubtractVals(const T& val1, const U& val2) { return (val1 - val2); }
132 
144  template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
145  V divideVals(const T& val1, const U& val2) { return (val1 / val2); }
146 
157  template<RunDataSingOrVect_t T, RunDataSingOrVect_t V>
158  T ConvertValToDatatype(V val) { return (T)val; }
159 
170  template<RunDataSingOrVect_t T>
171  T GetMinByElement(const T& val1, const T& val2) { return std::min(val1, val2); }
172 
185  template<RunData_t T, RunDataVectProcess_t U>
186  void StorePackedDataAligned(unsigned int indexDataStore, T* locationDataStore, const U& dataToStore) {
187  locationDataStore[indexDataStore] = dataToStore;
188  }
189 
202  template<RunData_t T, RunDataVectProcess_t U>
203  void StorePackedDataUnaligned(unsigned int indexDataStore, T* locationDataStore, const U& dataToStore) {
204  locationDataStore[indexDataStore] = dataToStore;
205  }
206 };
207 
208 //headers to include differ depending on architecture and CPU vectorization setting
209 #if defined(COMPILING_FOR_ARM)
210 #include "ARMTemplateSpFuncts.h"
211 
212 #if (CPU_VECTORIZATION_DEFINE == NEON_DEFINE)
213 #include "NEONTemplateSpFuncts.h"
214 #endif //CPU_VECTORIZATION_DEFINE == NEON_DEFINE
215 
216 #else
217 //needed so that template specializations are used when available
218 #include "AVXTemplateSpFuncts.h"
219 
220 #if (CPU_VECTORIZATION_DEFINE == AVX_256_DEFINE)
221 #include "AVX256TemplateSpFuncts.h"
222 #elif (CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE)
223 #include "AVX256TemplateSpFuncts.h"
224 #include "AVX512TemplateSpFuncts.h"
225 #endif
226 
227 #endif //COMPILING_FOR_ARM
228 
229 #endif //SIMD_PROCESSING_H_
Contains template specializations for ARM/NEON vector processing.
Template specializations for processing on SIMD vector data types supported by AVX256.
Template specializations for processing on SIMD vector data types supported by AVX512.
Contains template specializations for AVX vector processing.
Template specializations for processing on SIMD vector data types supported by NEON on ARM CPUs.
Contains namespace with CPU run defaults and constants.
Define constraints for data type in processing.
General functions for processing using SIMD vector data types on CPU. Template specializations must ...
void StorePackedDataUnaligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array Array that data is writ...
void StorePackedDataAligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array Array that data is writ...
T ConvertValToDatatype(V val)
Convert value of specified type to value of another specified type Define template specialization to...
U LoadPackedDataAligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type inData array ...
V SubtractVals(const T &val1, const U &val2)
Subtract values of specified types and return difference as specified type Define template specializa...
T GetMinByElement(const T &val1, const T &val2)
Get element-wise minimum of two inputs which may be of a SIMD vector type where corresponding values ...
V AddVals(const T &val1, const U &val2)
Add values of specified types and return sum as specified type Define template specialization to sup...
T createSIMDVectorSameData(float data)
Create a SIMD vector of the specified type with all elements containing the same data.
V divideVals(const T &val1, const U &val2)
Divide values of specified types and return quotient as specified type Define template specializatio...
U LoadPackedDataUnaligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type inData array ...
POD struct to store bp level data. Struct can be passed to global CUDA kernels so needs to take restr...
Definition: BpLevel.h:42