28 #ifndef SIMD_PROCESSING_H_
29 #define SIMD_PROCESSING_H_
60 template<RunData_t T, RunDataVect_t U>
65 std::cout <<
"Data type not supported for loading aligned data" << std::endl;
83 template<RunData_t T, RunDataVect_t U>
88 std::cout <<
"Data type not supported for loading unaligned data" << std::endl;
99 template<RunDataVect_t T>
101 std::cout <<
"Data type not supported for creating simd vector" << std::endl;
116 template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
117 V
AddVals(
const T& val1,
const U& val2) {
return (val1 + val2); }
130 template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
144 template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
145 V
divideVals(
const T& val1,
const U& val2) {
return (val1 / val2); }
157 template<RunDataSingOrVect_t T, RunDataSingOrVect_t V>
170 template<RunDataSingOrVect_t T>
185 template<RunData_t T, RunDataVectProcess_t U>
187 locationDataStore[indexDataStore] = dataToStore;
202 template<RunData_t T, RunDataVectProcess_t U>
204 locationDataStore[indexDataStore] = dataToStore;
209 #if defined(COMPILING_FOR_ARM)
212 #if (CPU_VECTORIZATION_DEFINE == NEON_DEFINE)
220 #if (CPU_VECTORIZATION_DEFINE == AVX_256_DEFINE)
222 #elif (CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE)
Contains template specializations for ARM/NEON vector processing.
Template specializations for processing on SIMD vector data types supported by AVX256.
Template specializations for processing on SIMD vector data types supported by AVX512.
Contains template specializations for AVX vector processing.
Template specializations for processing on SIMD vector data types supported by NEON on ARM CPUs.
Contains namespace with CPU run defaults and constants.
Define constraints for data type in processing.
General functions for processing using SIMD vector data types on CPU. Template specializations must ...
void StorePackedDataUnaligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array. Array that data is writ...
void StorePackedDataAligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array. Array that data is writ...
T ConvertValToDatatype(V val)
Convert value of specified type to value of another specified type. Define template specialization to...
U LoadPackedDataAligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type. inData array ...
V SubtractVals(const T &val1, const U &val2)
Subtract values of specified types and return difference as specified type. Define template specializa...
T GetMinByElement(const T &val1, const T &val2)
Get element-wise minimum of two inputs which may be of a SIMD vector type where corresponding values ...
V AddVals(const T &val1, const U &val2)
Add values of specified types and return sum as specified type. Define template specialization to sup...
T createSIMDVectorSameData(float data)
Create a SIMD vector of the specified type with all elements containing the same data.
V divideVals(const T &val1, const U &val2)
Divide values of specified types and return quotient as specified type. Define template specializatio...
U LoadPackedDataUnaligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type. inData array ...
POD struct to store bp level data. Struct can be passed to global CUDA kernel so needs to take restr...