cudaBeliefProp/html/_s_i_m_d_processing_8h_source.html

 /*

 Copyright (C) 2024 Scott Grauer-Gray


 This program is free software; you can redistribute it and/or modify

 it under the terms of the GNU General Public License as published by

 the Free Software Foundation; either version 2 of the License, or

 (at your option) any later version.


 This program is distributed in the hope that it will be useful,

 but WITHOUT ANY WARRANTY; without even the implied warranty of

 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 GNU General Public License for more details.


 You should have received a copy of the GNU General Public License

 along with this program; if not, write to the Free Software

 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA

 */


 #ifndef SIMD_PROCESSING_H_

 #define SIMD_PROCESSING_H_


 #include <math.h>

 #include <omp.h>

 #include <algorithm>

 #include <iostream>

 #include "RunEval/RunTypeConstraints.h"

 #include "RunImpCPU/RunCPUSettings.h"


 namespace simd_processing

 {

   /**
    * @brief Generic fallback for loading multiple values of primitive type data
    * from the inData array into a SIMD vector data type (aligned variant).
    *
    * This primary template does not perform a load: it only prints a message
    * that the data type is not supported. Supported type combinations are
    * expected to come from template specializations in the architecture-specific
    * headers included at the bottom of this file.
    *
    * @tparam T Primitive data type stored in inData
    * @tparam U SIMD vector data type to return
    * @param x Unused by this fallback
    * @param y Unused by this fallback
    * @param current_disparity Unused by this fallback
    * @param current_bp_level Unused by this fallback
    * @param numDispVals Unused by this fallback
    * @param inData Unused by this fallback
    * @return Value-initialized U
    */
   template<RunData_t T, RunDataVect_t U>
   U LoadPackedDataAligned(unsigned int x, unsigned int y, unsigned int current_disparity,
     const beliefprop::BpLevelProperties& current_bp_level, unsigned int numDispVals,
     const T* inData)
   {
     std::cout << "Data type not supported for loading aligned data" << std::endl;
     //return a value-initialized vector so control never flows off the end of
     //this non-void function (doing so is undefined behavior)
     return U{};
   }


   /**
    * @brief Generic fallback for loading multiple values of primitive type data
    * from the inData array into a SIMD vector data type (unaligned variant).
    *
    * This primary template does not perform a load: it only prints a message
    * that the data type is not supported. Supported type combinations are
    * expected to come from template specializations in the architecture-specific
    * headers included at the bottom of this file.
    *
    * @tparam T Primitive data type stored in inData
    * @tparam U SIMD vector data type to return
    * @param x Unused by this fallback
    * @param y Unused by this fallback
    * @param current_disparity Unused by this fallback
    * @param current_bp_level Unused by this fallback
    * @param numDispVals Unused by this fallback
    * @param inData Unused by this fallback
    * @return Value-initialized U
    */
   template<RunData_t T, RunDataVect_t U>
   U LoadPackedDataUnaligned(unsigned int x, unsigned int y, unsigned int current_disparity,
     const beliefprop::BpLevelProperties& current_bp_level, unsigned int numDispVals,
     const T* inData)
   {
     std::cout << "Data type not supported for loading unaligned data" << std::endl;
     //return a value-initialized vector so control never flows off the end of
     //this non-void function (doing so is undefined behavior)
     return U{};
   }


   /**
    * @brief Generic fallback for creating a SIMD vector of the specified type
    * with all elements containing the same data.
    *
    * This primary template does not build the vector: it only prints a message
    * that the data type is not supported. Supported vector types are expected
    * to come from template specializations in the architecture-specific headers
    * included at the bottom of this file.
    *
    * @tparam T SIMD vector data type to create
    * @param data Unused by this fallback
    * @return Value-initialized T
    */
   template<RunDataVect_t T>
   T createSIMDVectorSameData(float data) {
     std::cout << "Data type not supported for creating simd vector" << std::endl;
     //return a value-initialized vector so control never flows off the end of
     //this non-void function (doing so is undefined behavior)
     return T{};
   }


   /**
    * @brief Add two values and return the sum converted to the requested type.
    *
    * The generic version relies on operator+ being available for the operand
    * types; the result is implicitly converted to V on return.
    *
    * @tparam T Type of first operand
    * @tparam U Type of second operand
    * @tparam V Type of returned sum
    * @param val1 First operand
    * @param val2 Second operand
    * @return val1 + val2 as type V
    */
   template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
   V AddVals(const T& val1, const U& val2)
   {
     return val1 + val2;
   }


   /**
    * @brief Subtract the second value from the first and return the difference
    * converted to the requested type.
    *
    * The generic version relies on operator- being available for the operand
    * types; the result is implicitly converted to V on return.
    *
    * @tparam T Type of first operand
    * @tparam U Type of second operand
    * @tparam V Type of returned difference
    * @param val1 Value to subtract from
    * @param val2 Value to subtract
    * @return val1 - val2 as type V
    */
   template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
   V SubtractVals(const T& val1, const U& val2)
   {
     return val1 - val2;
   }


   /**
    * @brief Divide the first value by the second and return the quotient
    * converted to the requested type.
    *
    * The generic version relies on operator/ being available for the operand
    * types; the result is implicitly converted to V on return.
    *
    * @tparam T Type of dividend
    * @tparam U Type of divisor
    * @tparam V Type of returned quotient
    * @param val1 Dividend
    * @param val2 Divisor
    * @return val1 / val2 as type V
    */
   template<RunDataSingOrVect_t T, RunDataSingOrVect_t U, RunDataSingOrVect_t V>
   V divideVals(const T& val1, const U& val2)
   {
     return val1 / val2;
   }


   /**
    * @brief Convert a value of one type to a value of another specified type.
    *
    * @tparam T Type to convert to
    * @tparam V Type of the input value
    * @param val Value to convert
    * @return val converted to type T
    */
   template<RunDataSingOrVect_t T, RunDataSingOrVect_t V>
   T ConvertValToDatatype(V val)
   {
     //conversion is done with a C-style cast from V to T
     return (T)val;
   }


   /**
    * @brief Get the minimum of two inputs of the same type.
    *
    * The generic version delegates to std::min, so val1 is returned when
    * neither input compares less than the other.
    *
    * @tparam T Type of both inputs and of the result
    * @param val1 First input
    * @param val2 Second input
    * @return Copy of the smaller of val1 and val2
    */
   template<RunDataSingOrVect_t T>
   T GetMinByElement(const T& val1, const T& val2)
   {
     const T& smaller = std::min(val1, val2);
     return smaller;
   }


   template<RunData_t T, RunDataVectProcess_t U>

   void StorePackedDataAligned(unsigned int indexDataStore, T* locationDataStore, const U& dataToStore) {

     locationDataStore[indexDataStore] = dataToStore;

   }


   template<RunData_t T, RunDataVectProcess_t U>

   void StorePackedDataUnaligned(unsigned int indexDataStore, T* locationDataStore, const U& dataToStore) {

     locationDataStore[indexDataStore] = dataToStore;

   }

 };


 //headers to include differ depending on architecture and CPU vectorization setting

 #if defined(COMPILING_FOR_ARM)

 #include "ARMTemplateSpFuncts.h"


 #if (CPU_VECTORIZATION_DEFINE == NEON_DEFINE)

 #include "NEONTemplateSpFuncts.h"

 #endif //CPU_VECTORIZATION_DEFINE == NEON_DEFINE


 #else

 //needed so that template specializations are used when available

 #include "AVXTemplateSpFuncts.h"


 #if (CPU_VECTORIZATION_DEFINE == AVX_256_DEFINE)

 #include "AVX256TemplateSpFuncts.h"

 #elif (CPU_VECTORIZATION_DEFINE == AVX_512_DEFINE)

 #include "AVX256TemplateSpFuncts.h"

 #include "AVX512TemplateSpFuncts.h"

 #endif


 #endif //COMPILING_FOR_ARM


 #endif //SIMD_PROCESSING_H_

ARMTemplateSpFuncts.h
Contains template specializations for ARM/NEON vector processing.

AVX256TemplateSpFuncts.h
Template specializations for processing on SIMD vector data types supported by AVX256.

AVX512TemplateSpFuncts.h
Template specializations for processing on SIMD vector data types supported by AVX512.

AVXTemplateSpFuncts.h
Contains template specializations for AVX vector processing.

NEONTemplateSpFuncts.h
Template specializations for processing on SIMD vector data types supported by NEON on ARM CPUs.

RunCPUSettings.h
Contains namespace with CPU run defaults and constants.

RunTypeConstraints.h
Define constraints for data type in processing.

simd_processing
General functions for processing using SIMD vector data types on CPU.  Template specializations must ...
Definition: SIMDProcessing.h:43

simd_processing::StorePackedDataUnaligned
void StorePackedDataUnaligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array  Array that data is writ...
Definition: SIMDProcessing.h:203

simd_processing::StorePackedDataAligned
void StorePackedDataAligned(unsigned int indexDataStore, T *locationDataStore, const U &dataToStore)
Write data in SIMD vector (or single element) to specified location in array  Array that data is writ...
Definition: SIMDProcessing.h:186

simd_processing::ConvertValToDatatype
T ConvertValToDatatype(V val)
Convert value of specified type to value of another specified type  Define template specialization to...
Definition: SIMDProcessing.h:158

simd_processing::LoadPackedDataAligned
U LoadPackedDataAligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type  inData array ...
Definition: SIMDProcessing.h:61

simd_processing::SubtractVals
V SubtractVals(const T &val1, const U &val2)
Subtract values of specified types and return difference as specified type Define template specializa...
Definition: SIMDProcessing.h:131

simd_processing::GetMinByElement
T GetMinByElement(const T &val1, const T &val2)
Get element-wise minimum of two inputs which may be of a SIMD vector type where corresponding values ...
Definition: SIMDProcessing.h:171

simd_processing::AddVals
V AddVals(const T &val1, const U &val2)
Add values of specified types and return sum as specified type  Define template specialization to sup...
Definition: SIMDProcessing.h:117

simd_processing::createSIMDVectorSameData
T createSIMDVectorSameData(float data)
Create a SIMD vector of the specified type with all elements containing the same data.
Definition: SIMDProcessing.h:100

simd_processing::divideVals
V divideVals(const T &val1, const U &val2)
Divide values of specified types and return quotient as specified type  Define template specializatio...
Definition: SIMDProcessing.h:145

simd_processing::LoadPackedDataUnaligned
U LoadPackedDataUnaligned(unsigned int x, unsigned int y, unsigned int current_disparity, const beliefprop::BpLevelProperties &current_bp_level, unsigned int numDispVals, const T *inData)
Load multiple values of primitive type data from inData array to SIMD vector data type  inData array ...
Definition: SIMDProcessing.h:84

beliefprop::BpLevelProperties
POD struct to store bp level data. Struct can be passed to global CUDA kernels so needs to take restr...
Definition: BpLevel.h:42