GNU Radio 3.4.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_8i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 00007 #ifdef LV_HAVE_SSE2 00008 #include <emmintrin.h> 00009 /*! 00010 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00011 \param inputVector The floating point input data buffer 00012 \param outputVector The 8 bit output data buffer 00013 \param scalar The value multiplied against each point in the input buffer 00014 \param num_points The number of data values to be converted 00015 \note Input buffer does NOT need to be properly aligned 00016 */ 00017 static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 00020 const unsigned int sixteenthPoints = num_points / 16; 00021 00022 const float* inputVectorPtr = (const float*)inputVector; 00023 int8_t* outputVectorPtr = outputVector; 00024 __m128 vScalar = _mm_set_ps1(scalar); 00025 __m128 inputVal1, inputVal2, inputVal3, inputVal4; 00026 __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4; 00027 00028 for(;number < sixteenthPoints; number++){ 00029 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00030 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00031 inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00032 inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00033 00034 intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); 00035 intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); 00036 intInputVal3 = _mm_cvtps_epi32(_mm_mul_ps(inputVal3, vScalar)); 00037 intInputVal4 = _mm_cvtps_epi32(_mm_mul_ps(inputVal4, vScalar)); 00038 00039 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00040 intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4); 00041 00042 intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3); 00043 00044 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00045 outputVectorPtr += 16; 00046 } 00047 00048 number = sixteenthPoints * 16; 00049 for(; number < num_points; number++){ 00050 outputVector[number] = (int8_t)(inputVector[number] * scalar); 00051 } 00052 } 00053 #endif /* LV_HAVE_SSE2 */ 00054 00055 #ifdef LV_HAVE_SSE 00056 #include <xmmintrin.h> 00057 /*! 00058 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00059 \param inputVector The floating point input data buffer 00060 \param outputVector The 8 bit output data buffer 00061 \param scalar The value multiplied against each point in the input buffer 00062 \param num_points The number of data values to be converted 00063 \note Input buffer does NOT need to be properly aligned 00064 */ 00065 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00066 unsigned int number = 0; 00067 00068 const unsigned int quarterPoints = num_points / 4; 00069 00070 const float* inputVectorPtr = (const float*)inputVector; 00071 int8_t* outputVectorPtr = outputVector; 00072 __m128 vScalar = _mm_set_ps1(scalar); 00073 __m128 ret; 00074 00075 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00076 00077 for(;number < quarterPoints; number++){ 00078 ret = _mm_loadu_ps(inputVectorPtr); 00079 inputVectorPtr += 4; 00080 00081 ret = _mm_mul_ps(ret, vScalar); 00082 00083 _mm_store_ps(outputFloatBuffer, ret); 00084 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]); 00085 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]); 00086 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]); 00087 *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]); 00088 } 00089 00090 number = quarterPoints * 4; 00091 for(; number < num_points; number++){ 00092 outputVector[number] = (int8_t)(inputVector[number] * scalar); 00093 } 00094 } 00095 #endif /* LV_HAVE_SSE */ 00096 00097 #ifdef LV_HAVE_GENERIC 00098 /*! 00099 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 8 bit integer value 00100 \param inputVector The floating point input data buffer 00101 \param outputVector The 8 bit output data buffer 00102 \param scalar The value multiplied against each point in the input buffer 00103 \param num_points The number of data values to be converted 00104 \note Input buffer does NOT need to be properly aligned 00105 */ 00106 static inline void volk_32f_s32f_convert_8i_u_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00107 int8_t* outputVectorPtr = outputVector; 00108 const float* inputVectorPtr = inputVector; 00109 unsigned int number = 0; 00110 00111 for(number = 0; number < num_points; number++){ 00112 *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ * scalar)); 00113 } 00114 } 00115 #endif /* LV_HAVE_GENERIC */ 00116 00117 00118 00119 00120 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */