GNU Radio 3.4.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_16i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 00007 #ifdef LV_HAVE_SSE2 00008 #include <emmintrin.h> 00009 /*! 00010 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00011 \param inputVector The floating point input data buffer 00012 \param outputVector The 16 bit output data buffer 00013 \param scalar The value multiplied against each point in the input buffer 00014 \param num_points The number of data values to be converted 00015 \note Input buffer does NOT need to be properly aligned 00016 */ 00017 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 00020 const unsigned int eighthPoints = num_points / 8; 00021 00022 const float* inputVectorPtr = (const float*)inputVector; 00023 int16_t* outputVectorPtr = outputVector; 00024 __m128 vScalar = _mm_set_ps1(scalar); 00025 __m128 inputVal1, inputVal2; 00026 __m128i intInputVal1, intInputVal2; 00027 00028 for(;number < eighthPoints; number++){ 00029 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00030 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00031 00032 intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar)); 00033 intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar)); 00034 00035 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00036 00037 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00038 outputVectorPtr += 8; 00039 } 00040 00041 number = eighthPoints * 8; 00042 for(; number < num_points; number++){ 00043 outputVector[number] = (int16_t)(inputVector[number] * scalar); 00044 } 00045 } 00046 #endif /* LV_HAVE_SSE2 */ 00047 00048 #ifdef LV_HAVE_SSE 00049 #include <xmmintrin.h> 00050 /*! 00051 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00052 \param inputVector The floating point input data buffer 00053 \param outputVector The 16 bit output data buffer 00054 \param scalar The value multiplied against each point in the input buffer 00055 \param num_points The number of data values to be converted 00056 \note Input buffer does NOT need to be properly aligned 00057 */ 00058 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00059 unsigned int number = 0; 00060 00061 const unsigned int quarterPoints = num_points / 4; 00062 00063 const float* inputVectorPtr = (const float*)inputVector; 00064 int16_t* outputVectorPtr = outputVector; 00065 __m128 vScalar = _mm_set_ps1(scalar); 00066 __m128 ret; 00067 00068 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00069 00070 for(;number < quarterPoints; number++){ 00071 ret = _mm_loadu_ps(inputVectorPtr); 00072 inputVectorPtr += 4; 00073 00074 ret = _mm_mul_ps(ret, vScalar); 00075 00076 _mm_store_ps(outputFloatBuffer, ret); 00077 *outputVectorPtr++ = (int16_t)(outputFloatBuffer[0]); 00078 *outputVectorPtr++ = (int16_t)(outputFloatBuffer[1]); 00079 *outputVectorPtr++ = (int16_t)(outputFloatBuffer[2]); 00080 *outputVectorPtr++ = (int16_t)(outputFloatBuffer[3]); 00081 } 00082 00083 number = quarterPoints * 4; 00084 for(; number < num_points; number++){ 00085 outputVector[number] = (int16_t)(inputVector[number] * scalar); 00086 } 00087 } 00088 #endif /* LV_HAVE_SSE */ 00089 00090 #ifdef LV_HAVE_GENERIC 00091 /*! 00092 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00093 \param inputVector The floating point input data buffer 00094 \param outputVector The 16 bit output data buffer 00095 \param scalar The value multiplied against each point in the input buffer 00096 \param num_points The number of data values to be converted 00097 \note Input buffer does NOT need to be properly aligned 00098 */ 00099 static inline void volk_32f_s32f_convert_16i_u_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00100 int16_t* outputVectorPtr = outputVector; 00101 const float* inputVectorPtr = inputVector; 00102 unsigned int number = 0; 00103 00104 for(number = 0; number < num_points; number++){ 00105 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++ * scalar)); 00106 } 00107 } 00108 #endif /* LV_HAVE_GENERIC */ 00109 00110 00111 00112 00113 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */