GNU Radio 3.5.3.1 C++ API
volk_32f_s32f_convert_8i_a.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
2 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
3 
4 #include <volk/volk_common.h>
5 #include <inttypes.h>
6 #include <stdio.h>
7 
8 #ifdef LV_HAVE_SSE2
9 #include <emmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts the result into a 8 bit integer value
  \param outputVector The 8 bit output data buffer (written with aligned 16-byte stores, so it must be 16-byte aligned)
  \param inputVector The floating point input data buffer (read with aligned 16-byte loads, so it must be 16-byte aligned)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Every element, including the trailing num_points % 16 ones, is converted with the
        SSE float-to-int conversion, which rounds according to the current MXCSR rounding
        mode (round-to-nearest-even by default). A NaN product saturates to 127 because
        the min operation returns its second operand when the first is NaN.
*/
static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;

  const unsigned int sixteenthPoints = num_points / 16;

  const float* inputVectorPtr = inputVector;
  int8_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(-128.0f);
  const __m128 vmax_val = _mm_set_ps1(127.0f);
  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
  __m128 tailVal;

  /* Main loop: 16 floats in, 16 int8 values out per iteration. */
  for(; number < sixteenthPoints; number++){
    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    /* Scale, then clamp in float so the integer conversion cannot overflow. */
    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val);
    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), vmax_val), vmin_val);
    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), vmax_val), vmin_val);

    intInputVal1 = _mm_cvtps_epi32(inputVal1);
    intInputVal2 = _mm_cvtps_epi32(inputVal2);
    intInputVal3 = _mm_cvtps_epi32(inputVal3);
    intInputVal4 = _mm_cvtps_epi32(inputVal4);

    /* Narrow 4 x (4 x int32) -> 2 x (8 x int16) -> 16 x int8, preserving element order. */
    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
    intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);

    intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);

    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
    outputVectorPtr += 16;
  }

  /*
   * Tail: use the scalar-lane forms of the same instructions so the leftover
   * elements get identical clamping, NaN handling and rounding as the vector
   * loop (a plain C cast would truncate instead of round, and is undefined
   * for NaN).
   */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    tailVal = _mm_mul_ss(_mm_load_ss(inputVector + number), vScalar);
    tailVal = _mm_max_ss(_mm_min_ss(tailVal, vmax_val), vmin_val);
    outputVector[number] = (int8_t)_mm_cvtss_si32(tailVal);
  }
}
70 #endif /* LV_HAVE_SSE2 */
71 
72 #ifdef LV_HAVE_SSE
73 #include <xmmintrin.h>
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts the result into a 8 bit integer value
  \param outputVector The 8 bit output data buffer
  \param inputVector The floating point input data buffer (read with aligned 16-byte loads, so it must be 16-byte aligned)
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note Every element is converted with the SSE scalar float-to-int conversion, which
        rounds according to the current MXCSR rounding mode (round-to-nearest-even by
        default) rather than truncating. A NaN product saturates to 127 because the min
        operation returns its second operand when the first is NaN.
*/
static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  unsigned int lane;

  const unsigned int quarterPoints = num_points / 4;

  const float* inputVectorPtr = inputVector;
  int8_t* outputVectorPtr = outputVector;

  const __m128 vScalar = _mm_set_ps1(scalar);
  const __m128 vmin_val = _mm_set_ps1(-128.0f);
  const __m128 vmax_val = _mm_set_ps1(127.0f);
  __m128 ret;

  for(; number < quarterPoints; number++){
    ret = _mm_load_ps(inputVectorPtr);
    inputVectorPtr += 4;

    /* Scale, then clamp in float so the integer conversion stays in int8 range. */
    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);

    /*
     * SSE1 has no packed float->int32 conversion into an XMM register, so
     * convert the low lane and rotate the next lane down, four times.
     */
    for(lane = 0; lane < 4; lane++){
      *outputVectorPtr++ = (int8_t)_mm_cvtss_si32(ret);
      ret = _mm_shuffle_ps(ret, ret, _MM_SHUFFLE(0, 3, 2, 1));
    }
  }

  /* Tail: same clamp/convert sequence on the scalar lane for the last num_points % 4 values. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    ret = _mm_mul_ss(_mm_load_ss(inputVector + number), vScalar);
    ret = _mm_max_ss(_mm_min_ss(ret, vmax_val), vmin_val);
    outputVector[number] = (int8_t)_mm_cvtss_si32(ret);
  }
}
123 #endif /* LV_HAVE_SSE */
124 
125 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each point in the input buffer by the scalar value, clamps the product to [-128, 127], then converts the result into a 8 bit integer value
  \param outputVector The 8 bit output data buffer
  \param inputVector The floating point input data buffer
  \param scalar The value multiplied against each point in the input buffer
  \param num_points The number of data values to be converted
  \note The clamped product is rounded to the nearest integer, ties to even, rather than
        truncated. A NaN product saturates to 127 instead of reaching the integer cast,
        where the conversion would be undefined.
*/
static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){
  int8_t* outputVectorPtr = outputVector;
  const float* inputVectorPtr = inputVector;
  unsigned int number = 0;
  const float min_val = -128;
  const float max_val = 127;
  float r;
  float frac;
  int whole;

  for(number = 0; number < num_points; number++){
    r = *inputVectorPtr++ * scalar;

    /*
     * Clamp with the comparison written so that a NaN product (every
     * comparison false) selects max_val.
     */
    r = (r < max_val) ? r : max_val;
    r = (r > min_val) ? r : min_val;

    /*
     * Round half to even without libm: r is already within [-128, 127],
     * so the truncating cast is well defined and r - whole is exact.
     */
    whole = (int)r;
    frac = r - (float)whole;
    if(frac > 0.5f || (frac == 0.5f && (whole & 1)))
      whole++;
    else if(frac < -0.5f || (frac == -0.5f && (whole & 1)))
      whole--;

    *outputVectorPtr++ = (int8_t)whole;
  }
}
150 #endif /* LV_HAVE_GENERIC */
151 
152 
153 
154 
155 #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */