1 #ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
2 #define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
10 #include <pmmintrin.h>
/*
 * SSE3 kernel: element-wise product of two lv_32fc_t vectors, processed one
 * 128-bit register (four floats) per loop iteration.
 *
 * cVector    - output buffer
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of lv_32fc_t elements to process
 *
 * The loop uses _mm_load_ps/_mm_store_ps, which require 16-byte aligned
 * addresses.
 *
 * NOTE(review): this listing is incomplete. The declarations of a, b and c,
 * their per-iteration advancement, the body of the odd-count branch and the
 * closing braces are not visible here; presumably a/b/c alias
 * aVector/bVector/cVector -- confirm against the full file.
 * The lane comments below assume lv_32fc_t is an interleaved (real, imag)
 * pair of floats -- TODO confirm.
 */
18 static inline void volk_32fc_x2_multiply_32fc_a_sse3(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
19 unsigned int number = 0;
/* Count of full SIMD iterations; each one consumes four floats per input. */
20 const unsigned int halfPoints = num_points / 2;
22 __m128 x, y, yl, yh, z, tmp1, tmp2;
26 for(;number < halfPoints; number++){
/* Aligned load of four floats from a: x = (ar, ai, cr, ci). */
28 x = _mm_load_ps((
float*)a);
/* Aligned load of four floats from b: y = (br, bi, dr, di). */
29 y = _mm_load_ps((
float*)b);
/* Duplicate the even-indexed lanes of y: yl = (br, br, dr, dr). */
31 yl = _mm_moveldup_ps(y);
/* Duplicate the odd-indexed lanes of y: yh = (bi, bi, di, di). */
32 yh = _mm_movehdup_ps(y);
/* tmp1 = (ar*br, ai*br, cr*dr, ci*dr). */
34 tmp1 = _mm_mul_ps(x,yl);
/* 0xB1 swaps the two floats inside each pair: x = (ai, ar, ci, cr). */
36 x = _mm_shuffle_ps(x,x,0xB1);
/* tmp2 = (ai*bi, ar*bi, ci*di, cr*di). */
38 tmp2 = _mm_mul_ps(x,yh);
/*
 * addsub subtracts in even lanes and adds in odd lanes, giving
 * z = (ar*br - ai*bi, ai*br + ar*bi, cr*dr - ci*di, ci*dr + cr*di).
 */
40 z = _mm_addsub_ps(tmp1,tmp2);
/* Aligned store of the four result floats to c. */
42 _mm_store_ps((
float*)c,z);
/*
 * An odd num_points leaves one element that the SIMD loop did not cover.
 * NOTE(review): the statements that handle it are not visible in this listing.
 */
49 if((num_points % 2) != 0) {
55 #ifdef LV_HAVE_GENERIC
/*
 * Portable fallback: multiplies the two inputs one element at a time and
 * writes each product to the output.
 *
 * cVector    - output buffer
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * NOTE(review): the declarations of cPtr, aPtr and bPtr and the closing
 * braces are not visible in this listing; presumably the pointers start at
 * cVector, aVector and bVector -- confirm against the full file.
 */
63 static inline void volk_32fc_x2_multiply_32fc_a_generic(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
/* This initial value is overwritten by the for-loop initializer below. */
67 unsigned int number = 0;
69 for(number = 0; number < num_points; number++){
/*
 * Multiply the current pair, store the product, and advance all three
 * pointers. Presumably lv_32fc_t is a complex type whose `*` operator
 * performs complex multiplication -- TODO confirm.
 */
70 *cPtr++ = (*aPtr++) * (*bPtr++);
/*
 * Prototype only: the implementation is defined outside this header and is
 * called by volk_32fc_x2_multiply_32fc_a_orc.
 * The parameter list matches the other kernels in this file: output buffer,
 * two input buffers, element count.
 * NOTE(review): the name suggests an ORC-generated routine -- confirm.
 */
83 extern void volk_32fc_x2_multiply_32fc_a_orc_impl(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points);
/*
 * Thin wrapper: forwards all four arguments unchanged to
 * volk_32fc_x2_multiply_32fc_a_orc_impl and does no work of its own.
 *
 * cVector    - output buffer
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * NOTE(review): the closing brace of this function is not visible in this
 * listing.
 */
84 static inline void volk_32fc_x2_multiply_32fc_a_orc(
lv_32fc_t* cVector,
const lv_32fc_t* aVector,
const lv_32fc_t* bVector,
unsigned int num_points){
85 volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, num_points);