64 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
71 #define NFSFT_DEFAULT_THRESHOLD 1000
78 #define NFSFT_BREAK_EVEN 5
114 double _Complex last;
124 memset(plan->
f_hat_intern,0U,(2*plan->
N+2)*
sizeof(
double _Complex));
127 lowe = -plan->
N + (plan->
N%2);
131 for (n = lowe; n <= upe; n += 2)
137 for(k = 1; k <= plan->
N; k++)
148 low = -plan->
N + (1-plan->
N%2);
153 for (n = low; n <= up; n += 2)
165 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
167 for (k = plan->
N-1; k > 0; k--)
170 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
192 double _Complex last;
202 lowe = -plan->
N + (plan->
N%2);
206 for (n = lowe; n <= upe; n += 2)
210 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
211 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
212 for(k = 1; k <= plan->
N; k++)
220 low = -plan->
N + (1-plan->
N%2);
224 for (n = low; n <= up; n += 2)
228 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
229 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
230 for(k = 1; k <= plan->
N; k++)
235 plan->
f_hat[NFSFT_INDEX(0,n,plan)] =
236 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(1,n,plan)];
237 last = plan->
f_hat[NFSFT_INDEX(1,n,plan)];
238 plan->
f_hat[NFSFT_INDEX(1,n,plan)] =
239 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(2,n,plan)];
241 xp = &(plan->
f_hat[NFSFT_INDEX(2,n,plan)]);
242 for (k = 2; k < plan->
N; k++)
245 *xp = -0.25 * _Complex_I * (xp[1] - last);
249 *xp = 0.25 * _Complex_I * last;
251 plan->
f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
255 void nfsft_init(
nfsft_plan *plan,
int N,
int M)
258 nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
262 void nfsft_init_advanced(
nfsft_plan* plan,
int N,
int M,
266 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT |
270 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
271 unsigned int nfft_flags,
int nfft_cutoff)
289 plan->
N_total = (2*plan->
N+2)*(2*plan->
N+2);
293 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
296 sizeof(
double _Complex));
300 if (plan->
flags & NFSFT_MALLOC_F_HAT)
303 sizeof(
double _Complex));
307 if (plan->
flags & NFSFT_MALLOC_F)
313 if (plan->
flags & NFSFT_MALLOC_X)
319 if (plan->
flags & NFSFT_NO_FAST_ALGORITHM)
328 nfft_size[0] = 2*plan->
N+2;
329 nfft_size[1] = 2*plan->
N+2;
330 fftw_size[0] = 4*plan->
N;
331 fftw_size[1] = 4*plan->
N;
335 nfft_cutoff, nfft_flags,
336 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
355 plan->
mv_trafo = (void (*) (
void* ))nfsft_trafo;
356 plan->
mv_adjoint = (void (*) (
void* ))nfsft_adjoint;
359 void nfsft_precompute(
int N,
double kappa,
unsigned int nfsft_flags,
360 unsigned int fpt_flags)
371 #pragma omp parallel default(shared)
373 int nthreads = omp_get_num_threads();
374 int threadid = omp_get_thread_num();
377 wisdom.nthreads = nthreads;
383 wisdom.flags = nfsft_flags;
387 X(next_power_of_2_exp)(N,&wisdom.
N_MAX,&wisdom.
T_MAX);
390 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
414 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
422 if (wisdom.
alpha != NULL)
425 #pragma omp parallel default(shared) private(n)
427 int nthreads = omp_get_num_threads();
428 int threadid = omp_get_thread_num();
431 wisdom.nthreads = nthreads;
435 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
436 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
437 for (n = 0; n <= wisdom.
N_MAX; n++)
438 fpt_precompute(wisdom.set_threads[threadid],n,&wisdom.
alpha[ROW(n)],
439 &wisdom.
beta[ROW(n)], &wisdom.
gamma[ROW(n)],n,kappa);
446 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
447 for (n = 0; n <= wisdom.
N_MAX; n++)
452 fpt_precompute(wisdom.
set,n,&wisdom.
alpha[ROW(n)],&wisdom.
beta[ROW(n)],
453 &wisdom.
gamma[ROW(n)],n,kappa);
460 #pragma omp parallel default(shared) private(n)
463 int nthreads = omp_get_num_threads();
464 int threadid = omp_get_thread_num();
467 wisdom.nthreads = nthreads;
474 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
475 fpt_flags | FPT_AL_SYMMETRY);
477 for (n = 0; n <= wisdom.
N_MAX; n++)
479 alpha_al_row(alpha,wisdom.
N_MAX,n);
480 beta_al_row(beta,wisdom.
N_MAX,n);
481 gamma_al_row(gamma,wisdom.
N_MAX,n);
484 fpt_precompute(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
498 fpt_flags | FPT_AL_SYMMETRY);
499 for (n = 0; n <= wisdom.
N_MAX; n++)
528 void nfsft_forget(
void)
538 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
553 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
560 for (k = 0; k < wisdom.nthreads; k++)
561 fpt_finalize(wisdom.set_threads[k]);
565 fpt_finalize(wisdom.
set);
584 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
590 if (plan->
flags & NFSFT_MALLOC_F_HAT)
597 if (plan->
flags & NFSFT_MALLOC_F)
604 if (plan->
flags & NFSFT_MALLOC_X)
626 double _Complex temp;
638 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
644 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
647 sizeof(
double _Complex));
657 if (plan->
flags & NFSFT_NORMALIZED)
661 #pragma omp parallel for default(shared) private(k,n)
663 for (k = 0; k <= plan->
N; k++)
665 for (n = -k; n <= k; n++)
669 sqrt((2*k+1)/(4.0*KPI));
680 for (m = 0; m < plan->
M_total; m++)
695 #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
697 for (m = 0; m < plan->
M_total; m++)
700 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
702 sphi = 2.0*KPI*plan->
x[2*m];
711 for (n = -plan->
N; n <= plan->N; n++)
720 alpha = &(wisdom.
alpha[ROW(n_abs)]);
721 gamma = &(wisdom.
gamma[ROW(n_abs)]);
726 for (k = plan->
N; k > n_abs + 1; k--)
728 temp = a[k-2] + it2 * gamma[k];
729 it2 = it1 + it2 * alpha[k] * stheta;
736 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
745 f_m += it2 * wisdom.
gamma[ROW(n_abs)] *
746 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
769 double _Complex temp;
779 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
785 memset(plan->
f_hat,0U,plan->
N_total*
sizeof(
double _Complex));
793 for (m = 0; m < plan->
M_total; m++)
795 plan->
f_hat[NFSFT_INDEX(0,0,plan)] += plan->
f[m];
804 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
805 for (n = -plan->
N; n <= plan->N; n++)
811 alpha = &(wisdom.
alpha[ROW(n_abs)]);
812 gamma = &(wisdom.
gamma[ROW(n_abs)]);
815 for (m = 0; m < plan->
M_total; m++)
818 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
820 sphi = 2.0*KPI*plan->
x[2*m];
825 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
826 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
827 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
832 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
833 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
837 for (k = n_abs+2; k <= plan->
N; k++)
840 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
842 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
848 for (m = 0; m < plan->
M_total; m++)
851 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
853 sphi = 2.0*KPI*plan->
x[2*m];
856 for (n = -plan->
N; n <= plan->N; n++)
862 alpha = &(wisdom.
alpha[ROW(n_abs)]);
863 gamma = &(wisdom.
gamma[ROW(n_abs)]);
868 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
869 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
870 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
875 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
876 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
880 for (k = n_abs+2; k <= plan->
N; k++)
883 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
885 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
895 if (plan->
flags & NFSFT_NORMALIZED)
899 #pragma omp parallel for default(shared) private(k,n)
901 for (k = 0; k <= plan->
N; k++)
903 for (n = -k; n <= k; n++)
906 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
907 sqrt((2*k+1)/(4.0*KPI));
913 if (plan->
flags & NFSFT_ZERO_F_HAT)
915 for (n = -plan->
N; n <= plan->N+1; n++)
917 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
918 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
931 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
945 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
962 nfsft_trafo_direct(plan);
966 else if (plan->
N <= wisdom.
N_MAX)
969 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
972 sizeof(
double _Complex));
989 if (plan->
flags & NFSFT_NORMALIZED)
993 #pragma omp parallel for default(shared) private(k,n)
995 for (k = 0; k <= plan->
N; k++)
997 for (n = -k; n <= k; n++)
1001 sqrt((2*k+1)/(4.0*KPI));
1010 if (plan->
flags & NFSFT_USE_DPT)
1013 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1014 for (n = -plan->
N; n <= plan->N; n++)
1015 fpt_trafo_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1021 for (n = -plan->
N; n <= plan->N; n++)
1025 fpt_trafo_direct(wisdom.
set,abs(n),
1035 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1036 for (n = -plan->
N; n <= plan->N; n++)
1037 fpt_trafo(wisdom.set_threads[omp_get_thread_num()],abs(n),
1043 for (n = -plan->
N; n <= plan->N; n++)
1047 fpt_trafo(wisdom.
set,abs(n),
1075 if (plan->
flags & NFSFT_USE_NDFT)
1107 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1124 nfsft_adjoint_direct(plan);
1127 else if (plan->
N <= wisdom.
N_MAX)
1144 if (plan->
flags & NFSFT_USE_NDFT)
1180 if (plan->
flags & NFSFT_USE_DPT)
1183 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1184 for (n = -plan->
N; n <= plan->N; n++)
1185 fpt_transposed_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1186 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1187 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1191 for (n = -plan->
N; n <= plan->N; n++)
1195 fpt_transposed_direct(wisdom.
set,abs(n),
1196 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1197 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1205 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1206 for (n = -plan->
N; n <= plan->N; n++)
1207 fpt_transposed(wisdom.set_threads[omp_get_thread_num()],abs(n),
1208 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1209 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1214 for (n = -plan->
N; n <= plan->N; n++)
1218 fpt_transposed(wisdom.
set,abs(n),
1219 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1220 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1233 if (plan->
flags & NFSFT_NORMALIZED)
1239 #pragma omp parallel for default(shared) private(k,n)
1241 for (k = 0; k <= plan->
N; k++)
1243 for (n = -k; n <= k; n++)
1246 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
1247 sqrt((2*k+1)/(4.0*KPI));
1253 if (plan->
flags & NFSFT_ZERO_F_HAT)
1257 for (n = -plan->
N; n <= plan->N+1; n++)
1259 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
1260 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
1275 nfft_precompute_one_psi(&plan->
plan_nfft);
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
double * x
the nodes for ,
void(* mv_trafo)(void *)
Transform.
fftw_complex * f_hat
Fourier coefficients.
fftw_complex * f_hat
Fourier coefficients.
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
int N_MAX
Stores precomputation flags.
bool initialized
Indicates wether the structure has been initialized.
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
unsigned int flags
the planner flags
Holds data for a set of cascade summations.
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
nfft_plan plan_nfft
the internal NFFT plan
NFFT_INT M_total
Total number of samples.
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
#define X(name)
Include header for C99 complex datatype.
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void(* mv_adjoint)(void *)
Adjoint transform.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * x
Nodes in time/spatial domain, size is doubles.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
unsigned flags
Flags for precomputation, (de)allocation, and FFTW usage, default setting is PRE_PHI_HUT | PRE_PSI | ...
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
int T_MAX
The logarithm /f$t = N_{{max}}/f$ of the maximum bandwidth.