62 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
69 #define NFSFT_DEFAULT_THRESHOLD 1000
76 #define NFSFT_BREAK_EVEN 5
112 double _Complex last;
122 memset(plan->
f_hat_intern,0U,(2*plan->
N+2)*
sizeof(
double _Complex));
125 lowe = -plan->
N + (plan->
N%2);
129 for (n = lowe; n <= upe; n += 2)
135 for(k = 1; k <= plan->
N; k++)
146 low = -plan->
N + (1-plan->
N%2);
151 for (n = low; n <= up; n += 2)
163 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
165 for (k = plan->
N-1; k > 0; k--)
168 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
190 double _Complex last;
200 lowe = -plan->
N + (plan->
N%2);
204 for (n = lowe; n <= upe; n += 2)
208 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
209 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
210 for(k = 1; k <= plan->
N; k++)
218 low = -plan->
N + (1-plan->
N%2);
222 for (n = low; n <= up; n += 2)
226 xm = &(plan->
f_hat[NFSFT_INDEX(-1,n,plan)]);
227 xp = &(plan->
f_hat[NFSFT_INDEX(+1,n,plan)]);
228 for(k = 1; k <= plan->
N; k++)
233 plan->
f_hat[NFSFT_INDEX(0,n,plan)] =
234 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(1,n,plan)];
235 last = plan->
f_hat[NFSFT_INDEX(1,n,plan)];
236 plan->
f_hat[NFSFT_INDEX(1,n,plan)] =
237 -0.25*_Complex_I*plan->
f_hat[NFSFT_INDEX(2,n,plan)];
239 xp = &(plan->
f_hat[NFSFT_INDEX(2,n,plan)]);
240 for (k = 2; k < plan->
N; k++)
243 *xp = -0.25 * _Complex_I * (xp[1] - last);
247 *xp = 0.25 * _Complex_I * last;
249 plan->
f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
253 void nfsft_init(
nfsft_plan *plan,
int N,
int M)
256 nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
260 void nfsft_init_advanced(
nfsft_plan* plan,
int N,
int M,
264 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT |
268 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
269 unsigned int nfft_flags,
int nfft_cutoff)
287 plan->
N_total = (2*plan->
N+2)*(2*plan->
N+2);
291 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
294 sizeof(
double _Complex));
298 if (plan->
flags & NFSFT_MALLOC_F_HAT)
300 plan->
f_hat = (
double _Complex*) nfft_malloc(plan->
N_total*
301 sizeof(
double _Complex));
305 if (plan->
flags & NFSFT_MALLOC_F)
307 plan->
f = (
double _Complex*) nfft_malloc(plan->
M_total*
sizeof(
double _Complex));
311 if (plan->
flags & NFSFT_MALLOC_X)
313 plan->
x = (
double*) nfft_malloc(plan->
M_total*2*
sizeof(
double));
317 if (plan->
flags & NFSFT_NO_FAST_ALGORITHM)
322 nfft_size = (
int*)nfft_malloc(2*
sizeof(
int));
323 fftw_size = (
int*)nfft_malloc(2*
sizeof(
int));
326 nfft_size[0] = 2*plan->
N+2;
327 nfft_size[1] = 2*plan->
N+2;
328 fftw_size[0] = 4*plan->
N;
329 fftw_size[1] = 4*plan->
N;
333 nfft_cutoff, nfft_flags,
334 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
349 nfft_free(nfft_size);
350 nfft_free(fftw_size);
353 plan->
mv_trafo = (void (*) (
void* ))nfsft_trafo;
354 plan->
mv_adjoint = (void (*) (
void* ))nfsft_adjoint;
357 void nfsft_precompute(
int N,
double kappa,
unsigned int nfsft_flags,
358 unsigned int fpt_flags)
369 #pragma omp parallel default(shared)
371 int nthreads = omp_get_num_threads();
372 int threadid = omp_get_thread_num();
375 wisdom.nthreads = nthreads;
381 wisdom.flags = nfsft_flags;
385 X(next_power_of_2_exp_int)(N,&wisdom.
N_MAX,&wisdom.
T_MAX);
388 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
397 wisdom.
alpha = (
double*) nfft_malloc((wisdom.
N_MAX+1)*(wisdom.
N_MAX+2)*
399 wisdom.
beta = (
double*) nfft_malloc((wisdom.
N_MAX+1)*(wisdom.
N_MAX+2)*
401 wisdom.
gamma = (
double*) nfft_malloc((wisdom.
N_MAX+1)*(wisdom.
N_MAX+2)*
412 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
420 if (wisdom.
alpha != NULL)
423 #pragma omp parallel default(shared) private(n)
425 int nthreads = omp_get_num_threads();
426 int threadid = omp_get_thread_num();
429 wisdom.nthreads = nthreads;
430 wisdom.set_threads = (
fpt_set*) nfft_malloc(nthreads*
sizeof(
fpt_set));
433 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
434 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
435 for (n = 0; n <= wisdom.
N_MAX; n++)
436 fpt_precompute(wisdom.set_threads[threadid],n,&wisdom.
alpha[ROW(n)],
437 &wisdom.
beta[ROW(n)], &wisdom.
gamma[ROW(n)],n,kappa);
444 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
445 for (n = 0; n <= wisdom.
N_MAX; n++)
450 fpt_precompute(wisdom.
set,n,&wisdom.
alpha[ROW(n)],&wisdom.
beta[ROW(n)],
451 &wisdom.
gamma[ROW(n)],n,kappa);
458 #pragma omp parallel default(shared) private(n)
461 int nthreads = omp_get_num_threads();
462 int threadid = omp_get_thread_num();
465 wisdom.nthreads = nthreads;
466 wisdom.set_threads = (
fpt_set*) nfft_malloc(nthreads*
sizeof(
fpt_set));
469 alpha = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
470 beta = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
471 gamma = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
472 wisdom.set_threads[threadid] = fpt_init(wisdom.
N_MAX+1, wisdom.
T_MAX,
473 fpt_flags | FPT_AL_SYMMETRY);
475 for (n = 0; n <= wisdom.
N_MAX; n++)
477 alpha_al_row(alpha,wisdom.
N_MAX,n);
478 beta_al_row(beta,wisdom.
N_MAX,n);
479 gamma_al_row(gamma,wisdom.
N_MAX,n);
482 fpt_precompute(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
492 wisdom.
alpha = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
493 wisdom.
beta = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
494 wisdom.
gamma = (
double*) nfft_malloc((wisdom.
N_MAX+2)*
sizeof(double));
496 fpt_flags | FPT_AL_SYMMETRY);
497 for (n = 0; n <= wisdom.
N_MAX; n++)
512 nfft_free(wisdom.
alpha);
513 nfft_free(wisdom.
beta);
514 nfft_free(wisdom.
gamma);
526 void nfsft_forget(
void)
536 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
542 nfft_free(wisdom.
alpha);
543 nfft_free(wisdom.
beta);
544 nfft_free(wisdom.
gamma);
551 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
558 for (k = 0; k < wisdom.nthreads; k++)
559 fpt_finalize(wisdom.set_threads[k]);
560 nfft_free(wisdom.set_threads);
563 fpt_finalize(wisdom.
set);
582 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
588 if (plan->
flags & NFSFT_MALLOC_F_HAT)
591 nfft_free(plan->
f_hat);
595 if (plan->
flags & NFSFT_MALLOC_F)
602 if (plan->
flags & NFSFT_MALLOC_X)
624 double _Complex temp;
636 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
642 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
645 sizeof(
double _Complex));
655 if (plan->
flags & NFSFT_NORMALIZED)
659 #pragma omp parallel for default(shared) private(k,n)
661 for (k = 0; k <= plan->
N; k++)
663 for (n = -k; n <= k; n++)
667 sqrt((2*k+1)/(4.0*KPI));
678 for (m = 0; m < plan->
M_total; m++)
693 #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
695 for (m = 0; m < plan->
M_total; m++)
698 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
700 sphi = 2.0*KPI*plan->
x[2*m];
709 for (n = -plan->
N; n <= plan->N; n++)
718 alpha = &(wisdom.
alpha[ROW(n_abs)]);
719 gamma = &(wisdom.
gamma[ROW(n_abs)]);
724 for (k = plan->
N; k > n_abs + 1; k--)
726 temp = a[k-2] + it2 * gamma[k];
727 it2 = it1 + it2 * alpha[k] * stheta;
734 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
743 f_m += it2 * wisdom.
gamma[ROW(n_abs)] *
744 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
767 double _Complex temp;
777 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
783 memset(plan->
f_hat,0U,plan->
N_total*
sizeof(
double _Complex));
791 for (m = 0; m < plan->
M_total; m++)
793 plan->
f_hat[NFSFT_INDEX(0,0,plan)] += plan->
f[m];
802 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
803 for (n = -plan->
N; n <= plan->N; n++)
809 alpha = &(wisdom.
alpha[ROW(n_abs)]);
810 gamma = &(wisdom.
gamma[ROW(n_abs)]);
813 for (m = 0; m < plan->
M_total; m++)
816 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
818 sphi = 2.0*KPI*plan->
x[2*m];
823 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
824 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
825 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
830 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
831 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
835 for (k = n_abs+2; k <= plan->
N; k++)
838 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
840 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
846 for (m = 0; m < plan->
M_total; m++)
849 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
851 sphi = 2.0*KPI*plan->
x[2*m];
854 for (n = -plan->
N; n <= plan->N; n++)
860 alpha = &(wisdom.
alpha[ROW(n_abs)]);
861 gamma = &(wisdom.
gamma[ROW(n_abs)]);
866 it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
867 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
868 plan->
f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
873 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
874 plan->
f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
878 for (k = n_abs+2; k <= plan->
N; k++)
881 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
883 plan->
f_hat[NFSFT_INDEX(k,n,plan)] += it2;
893 if (plan->
flags & NFSFT_NORMALIZED)
897 #pragma omp parallel for default(shared) private(k,n)
899 for (k = 0; k <= plan->
N; k++)
901 for (n = -k; n <= k; n++)
904 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
905 sqrt((2*k+1)/(4.0*KPI));
911 if (plan->
flags & NFSFT_ZERO_F_HAT)
913 for (n = -plan->
N; n <= plan->N+1; n++)
915 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
916 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
929 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
943 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
960 nfsft_trafo_direct(plan);
964 else if (plan->
N <= wisdom.
N_MAX)
967 if (plan->
flags & NFSFT_PRESERVE_F_HAT)
970 sizeof(
double _Complex));
987 if (plan->
flags & NFSFT_NORMALIZED)
991 #pragma omp parallel for default(shared) private(k,n)
993 for (k = 0; k <= plan->
N; k++)
995 for (n = -k; n <= k; n++)
999 sqrt((2*k+1)/(4.0*KPI));
1008 if (plan->
flags & NFSFT_USE_DPT)
1011 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1012 for (n = -plan->
N; n <= plan->N; n++)
1013 fpt_trafo_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1019 for (n = -plan->
N; n <= plan->N; n++)
1023 fpt_trafo_direct(wisdom.
set,abs(n),
1033 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1034 for (n = -plan->
N; n <= plan->N; n++)
1035 fpt_trafo(wisdom.set_threads[omp_get_thread_num()],abs(n),
1041 for (n = -plan->
N; n <= plan->N; n++)
1045 fpt_trafo(wisdom.
set,abs(n),
1073 if (plan->
flags & NFSFT_USE_NDFT)
1105 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1122 nfsft_adjoint_direct(plan);
1125 else if (plan->
N <= wisdom.
N_MAX)
1142 if (plan->
flags & NFSFT_USE_NDFT)
1178 if (plan->
flags & NFSFT_USE_DPT)
1181 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1182 for (n = -plan->
N; n <= plan->N; n++)
1183 fpt_transposed_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
1184 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1185 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1189 for (n = -plan->
N; n <= plan->N; n++)
1193 fpt_transposed_direct(wisdom.
set,abs(n),
1194 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1195 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1203 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1204 for (n = -plan->
N; n <= plan->N; n++)
1205 fpt_transposed(wisdom.set_threads[omp_get_thread_num()],abs(n),
1206 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1207 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1212 for (n = -plan->
N; n <= plan->N; n++)
1216 fpt_transposed(wisdom.
set,abs(n),
1217 &plan->
f_hat[NFSFT_INDEX(abs(n),n,plan)],
1218 &plan->
f_hat[NFSFT_INDEX(0,n,plan)],
1231 if (plan->
flags & NFSFT_NORMALIZED)
1237 #pragma omp parallel for default(shared) private(k,n)
1239 for (k = 0; k <= plan->
N; k++)
1241 for (n = -k; n <= k; n++)
1244 plan->
f_hat[NFSFT_INDEX(k,n,plan)] *=
1245 sqrt((2*k+1)/(4.0*KPI));
1251 if (plan->
flags & NFSFT_ZERO_F_HAT)
1255 for (n = -plan->
N; n <= plan->N+1; n++)
1257 memset(&plan->
f_hat[NFSFT_INDEX(-plan->
N-1,n,plan)],0U,
1258 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
1273 nfft_precompute_one_psi(&plan->
plan_nfft);
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
double * x
the nodes for ,
void(* mv_trafo)(void *)
Transform.
fftw_complex * f_hat
Fourier coefficients.
fftw_complex * f_hat
Fourier coefficients.
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
int N_MAX
Stores precomputation flags.
bool initialized
Indicates wether the structure has been initialized.
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
unsigned int flags
the planner flags
Holds data for a set of cascade summations.
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
nfft_plan plan_nfft
the internal NFFT plan
NFFT_INT M_total
Total number of samples.
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
#define X(name)
Include header for C99 complex datatype.
NFFT_INT N_total
Total number of Fourier coefficients.
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void(* mv_adjoint)(void *)
Adjoint transform.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * x
Nodes in time/spatial domain, size is doubles.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
unsigned flags
Flags for precomputation, (de)allocation, and FFTW usage, default setting is PRE_PHI_HUT | PRE_PSI | ...
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
int T_MAX
The logarithm /f$t = N_{{max}}/f$ of the maximum bandwidth.