53 #include "../fpt/fpt.h" 65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6 72 #define NFSFT_DEFAULT_THRESHOLD 1000 79 #define NFSFT_BREAK_EVEN 5 119 double _Complex last;
129 memset(plan->
f_hat_intern,0U,(2*plan->
N+2)*
sizeof(
double _Complex));
132 lowe = -plan->
N + (plan->
N%2);
136 for (n = lowe; n <= upe; n += 2)
142 for(k = 1; k <= plan->
N; k++)
153 low = -plan->
N + (1-plan->
N%2);
158 for (n = low; n <= up; n += 2)
170 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
172 for (k = plan->
N-1; k > 0; k--)
175 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
197 double _Complex last;
207 lowe = -plan->
N + (plan->
N%2);
211 for (n = lowe; n <= upe; n += 2)
217 for(k = 1; k <= plan->
N; k++)
225 low = -plan->
N + (1-plan->
N%2);
229 for (n = low; n <= up; n += 2)
235 for(k = 1; k <= plan->
N; k++)
247 for (k = 2; k < plan->
N; k++)
250 *xp = -0.25 * _Complex_I * (xp[1] - last);
254 *xp = 0.25 * _Complex_I * last;
275 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
276 unsigned int nfft_flags,
int nfft_cutoff)
290 plan->
M_total = (2*plan->
N+2)*(plan->
N+1);
298 plan->
N_total = (2*plan->
N+2)*(2*plan->
N+2);
305 sizeof(
double _Complex));
312 sizeof(
double _Complex));
327 for (
int i=0; i<2*plan->
N+2; i++)
328 for (
int j=0; j<plan->
N+1; j++)
330 plan->
x[2*(i*(plan->
N+1) + j)] = ((double)i-plan->
N-1)/(2.0*plan->
N+2);
331 plan->
x[2*(i*(plan->
N+1) + j) + 1] = ((double)j)/(2.0*plan->
N+2);
345 nfft_size[0] = 2*plan->
N+2;
346 nfft_size[1] = 2*plan->
N+2;
347 fftw_size[0] = 4*plan->
N;
348 fftw_size[1] = 4*plan->
N;
352 nfft_cutoff, nfft_flags,
353 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
377 unsigned int fpt_flags)
388 #pragma omp parallel default(shared) 390 int nthreads = omp_get_num_threads();
393 wisdom.nthreads = nthreads;
399 wisdom.flags = nfsft_flags;
403 X(next_power_of_2_exp_int)(N,&wisdom.
N_MAX,&wisdom.
T_MAX);
438 if (wisdom.
alpha != NULL)
441 #pragma omp parallel default(shared) private(n) 443 int nthreads = omp_get_num_threads();
444 int threadid = omp_get_thread_num();
447 wisdom.nthreads = nthreads;
461 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
466 for (
int n = 0; n <= wisdom.
N_MAX; n++)
467 fpt_precompute_1(wisdom.set_threads[0],n,n);
471 #pragma omp for private(n) schedule(dynamic) 472 for (n = 0; n <= wisdom.
N_MAX; n++)
473 fpt_precompute_2(wisdom.set_threads[threadid],n,&wisdom.
alpha[ROW(n)],
474 &wisdom.
beta[ROW(n)], &wisdom.
gamma[ROW(n)],n,kappa);
481 for (n = 0; n <= wisdom.
N_MAX; n++)
487 &wisdom.
gamma[ROW(n)],n,kappa);
494 #pragma omp parallel default(shared) private(n) 497 int nthreads = omp_get_num_threads();
498 int threadid = omp_get_thread_num();
501 wisdom.nthreads = nthreads;
519 wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
524 for (
int n = 0; n <= wisdom.
N_MAX; n++)
525 fpt_precompute_1(wisdom.set_threads[0],n,n);
529 #pragma omp for private(n) schedule(dynamic) 530 for (n = 0; n <= wisdom.
N_MAX; n++)
532 alpha_al_row(alpha,wisdom.
N_MAX,n);
533 beta_al_row(beta,wisdom.
N_MAX,n);
534 gamma_al_row(gamma,wisdom.
N_MAX,n);
537 fpt_precompute_2(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
553 for (n = 0; n <= wisdom.
N_MAX; n++)
559 alpha_al_row(alpha,wisdom.
N_MAX,n);
560 beta_al_row(beta,wisdom.
N_MAX,n);
561 gamma_al_row(gamma,wisdom.
N_MAX,n);
611 for (k = 0; k < wisdom.nthreads; k++)
612 fpt_finalize(wisdom.set_threads[k]);
616 fpt_finalize(wisdom.
set);
668 double nan_value = nan(
"");
669 for (m = 0; m < plan->
M_total; m++)
670 plan->
f[m] = nan_value;
697 nfsft_set_f_nan(plan);
705 sizeof(
double _Complex));
719 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 721 for (k = 0; k <= plan->
N; k++)
723 for (n = -k; n <= k; n++)
727 sqrt((2*k+1)/(4.0*KPI));
738 for (m = 0; m < plan->
M_total; m++)
753 #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k) 755 for (m = 0; m < plan->
M_total; m++)
758 stheta = cos(2.0*KPI*plan->
x[2*m+1]);
760 sphi = 2.0*KPI*plan->
x[2*m];
769 for (n = -plan->
N; n <= plan->N; n++)
778 alpha = &(wisdom.
alpha[ROW(n_abs)]);
779 gamma = &(wisdom.
gamma[ROW(n_abs)]);
784 long double _Complex it2 = a[plan->
N];
785 long double _Complex it1 = a[plan->
N-1];
786 for (k = plan->
N; k > n_abs + 1; k--)
788 long double _Complex temp = a[k-2] + it2 * gamma[k];
789 it2 = it1 + it2 * alpha[k] * stheta;
796 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
805 long double _Complex result = it2 * wisdom.
gamma[ROW(n_abs)] *
806 powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
808 plan->
f[m] += result;
813 double _Complex it2 = a[plan->
N];
814 double _Complex it1 = a[plan->
N-1];
815 for (k = plan->
N; k > n_abs + 1; k--)
817 double _Complex temp = a[k-2] + it2 * gamma[k];
818 it2 = it1 + it2 * alpha[k] * stheta;
825 it2 = it1 + it2 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
834 plan->
f[m] += it2 * wisdom.
gamma[ROW(n_abs)] *
835 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
845 static void nfsft_set_f_hat_nan(
nfsft_plan *plan)
848 double nan_value = nan(
"");
849 for (k = 0; k <= plan->
N; k++)
850 for (n = -k; n <= k; n++)
875 nfsft_set_f_hat_nan(plan);
880 memset(plan->
f_hat,0U,plan->
N_total*
sizeof(
double _Complex));
888 for (m = 0; m < plan->
M_total; m++)
899 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic) 900 for (n = -plan->
N; n <= plan->N; n++)
906 alpha = &(wisdom.
alpha[ROW(n_abs)]);
907 gamma = &(wisdom.
gamma[ROW(n_abs)]);
910 for (m = 0; m < plan->
M_total; m++)
913 double stheta = cos(2.0*KPI*plan->
x[2*m+1]);
915 double sphi = 2.0*KPI*plan->
x[2*m];
921 long double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
922 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
924 long double _Complex it2 = 0.0;
928 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
933 for (k = n_abs+2; k <= plan->
N; k++)
935 long double _Complex temp = it2;
936 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
944 double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
945 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
947 double _Complex it2 = 0.0;
951 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
956 for (k = n_abs+2; k <= plan->
N; k++)
958 double _Complex temp = it2;
959 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
968 for (m = 0; m < plan->
M_total; m++)
971 double stheta = cos(2.0*KPI*plan->
x[2*m+1]);
973 double sphi = 2.0*KPI*plan->
x[2*m];
976 for (n = -plan->
N; n <= plan->N; n++)
982 alpha = &(wisdom.
alpha[ROW(n_abs)]);
983 gamma = &(wisdom.
gamma[ROW(n_abs)]);
989 long double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
990 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
992 long double _Complex it2 = 0.0;
996 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
1001 for (k = n_abs+2; k <= plan->
N; k++)
1003 long double _Complex temp = it2;
1004 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1012 double _Complex it1 = plan->
f[m] * wisdom.
gamma[ROW(n_abs)] *
1013 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1015 double _Complex it2 = 0.0;
1017 if (n_abs < plan->N)
1019 it2 = it1 * wisdom.
alpha[ROWK(n_abs)+1] * stheta;
1024 for (k = n_abs+2; k <= plan->
N; k++)
1026 double _Complex temp = it2;
1027 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1044 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1046 for (k = 0; k <= plan->
N; k++)
1048 for (n = -k; n <= k; n++)
1052 sqrt((2*k+1)/(4.0*KPI));
1060 for (n = -plan->
N; n <= plan->N+1; n++)
1063 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
1076 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1092 nfsft_set_f_nan(plan);
1101 nfsft_set_f_nan(plan);
1109 nfsft_trafo_direct(plan);
1113 else if (plan->
N <= wisdom.
N_MAX)
1119 sizeof(
double _Complex));
1143 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1145 for (k = 0; k <= plan->
N; k++)
1147 for (n = -k; n <= k; n++)
1151 sqrt((2*k+1)/(4.0*KPI));
1164 fpt_trafo_direct(wisdom.set_threads[0],abs(n),
1170 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1171 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1173 int threadid = omp_get_thread_num();
1175 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1180 fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1187 for (n = -plan->
N; n <= plan->N; n++)
1191 fpt_trafo_direct(wisdom.
set,abs(n),
1202 fpt_trafo(wisdom.set_threads[0],abs(n),
1208 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1209 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1211 int threadid = omp_get_thread_num();
1213 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1218 fpt_trafo(wisdom.set_threads[threadid],abs(n),
1225 for (n = -plan->
N; n <= plan->N; n++)
1229 fpt_trafo(wisdom.
set,abs(n),
1260 INT nthreads = Y(get_num_threads)();
1266 fftw_plan plan_fftw;
1268 for (
int j=0; j<N[0]; j++)
1269 for (
int k=0; k<N[1]; k++)
1275 #pragma omp critical (nfft_omp_critical_fftw_plan)
1277 FFTW(plan_with_nthreads)(nthreads);
1283 fftw_execute(plan_fftw);
1284 for (
int j=0; j<N[0]; j++)
1287 for (
int k=N[1]/2; k<N[1]; k++)
1288 plan->
f[j*N[1]/2+(k-N[1]/2)] = plan->
f_hat_intern[j*N[1]+k] * ((j+k)%2 ? -1 : 1);
1291 #pragma omp critical (nfft_omp_critical_fftw_plan) 1293 fftw_destroy_plan(plan_fftw);
1332 nfsft_set_f_hat_nan(plan);
1341 nfsft_set_f_hat_nan(plan);
1349 nfsft_adjoint_direct(plan);
1352 else if (plan->
N <= wisdom.
N_MAX)
1377 for (
int j=0; j<N[0]; j++)
1379 for (
int k=0; k<N[1]/2+1; k++)
1380 plan->
f_hat[j*N[1]+k] = 0;
1381 for (
int k=N[1]/2; k<N[1]; k++)
1382 plan->
f_hat[j*N[1]+k] = plan->
f[j*N[1]/2+k-N[1]/2] * ((j+k)%2 ? -1 : 1);
1384 fftw_plan plan_fftw = FFTW(plan_dft)(2, N, plan->
f_hat, plan->
f_hat, FFTW_BACKWARD, FFTW_ESTIMATE);
1385 fftw_execute(plan_fftw);
1386 for (
int j=0; j<N[0]; j++)
1387 for (
int k=0; k<N[1]; k++)
1389 plan->
f_hat[j*N[1]+k] *= -1;
1390 fftw_destroy_plan(plan_fftw);
1435 fpt_transposed_direct(wisdom.set_threads[0],abs(n),
1441 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1442 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1444 int threadid = omp_get_thread_num();
1446 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1451 fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1458 for (n = -plan->
N; n <= plan->N; n++)
1462 fpt_transposed_direct(wisdom.
set,abs(n),
1479 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic) 1480 for (n_abs = 1; n_abs <= plan->
N; n_abs++)
1482 int threadid = omp_get_thread_num();
1497 for (n = -plan->
N; n <= plan->N; n++)
1522 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic) 1524 for (k = 0; k <= plan->
N; k++)
1526 for (n = -k; n <= k; n++)
1530 sqrt((2*k+1)/(4.0*KPI));
1540 for (n = -plan->
N; n <= plan->N+1; n++)
1543 (plan->
N+1+abs(n))*
sizeof(
double _Complex));
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials to coefficients m...
double * gamma
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
#define NFSFT_MALLOC_F_HAT
double * x
the nodes for ,
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
fftw_complex * f_hat
Fourier coefficients.
void nfsft_trafo(nfsft_plan *plan)
#define NFSFT_PRESERVE_F_HAT
void nfsft_adjoint(nfsft_plan *plan)
fftw_complex * f_hat
Fourier coefficients.
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
#define NFSFT_NO_FAST_ALGORITHM
int N_MAX
Stores precomputation flags.
bool initialized
Indicates wether the structure has been initialized.
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
void(* mv_adjoint)(void *)
Adjoint transform.
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
unsigned int flags
the planner flags
void nfsft_init(nfsft_plan *plan, int N, int M)
Holds data for a set of cascade summations.
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
nfft_plan plan_nfft
the internal NFFT plan
NFFT_INT M_total
Total number of samples.
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
#define X(name)
Include header for C99 complex datatype.
#define NFSFT_NO_DIRECT_ALGORITHM
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
double * alpha
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
void nfft_finalize(nfft_plan *ths)
void nfft_precompute_one_psi(nfft_plan *ths)
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
void nfsft_finalize(nfsft_plan *plan)
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$^n/f$ for /f$k = 0,/ldots, N_{{max}}; n=-k,/ldots,k/f$ of associated Legendre-functions /f$P_k^n/f$.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Header file for the nfft3 library.
double * x
Nodes in time/spatial domain, size is doubles.
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
unsigned flags
Flags for precomputation, (de)allocation, and FFTW usage, default setting is PRE_PHI_HUT | PRE_PSI | ...
fftw_complex * f_hat_intern
Internally used pointer to spherical Fourier coefficients.
int T_MAX
The logarithm /f$t = N_{{max}}/f$ of the maximum bandwidth.
#define NFSFT_INDEX(k, n, plan)
void(* mv_trafo)(void *)
Transform.