/* File-local output stream, initially NULL. main() assigns it the result of
 * fopen("fastsum_benchomp_results_plots.tex", "w") and test1() passes it to
 * the plot-printing functions. */
30 static FILE* file_out_tex = NULL;
/*
 * Allocates *arr with NFFT(malloc) and fills it with thread counts derived
 * from NFFT(get_num_threads)().
 *
 * Two allocation paths are visible: one sized by max_threads, and one sized
 * by alloc_num, which is counted by doubling k from 1 while k <= max_threads.
 * The second path stores each such k and, under the conditions below, also
 * max_threads / 2 and max_threads itself.
 *
 * NOTE(review): the condition choosing between the two paths and the return
 * statement are not visible in this excerpt. main() stores the result as
 * n_threads_array_size, so the return value is presumably the number of
 * entries written (ret_number) -- confirm.
 */
32 int get_nthreads_array(
int **arr)
34 int max_threads = NFFT(get_num_threads)();
/* NOTE(review): despite the "_pw2" suffix, this expression evaluates to 1 for
 * every even max_threads, not only for powers of two. */
38 int max_threads_pw2 = (max_threads / 2) * 2 == max_threads ? 1 : 0;
42 *arr = (
int*) NFFT(malloc)((size_t) (max_threads) *
sizeof(int));
43 for (k = 0; k < max_threads; k++)
48 for (k = 1; k <= max_threads; k *= 2, alloc_num++)
51 *arr = (
int*) NFFT(malloc)((size_t)(alloc_num) *
sizeof(int));
53 for (k = 1; k <= max_threads; k *= 2)
55 if (k != max_threads && 2 * k > max_threads && max_threads_pw2)
57 *(*arr + ret_number) = max_threads / 2;
61 *(*arr + ret_number) = k;
64 if (k != max_threads && 2 * k > max_threads)
66 *(*arr + ret_number) = max_threads;
/*
 * Reports a mismatch between an observed value and the expected one.
 *
 * val: value obtained (callers in this file pass the result of system() or a
 *      field count read from the output file).
 * ok:  value that was expected.
 * msg: label included in the message.
 *
 * Writes "ERROR <msg>: <val> not <ok>" to stderr.
 * NOTE(review): the guarding condition (presumably val != ok) and any
 * follow-up action are not visible in this excerpt.
 */
75 void check_result_value(
const int val,
const int ok,
const char *msg)
79 fprintf(stderr,
"ERROR %s: %d not %d\n", msg, val, ok);
/*
 * Generates the benchmark input data set by running the external program
 * ./fastsum_benchomp_createdataset with the arguments d, L and M and
 * redirecting its standard output to fastsum_benchomp_test.data.
 *
 * The command line is echoed to stderr before it is executed with system();
 * the exit status is compared against 0 through check_result_value().
 * NOTE(review): the declaration of cmd and the call that formats it are not
 * visible in this excerpt.
 */
85 void run_test_create(
int d,
int L,
int M)
90 "./fastsum_benchomp_createdataset %d %d %d > fastsum_benchomp_test.data",
92 fprintf(stderr,
"%s\n", cmd);
93 check_result_value(system(cmd), 0,
"createdataset");
/*
 * Opens fastsum_benchomp_test.result in write mode.
 * NOTE(review): what is written to the file and whether it is closed is not
 * visible in this excerpt.
 */
96 void run_test_init_output()
98 FILE *f = fopen(
"fastsum_benchomp_test.result",
"w");
/*
 * Runs one external benchmark configuration repeatedly and accumulates
 * statistics over the 16 values it reports.
 *
 * res:         array of (at least) 16 s_resval entries; indices 0..15 are
 *              written (min, max and avg fields are used here).
 * nrepeat:     number of measured repetitions; also the divisor for avg.
 * n, m, p, kernel_name, c, eps_I, eps_B:
 *              forwarded verbatim on the command line of the benchmark
 *              executable.
 * nthreads:    appended to the command line of the "threads" executable and
 *              printed in the summary line.
 *
 * The command reads fastsum_benchomp_test.data and writes
 * fastsum_benchomp_test.out. The command is executed once before the
 * repetition loop and then once per repetition; after each repetition the
 * output file is opened and 16 values are parsed into v, with the number of
 * converted fields checked against 16.
 *
 * NOTE(review): the condition selecting between the "single" and "threads"
 * executables, the accumulation into avg/max, and the closing of f are not
 * visible in this excerpt.
 */
137 void run_test(
s_resval *res,
int nrepeat,
int n,
int m,
int p,
138 char *kernel_name, R c, R eps_I, R eps_B,
int nthreads)
143 for (t = 0; t < 16; t++)
/* Start the running minimum at 1/0 so that any measured value can replace
 * it (presumably +infinity for the floating-point type R -- confirm). */
146 res[t].min = K(1.0) / K(0.0);
152 "./fastsum_benchomp_detail_single %d %d %d %s " __FR__
" " __FR__
" " __FR__
" < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
153 n, m, p, kernel_name, c, eps_I, eps_B);
156 "./fastsum_benchomp_detail_threads %d %d %d %s " __FR__
" " __FR__
" " __FR__
" %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out",
157 n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
158 fprintf(stderr,
"%s\n", cmd);
159 check_result_value(system(cmd), 0, cmd);
161 for (r = 0; r < nrepeat; r++)
166 check_result_value(system(cmd), 0, cmd);
167 f = fopen(
"fastsum_benchomp_test.out",
"r");
169 "" __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
" " __FR__
"", v,
170 v + 1, v + 2, v + 3, v + 4, v + 5, v + 6, v + 7, v + 8, v + 9, v + 10,
171 v + 11, v + 12, v + 13, v + 14, v + 15);
172 check_result_value(retval, 16,
"read fastsum_benchomp_test.out");
175 for (t = 0; t < 16; t++)
178 if (res[t].min > v[t])
180 if (res[t].max < v[t])
185 for (t = 0; t < 16; t++)
186 res[t].avg /= (R)(nrepeat);
/* Summary line on stderr: "<nthreads> <nrepeat>: " followed by
 * "avg min max | " for each of the 16 values. */
188 fprintf(stderr,
"%d %d: ", nthreads, nrepeat);
189 for (t = 0; t < 16; t++)
190 fprintf(stderr,
"%.3" __FES__
" %.3" __FES__
" %.3" __FES__
" | ", res[t].avg, res[t].min, res[t].max);
191 fprintf(stderr,
"\n");
/*
 * Returns a constant string chosen from the given flags; one of the tested
 * bits is PRE_ONE_PSI.
 * NOTE(review): the first condition of the if/else chain and the returned
 * strings are not visible in this excerpt.
 */
194 const char *get_psi_string(
int flags)
198 else if (flags & PRE_ONE_PSI)
/*
 * Returns a constant string chosen from the given flags. The
 * NFFT_OMP_BLOCKWISE_ADJOINT bit is tested first, then NFFT_SORT_NODES.
 * NOTE(review): the returned strings are not visible in this excerpt.
 */
203 const char *get_sort_string(
int flags)
205 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
208 if (flags & NFFT_SORT_NODES)
/*
 * Returns a constant string that depends on whether the
 * NFFT_OMP_BLOCKWISE_ADJOINT bit is set in flags.
 * NOTE(review): the returned strings are not visible in this excerpt.
 */
214 const char *get_adjoint_omp_string(
int flags)
216 if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
/*
 * Single-bit flags, one per fastsum test parameter.
 * fastsum_determine_different_parameters() ORs them into a mask for
 * parameters that differ between test sets, and
 * fastsum_get_plot_title_minus_indep() tests them to decide which parameters
 * to print. In the comparison code MULTIBW corresponds to param.n and WINM
 * to param.m.
 */
222 #define MASK_FSUM_D (1U<<0)
223 #define MASK_FSUM_L (1U<<1)
224 #define MASK_FSUM_M (1U<<2)
225 #define MASK_FSUM_MULTIBW (1U<<3)
226 #define MASK_FSUM_WINM (1U<<4)
227 #define MASK_FSUM_P (1U<<5)
228 #define MASK_FSUM_KERNEL (1U<<6)
229 #define MASK_FSUM_EPSI (1U<<7)
230 #define MASK_FSUM_EPSB (1U<<8)
/*
 * Compares each test set with its predecessor (t - 1 versus t, for t from 1
 * up to ntestsets - 1) and sets the matching MASK_FSUM_* bit in a mask for
 * every parameter that differs. Numeric parameters are compared with !=;
 * kernel_name is compared with strcmp().
 *
 * NOTE(review): the second parameter (ntestsets), the mask updates for d, L,
 * M and p, and the return statement are not visible in this excerpt; the
 * function presumably returns mask -- confirm.
 */
232 unsigned int fastsum_determine_different_parameters(
s_testset *testsets,
236 unsigned int mask = 0;
241 for (t = 1; t < ntestsets; t++)
243 if (testsets[t - 1].param.d != testsets[t].param.d)
245 if (testsets[t - 1].param.L != testsets[t].param.L)
247 if (testsets[t - 1].param.M != testsets[t].param.M)
249 if (testsets[t - 1].param.n != testsets[t].param.n)
250 mask |= MASK_FSUM_MULTIBW;
251 if (testsets[t - 1].param.m != testsets[t].param.m)
252 mask |= MASK_FSUM_WINM;
253 if (testsets[t - 1].param.p != testsets[t].param.p)
255 if (strcmp(testsets[t - 1].param.kernel_name, testsets[t].param.kernel_name)
257 mask |= MASK_FSUM_KERNEL;
258 if (testsets[t - 1].param.eps_I != testsets[t].param.eps_I)
259 mask |= MASK_FSUM_EPSI;
260 if (testsets[t - 1].param.eps_B != testsets[t].param.eps_B)
261 mask |= MASK_FSUM_EPSB;
/*
 * Copies src into dst one character at a time using snprintf(), writing at
 * dst + offset with the remaining capacity maxlen - offset.
 *
 * dst:    destination buffer.
 * src:    NUL-terminated input string.
 * maxlen: capacity used for bounding the writes into dst.
 *
 * One branch emits the four characters \_{} and the other emits src[i]
 * unchanged. The loop ends at the terminating NUL of src or as soon as
 * len + offset reaches maxlen - 1.
 * NOTE(review): the branch condition (presumably src[i] == '_') and the
 * updates of i and offset are not visible in this excerpt.
 */
267 void strEscapeUnderscore(
char *dst,
char *src,
int maxlen)
273 while (src[i] !=
'\0' && len + offset < maxlen - 1)
276 len = snprintf(dst + offset, maxlen - offset,
"\\_{}");
278 len = snprintf(dst + offset, maxlen - offset,
"%c", src[i]);
/*
 * Builds a plot title in outstr from hostname and the parameters in param.
 *
 * outstr:    destination buffer.
 * maxlen:    capacity used for bounding the writes into outstr.
 * hostname:  written first, unconditionally.
 * param:     parameter set whose values are printed.
 * diff_mask: MASK_FSUM_* bits of the parameters to leave out; the function
 *            works on the complement (mask = ~diff_mask) and appends a
 *            fragment for each bit that is set in mask.
 *
 * Fragments are appended with snprintf() at outstr + offset. L and M are
 * printed as a combined "L=M=" fragment when they are equal, and eps_I and
 * eps_B likewise share one fragment when equal. The kernel name is passed
 * through strEscapeUnderscore() before it is appended.
 *
 * After every snprintf() the result is checked with
 * len < 0 || len + offset >= maxlen - 1.
 * NOTE(review): the action taken when that check fires (presumably an early
 * return), the updates of offset, the else-branches between the combined and
 * separate fragments, and the declaration of tmp are not visible in this
 * excerpt.
 */
284 void fastsum_get_plot_title_minus_indep(
char *outstr,
int maxlen,
285 char *hostname,
s_param param,
unsigned int diff_mask)
287 unsigned int mask = ~diff_mask;
291 len = snprintf(outstr, maxlen,
"%s", hostname);
292 if (len < 0 || len + offset >= maxlen - 1)
296 if (mask & MASK_FSUM_D)
298 len = snprintf(outstr + offset, maxlen - offset,
" %dd fastsum", param.d);
299 if (len < 0 || len + offset >= maxlen - 1)
304 if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
306 len = snprintf(outstr + offset, maxlen - offset,
" L=M=%d", param.L);
307 if (len < 0 || len + offset >= maxlen - 1)
313 if (mask & MASK_FSUM_L)
315 len = snprintf(outstr + offset, maxlen - offset,
" L=%d", param.L);
316 if (len < 0 || len + offset >= maxlen - 1)
321 if (mask & MASK_FSUM_M)
323 len = snprintf(outstr + offset, maxlen - offset,
" M=%d", param.M);
324 if (len < 0 || len + offset >= maxlen - 1)
330 if (mask & MASK_FSUM_MULTIBW)
332 len = snprintf(outstr + offset, maxlen - offset,
" n=%d", param.n);
333 if (len < 0 || len + offset >= maxlen - 1)
338 if (mask & MASK_FSUM_WINM)
340 len = snprintf(outstr + offset, maxlen - offset,
" m=%d", param.m);
341 if (len < 0 || len + offset >= maxlen - 1)
346 if (mask & MASK_FSUM_P)
348 len = snprintf(outstr + offset, maxlen - offset,
" p=%d", param.p);
349 if (len < 0 || len + offset >= maxlen - 1)
354 if (mask & MASK_FSUM_KERNEL)
357 strEscapeUnderscore(tmp, param.kernel_name, maxlen);
359 len = snprintf(outstr + offset, maxlen - offset,
" %s", tmp);
360 if (len < 0 || len + offset >= maxlen - 1)
365 if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
367 len = snprintf(outstr + offset, maxlen - offset,
368 " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%" __FGS__
"",
370 if (len < 0 || len + offset >= maxlen - 1)
376 if (mask & MASK_FSUM_EPSI)
378 len = snprintf(outstr + offset, maxlen - offset,
379 " $\\varepsilon_\\mathrm{I}$=%" __FGS__
"", param.eps_I);
380 if (len < 0 || len + offset >= maxlen - 1)
385 if (mask & MASK_FSUM_EPSB)
387 len = snprintf(outstr + offset, maxlen - offset,
388 " $\\varepsilon_\\mathrm{B}$=%" __FGS__
"", param.eps_B);
389 if (len < 0 || len + offset >= maxlen - 1)
/*
 * Writes a tikzpicture/axis environment to out containing a bar chart
 * ("ybar") of timings for the adjoint NFFT over the thread counts in
 * testset.results.
 *
 * out:     stream receiving the LaTeX text.
 * testset: test set whose results[0 .. nresults-1] are plotted.
 *
 * Five \addplot series are emitted, using the avg field of, in order:
 * resval[10], resval[11], resval[12], resval[1], and
 * resval[4] + resval[1]. The \legend line that follows lists five labels:
 * D, F-transposed, B-transposed, prepsi, total.
 *
 * The title contains the host name obtained from gethostname(), or
 * "unnamed" if that call fails.
 * NOTE(review): the declaration of hostname, the condition choosing between
 * the ",%d" and "%d" forms of the x coordinate list, and the last title
 * argument are not visible in this excerpt.
 */
396 void nfft_adjoint_print_output_histo_DFBRT(FILE *out,
s_testset testset)
398 int i, size = testset.nresults;
401 if (gethostname(hostname, 1024) != 0)
402 strncpy(hostname,
"unnamed", 1024);
404 fprintf(out,
"\\begin{tikzpicture}\n");
405 fprintf(out,
"\\begin{axis}[");
406 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
407 fprintf(out,
"symbolic x coords={");
408 for (i = 0; i < size; i++)
410 fprintf(out,
",%d", testset.results[i].nthreads);
412 fprintf(out,
"%d", testset.results[i].nthreads);
415 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
417 " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
418 hostname, testset.param.d, testset.param.n, testset.param.M,
420 fprintf(out,
" ]\n");
421 fprintf(out,
"\\addplot coordinates {");
422 for (i = 0; i < size; i++)
423 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
424 testset.results[i].resval[10].avg);
425 fprintf(out,
"};\n");
427 fprintf(out,
"\\addplot coordinates {");
428 for (i = 0; i < size; i++)
429 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
430 testset.results[i].resval[11].avg);
431 fprintf(out,
"};\n");
433 fprintf(out,
"\\addplot coordinates {");
434 for (i = 0; i < size; i++)
435 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
436 testset.results[i].resval[12].avg);
437 fprintf(out,
"};\n");
439 fprintf(out,
"\\addplot coordinates {");
440 for (i = 0; i < size; i++)
441 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
442 testset.results[i].resval[1].avg);
443 fprintf(out,
"};\n");
445 fprintf(out,
"\\addplot coordinates {");
446 for (i = 0; i < size; i++)
447 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
448 testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
449 fprintf(out,
"};\n");
451 "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
452 fprintf(out,
"\\end{axis}\n");
453 fprintf(out,
"\\end{tikzpicture}\n");
454 fprintf(out,
"\n\n");
/*
 * Writes a tikzpicture/axis environment to out containing a bar chart
 * ("ybar") of timings for the NFFT over the thread counts in
 * testset.results.
 *
 * out:     stream receiving the LaTeX text.
 * testset: test set whose results[0 .. nresults-1] are plotted.
 *
 * Five \addplot series are emitted, using the avg field of, in order:
 * resval[13], resval[14], resval[15], resval[2], and
 * resval[6] + resval[2]. The \legend line that follows lists five labels:
 * D, F, B, prepsi, total.
 *
 * The title contains the host name obtained from gethostname(), or
 * "unnamed" if that call fails.
 * NOTE(review): the declaration of hostname, the condition choosing between
 * the ",%d" and "%d" forms of the x coordinate list, and the last title
 * argument are not visible in this excerpt.
 */
459 void nfft_trafo_print_output_histo_DFBRT(FILE *out,
s_testset testset)
461 int i, size = testset.nresults;
464 if (gethostname(hostname, 1024) != 0)
465 strncpy(hostname,
"unnamed", 1024);
467 fprintf(out,
"\\begin{tikzpicture}\n");
468 fprintf(out,
"\\begin{axis}[");
469 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
470 fprintf(out,
"symbolic x coords={");
471 for (i = 0; i < size; i++)
473 fprintf(out,
",%d", testset.results[i].nthreads);
475 fprintf(out,
"%d", testset.results[i].nthreads);
478 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
480 " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}",
481 hostname, testset.param.d, testset.param.n, testset.param.M,
483 fprintf(out,
" ]\n");
484 fprintf(out,
"\\addplot coordinates {");
485 for (i = 0; i < size; i++)
486 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
487 testset.results[i].resval[13].avg);
488 fprintf(out,
"};\n");
490 fprintf(out,
"\\addplot coordinates {");
491 for (i = 0; i < size; i++)
492 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
493 testset.results[i].resval[14].avg);
494 fprintf(out,
"};\n");
496 fprintf(out,
"\\addplot coordinates {");
497 for (i = 0; i < size; i++)
498 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
499 testset.results[i].resval[15].avg);
500 fprintf(out,
"};\n");
502 fprintf(out,
"\\addplot coordinates {");
503 for (i = 0; i < size; i++)
504 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
505 testset.results[i].resval[2].avg);
506 fprintf(out,
"};\n");
508 fprintf(out,
"\\addplot coordinates {");
509 for (i = 0; i < size; i++)
510 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
511 testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
512 fprintf(out,
"};\n");
513 fprintf(out,
"\\legend{D,F,B,prepsi,total}\n");
514 fprintf(out,
"\\end{axis}\n");
515 fprintf(out,
"\\end{tikzpicture}\n");
516 fprintf(out,
"\n\n");
/*
 * Writes a tikzpicture/axis environment to out containing a bar chart
 * ("ybar") of fastsum timings over the thread counts in testset.results.
 *
 * out:     stream receiving the LaTeX text.
 * testset: test set whose results[0 .. nresults-1] are plotted.
 *
 * The plot title is produced by fastsum_get_plot_title_minus_indep() from
 * the host name (gethostname(), or "unnamed" if that call fails) and
 * testset.param.
 *
 * Five \addplot series are emitted, using the avg field of, in order:
 * resval[1] + resval[2]; resval[3]; resval[4] + resval[5] + resval[6];
 * resval[7]; and resval[9] - resval[0]. The \legend line that follows lists
 * five labels: prepsi (step 1b), init nearfield (step 1c), far field
 * (steps 2a-c), nearfield (step 2d), total minus step 1a.
 *
 * NOTE(review): the declaration of hostname, the last argument of the title
 * call, and the condition choosing between the ",%d" and "%d" forms of the
 * x coordinate list are not visible in this excerpt.
 */
521 void fastsum_print_output_histo_PreRfNfT(FILE *out,
s_testset testset)
523 int i, size = testset.nresults;
525 char plottitle[1025];
527 if (gethostname(hostname, 1024) != 0)
528 strncpy(hostname,
"unnamed", 1024);
530 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param,
533 fprintf(out,
"\\begin{tikzpicture}\n");
534 fprintf(out,
"\\begin{axis}[");
535 fprintf(out,
"width=0.9\\textwidth, height=0.6\\textwidth, ");
536 fprintf(out,
"symbolic x coords={");
537 for (i = 0; i < size; i++)
539 fprintf(out,
",%d", testset.results[i].nthreads);
541 fprintf(out,
"%d", testset.results[i].nthreads);
544 "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
545 fprintf(out,
" title={%s}", plottitle);
546 fprintf(out,
" ]\n");
547 fprintf(out,
"\\addplot coordinates {");
548 for (i = 0; i < size; i++)
549 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
550 testset.results[i].resval[1].avg + testset.results[i].resval[2].avg);
551 fprintf(out,
"};\n");
553 fprintf(out,
"\\addplot coordinates {");
554 for (i = 0; i < size; i++)
555 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
556 testset.results[i].resval[3].avg);
557 fprintf(out,
"};\n");
559 fprintf(out,
"\\addplot coordinates {");
560 for (i = 0; i < size; i++)
561 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
562 testset.results[i].resval[4].avg + testset.results[i].resval[5].avg
563 + testset.results[i].resval[6].avg);
564 fprintf(out,
"};\n");
566 fprintf(out,
"\\addplot coordinates {");
567 for (i = 0; i < size; i++)
568 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
569 testset.results[i].resval[7].avg);
570 fprintf(out,
"};\n");
572 fprintf(out,
"\\addplot coordinates {");
573 for (i = 0; i < size; i++)
574 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
575 testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
576 fprintf(out,
"};\n");
578 "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
579 fprintf(out,
"\\end{axis}\n");
580 fprintf(out,
"\\end{tikzpicture}\n");
581 fprintf(out,
"\n\n");
/*
 * Writes a tikzpicture/axis environment to out with one speedup curve per
 * test set (y label "Speedup", x label "Number of threads").
 *
 * For each test set, the reference time tref is taken from the result whose
 * nthreads equals 1, as resval[9].avg - resval[0].avg. Each plotted point
 * then divides by the same difference (resval[9].avg - resval[0].avg) of
 * the respective result. The same ratios are also printed to stderr as
 * "<nthreads>:<value>".
 *
 * Title and legend are built with fastsum_get_plot_title_minus_indep().
 * Because that function complements its mask argument, the title call
 * (diff_mask | MASK_FSUM_WINM) prints the parameters whose bits are NOT in
 * that set, while the legend calls (~(diff_mask | MASK_FSUM_WINM), empty
 * host name, size 255) print exactly the parameters whose bits ARE in it.
 *
 * NOTE(review): the remaining parameters (testsets, ntestsets), the
 * declarations of hostname, title, testset and tref, the numerator of the
 * plotted ratio (presumably tref), and any separator written between legend
 * entries are not visible in this excerpt.
 */
586 void fastsum_print_output_speedup_total_minus_indep(FILE *out,
591 char plottitle[1025];
592 unsigned int diff_mask = fastsum_determine_different_parameters(testsets,
595 if (gethostname(hostname, 1024) != 0)
596 strncpy(hostname,
"unnamed", 1024);
598 fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname,
599 testsets[0].param, diff_mask | MASK_FSUM_WINM);
601 fprintf(out,
"\\begin{tikzpicture}\n");
602 fprintf(out,
"\\begin{axis}[");
604 "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
605 fprintf(out,
" title={%s}", plottitle);
606 fprintf(out,
" ]\n");
608 for (t = 0; t < ntestsets; t++)
613 for (i = 0; i < testset.nresults; i++)
614 if (testset.results[i].nthreads == 1)
615 tref = testset.results[i].resval[9].avg
616 - testset.results[i].resval[0].avg;
618 fprintf(out,
"\\addplot coordinates {");
619 for (i = 0; i < testset.nresults; i++)
620 fprintf(out,
"(%d, %.6" __FES__
") ", testset.results[i].nthreads,
622 / (testset.results[i].resval[9].avg
623 - testset.results[i].resval[0].avg));
624 fprintf(out,
"};\n");
626 for (i = 0; i < testset.nresults; i++)
628 fprintf(stderr,
"%d:%.3" __FIS__
" ", testset.results[i].nthreads,
630 / (testset.results[i].resval[9].avg
631 - testset.results[i].resval[0].avg));
633 fprintf(stderr,
"\n\n");
636 fprintf(out,
"\\legend{{");
637 for (t = 0; t < ntestsets; t++)
642 fastsum_get_plot_title_minus_indep(title, 255,
"", testsets[t].param,
643 ~(diff_mask | MASK_FSUM_WINM));
644 fprintf(out,
"%s", title);
646 fprintf(out,
"}}\n");
647 fprintf(out,
"\\end{axis}\n");
648 fprintf(out,
"\\end{tikzpicture}\n");
649 fprintf(out,
"\n\n");
/*
 * Fills one test set: records the parameters, creates the input data once,
 * and runs the benchmark for every requested thread count.
 *
 * testset:               receives the parameters, the results array and
 *                        nresults.
 * d, L, M, n, m, p, kernel_name, c, eps_I, eps_B:
 *                        copied into testset->param. kernel_name is stored
 *                        as a pointer, not duplicated.
 * nthreads_array:        thread counts to benchmark.
 * n_threads_array_size:  number of entries in nthreads_array.
 *
 * testset->results is allocated here with NFFT(malloc), one s_result per
 * thread count; this function does not show it being freed. The data set is
 * generated with run_test_create(d, L, M) before the loop, and each
 * run_test() call uses NREPEAT repetitions.
 */
654 void run_testset(
s_testset *testset,
int d,
int L,
int M,
int n,
int m,
int p,
655 char *kernel_name, R c, R eps_I, R eps_B,
656 int *nthreads_array,
int n_threads_array_size)
659 testset->param.d = d;
660 testset->param.L = L;
661 testset->param.M = M;
662 testset->param.n = n;
663 testset->param.m = m;
664 testset->param.p = p;
665 testset->param.kernel_name = kernel_name;
666 testset->param.c = c;
667 testset->param.eps_I = eps_I;
668 testset->param.eps_B = eps_B;
670 testset->results = (
s_result*) NFFT(malloc)(
671 (size_t)(n_threads_array_size) *
sizeof(
s_result));
672 testset->nresults = n_threads_array_size;
674 run_test_create(testset->param.d, testset->param.L, testset->param.M);
675 for (i = 0; i < n_threads_array_size; i++)
677 testset->results[i].nthreads = nthreads_array[i];
678 run_test(testset->results[i].resval, NREPEAT, testset->param.n,
679 testset->param.m, testset->param.p, testset->param.kernel_name,
680 testset->param.c, testset->param.eps_I, testset->param.eps_B,
681 testset->results[i].nthreads);
/*
 * Runs a single test set (d=3, L=M=100000, n=128, m=4, p=7, kernel
 * "one_over_x", c=0, eps_I=eps_B=0.03125) for the given thread counts and
 * writes four plots to file_out_tex: the speedup plot, the fastsum
 * histogram, and the adjoint and forward NFFT histograms.
 *
 * The visible code is guarded by MEASURE_TIME && MEASURE_TIME_FFTW.
 * NOTE(review): the declaration of testsets and the matching #endif are not
 * visible in this excerpt, so the exact extent of the guard cannot be
 * confirmed from here.
 */
686 void test1(
int *nthreads_array,
int n_threads_array_size)
690 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
691 run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7,
"one_over_x", K(0.0), K(0.03125), K(0.03125), nthreads_array, n_threads_array_size);
693 fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);
695 fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);
697 nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);
699 nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);
/*
 * Program entry point. Obtains the list of thread counts from
 * get_nthreads_array(), prints it to stderr, opens
 * fastsum_benchomp_results_plots.tex for writing, runs test1() and closes
 * the file again.
 *
 * When MEASURE_TIME and MEASURE_TIME_FFTW are not both defined, a warning
 * asking to re-run configure with the listed options is printed to stderr.
 * NOTE(review): the matching #endif, any early exit inside that guard, the
 * declaration of nthreads_array and k, and the return statement are not
 * visible in this excerpt. The visible lines do not check the result of
 * fopen() before it is used.
 */
703 int main(
int argc,
char** argv)
706 int n_threads_array_size = get_nthreads_array(&nthreads_array);
709 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
710 fprintf(stderr,
"WARNING: Detailed time measurements are not activated.\n");
711 fprintf(stderr,
"Please re-run the configure script with options\n");
713 "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
714 fprintf(stderr,
"and run \"make clean all\"\n\n");
717 for (k = 0; k < n_threads_array_size; k++)
718 fprintf(stderr,
"%d ", nthreads_array[k]);
719 fprintf(stderr,
"\n");
721 file_out_tex = fopen(
"fastsum_benchomp_results_plots.tex",
"w");
723 test1(nthreads_array, n_threads_array_size);
725 fclose(file_out_tex);