/*
 * furnace/extern/fftw/tests/fftw-bench.c
 *
 * 304 lines
 * 7.2 KiB
 * C
 */

/* See bench.c. We keep a few common subroutines in this file so
that they can be re-used in the MPI test program. */
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "tests/fftw-bench.h"
/* define to enable code that traps floating-point exceptions.
Disabled by default because I don't want to worry about the
portability of such code. feenableexcept() seems to be a GNU
thing */
#undef TRAP_FP_EXCEPTIONS
#ifdef TRAP_FP_EXCEPTIONS
# include <signal.h>
# include <fenv.h>
#endif
#ifdef _OPENMP
# include <omp.h>
#endif
#ifdef HAVE_SMP
/* When nonzero, rdwisdom() initializes the FFTW threads interface.
   This file only reads it. */
int threads_ok = 1;
#endif
/* Plan currently under test: created by can_do()/setup(), executed by
   doit(), destroyed by can_do()/done(). */
FFTW(plan) the_plan = 0;
/* Name of the wisdom file read by rdwisdom() and written by wrwisdom(). */
static const char *wisdat = "wis.dat";
/* Planner flags accumulated by useropt() and OR'd into every mkplan() call. */
unsigned the_flags = 0;
/* Set by the "paranoid" user option; not read in this file. */
int paranoid = 0;
/* Set by the "wisdom" user option; enables reading the wisdom file. */
int usewisdom = 0;
/* Set once rdwisdom() has attempted to read wisdom; cleared by setup()
   when amnesia is on; wrwisdom() writes nothing unless it is set. */
int havewisdom = 0;
/* Thread count requested with the "nthreads=N" user option. */
int nthreads = 1;
/* Set by the "amnesia" user option; makes setup() forget wisdom first. */
int amnesia = 0;
extern void install_hook(void); /* in hook.c */
extern void uninstall_hook(void); /* in hook.c */
#ifdef FFTW_RANDOM_ESTIMATOR
/* Seed assigned by the "eseed=N" user option. */
extern unsigned FFTW(random_estimate_seed);
#endif
#ifdef TRAP_FP_EXCEPTIONS
/* SIGFPE handler: fftw code is not supposed to generate FP exceptions,
   so report the event and abort. */
static void sigfpe_handler(int sig, siginfo_t *info, void *context)
{
     UNUSED(context);
     UNUSED(info);
     UNUSED(sig);

     fprintf(stderr, "caught FPE, aborting\n");
     abort();
}

/* Enable trapping of divide-by-zero, invalid, overflow and underflow
   exceptions and route SIGFPE to sigfpe_handler().  Exits with status 1
   if the handler cannot be installed. */
static void setup_sigfpe_handler(void)
{
     struct sigaction act;

     feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW | FE_UNDERFLOW);

     memset(&act, 0, sizeof(act));
     act.sa_flags = SA_SIGINFO;
     act.sa_sigaction = sigfpe_handler;

     if (sigaction(SIGFPE, &act, NULL) != -1)
          return;

     fprintf(stderr, "cannot install sigfpe handler\n");
     exit(1);
}
#else
/* Floating-point exception trapping is compiled out: nothing to do. */
static void setup_sigfpe_handler(void)
{
}
#endif
/*
 * Dummy serial threads backend for testing threads_set_callback.
 *
 * Calls work() once per job, in order, on the calling thread.  Job i
 * receives the address jobdata + elsize * i.  The value returned by
 * work() is discarded and `data' is not used.  Nothing is called when
 * njobs <= 0.
 */
static void serial_threads(void *(*work)(char *), char *jobdata, size_t elsize, int njobs, void *data)
{
     int job = 0;

     (void) data; /* unused */

     while (job < njobs) {
          work(jobdata + elsize * job);
          ++job;
     }
}
/*
 * Apply one user option string.
 *
 * Recognized forms of `arg':
 *   - a planner flag name ("patient", "estimate", "estimatepat",
 *     "exhaustive", "unaligned", "nosimd", "noindirectop",
 *     "wisdom-only"): the matching FFTW_* flag is OR'd into the_flags;
 *   - "flag=N":  the integer N is OR'd into the_flags;
 *   - "bflag=N": bit number N is set in the_flags.  N must be a valid
 *     bit index for the_flags, otherwise the option is reported and
 *     ignored (shifting by a negative or too-large count is undefined);
 *   - "paranoid", "wisdom", "amnesia": set the global of the same name
 *     ("wisdom" sets usewisdom);
 *   - "threads_callback": with HAVE_SMP, installs serial_threads as the
 *     threads callback; otherwise prints a notice;
 *   - "nthreads=N": sets nthreads;
 *   - "eseed=N": sets the random-estimator seed (only when compiled
 *     with FFTW_RANDOM_ESTIMATOR);
 *   - "timelimit=X": passes X to set_timelimit.
 *
 * Anything else is reported on stderr and ignored.
 */
void useropt(const char *arg)
{
     int x;
     double y;

     if (!strcmp(arg, "patient")) the_flags |= FFTW_PATIENT;
     else if (!strcmp(arg, "estimate")) the_flags |= FFTW_ESTIMATE;
     else if (!strcmp(arg, "estimatepat")) the_flags |= FFTW_ESTIMATE_PATIENT;
     else if (!strcmp(arg, "exhaustive")) the_flags |= FFTW_EXHAUSTIVE;
     else if (!strcmp(arg, "unaligned")) the_flags |= FFTW_UNALIGNED;
     else if (!strcmp(arg, "nosimd")) the_flags |= FFTW_NO_SIMD;
     else if (!strcmp(arg, "noindirectop")) the_flags |= FFTW_NO_INDIRECT_OP;
     else if (!strcmp(arg, "wisdom-only")) the_flags |= FFTW_WISDOM_ONLY;
     else if (sscanf(arg, "flag=%d", &x) == 1) the_flags |= x;
     else if (sscanf(arg, "bflag=%d", &x) == 1) {
          /* only shift by a count that is valid for the_flags' type */
          if (x >= 0 && (size_t) x < sizeof(the_flags) * CHAR_BIT)
               the_flags |= 1U << x;
          else
               fprintf(stderr, "bit index out of range in user option: %s. Ignoring.\n", arg);
     }
     else if (!strcmp(arg, "paranoid")) paranoid = 1;
     else if (!strcmp(arg, "wisdom")) usewisdom = 1;
     else if (!strcmp(arg, "amnesia")) amnesia = 1;
     else if (!strcmp(arg, "threads_callback")) {
#ifdef HAVE_SMP
          FFTW(threads_set_callback)(serial_threads, NULL);
#else
          fprintf(stderr, "Serial FFTW; ignoring threads_callback option.\n");
#endif
     }
     else if (sscanf(arg, "nthreads=%d", &x) == 1) nthreads = x;
#ifdef FFTW_RANDOM_ESTIMATOR
     else if (sscanf(arg, "eseed=%d", &x) == 1) FFTW(random_estimate_seed) = x;
#endif
     else if (sscanf(arg, "timelimit=%lg", &y) == 1) {
          FFTW(set_timelimit)(y);
     }
     else fprintf(stderr, "unknown user option: %s. Ignoring.\n", arg);
}
/*
 * Prepare the planner before the first plan is created.
 *
 * Does nothing once havewisdom is set.  Otherwise:
 *   1. with HAVE_SMP and threads_ok, initializes FFTW threads, selects
 *      nthreads planner threads, makes the planner thread safe and, under
 *      OpenMP, sets the OpenMP thread count;
 *   2. if the "wisdom" option was given, imports wisdom from the wisdom
 *      file (a missing file is not an error), reports timing according
 *      to `verbose', and sets havewisdom.
 *
 * When usewisdom is 0 the function returns before setting havewisdom, so
 * step 1 runs again on the next call.
 */
void rdwisdom(void)
{
     FILE *wisfile;
     double elapsed;
     int imported = 0;

     if (havewisdom)
          return;

#ifdef HAVE_SMP
     if (!threads_ok) {
          /* NOTE(review): nthreads is reset only when the warning is
             printed (verbose > 1) -- confirm this is intended. */
          if (nthreads > 1 && verbose > 1) {
               fprintf(stderr, "bench: WARNING - nthreads = %d, but threads not supported\n", nthreads);
               nthreads = 1;
          }
     }
     else {
          BENCH_ASSERT(FFTW(init_threads)());
          FFTW(plan_with_nthreads)(nthreads);
          BENCH_ASSERT(FFTW(planner_nthreads)() == nthreads);
          FFTW(make_planner_thread_safe)();
#ifdef _OPENMP
          omp_set_num_threads(nthreads);
#endif
     }
#endif

     if (!usewisdom)
          return;

     timer_start(USER_TIMER);
     wisfile = fopen(wisdat, "r");
     if (wisfile != NULL) {
          if (import_wisdom(wisfile))
               imported = 1;
          else
               fprintf(stderr, "bench: ERROR reading wisdom\n");
          fclose(wisfile);
     }
     elapsed = timer_stop(USER_TIMER);

     if (imported && verbose > 1) {
          printf("READ WISDOM (%g seconds): ", elapsed);
          if (verbose > 3)
               export_wisdom(stdout);
          printf("\n");
     }

     /* set even when nothing was imported, so that wrwisdom() still runs */
     havewisdom = 1;
}
/*
 * Export the accumulated wisdom to the wisdom file.
 *
 * Does nothing unless havewisdom is set.  Failure to open the file, or
 * a failure reported by fclose() (where buffered write errors show up),
 * is reported on stderr; the function still returns normally.  With
 * verbose > 1 the elapsed time is printed.
 */
void wrwisdom(void)
{
     FILE *f;
     double tim;

     if (!havewisdom) return;

     timer_start(USER_TIMER);
     f = fopen(wisdat, "w");
     if (!f) {
          fprintf(stderr, "bench: ERROR opening %s for writing\n", wisdat);
     }
     else {
          export_wisdom(f);
          if (fclose(f) != 0)
               fprintf(stderr, "bench: ERROR writing wisdom\n");
     }
     tim = timer_stop(USER_TIMER);

     if (verbose > 1) printf("write wisdom took %g seconds\n", tim);
}
/*
 * Choose between FFTW_DESTROY_INPUT and FFTW_PRESERVE_INPUT for problem p.
 *
 * fftw3 cannot preserve input for multidimensional c2r transforms, so
 * for an out-of-place PROBLEM_REAL problem with positive sign and rank
 * greater than 1 this sets p->destroy_input as a side effect.
 *
 * Returns FFTW_DESTROY_INPUT when p->destroy_input is nonzero (after
 * the adjustment above), FFTW_PRESERVE_INPUT otherwise.
 */
static unsigned preserve_input_flags(bench_problem *p)
{
     int must_destroy = (p->kind == PROBLEM_REAL
                         && p->sign > 0
                         && !p->in_place
                         && p->sz->rnk > 1);

     if (must_destroy)
          p->destroy_input = 1;

     return p->destroy_input ? FFTW_DESTROY_INPUT : FFTW_PRESERVE_INPUT;
}
/*
 * Report whether a plan can be created for problem p.
 *
 * Runs rdwisdom(), then plans p with the user's flags plus FFTW_ESTIMATE.
 * The trial plan is destroyed before returning, and the global the_plan
 * is reset to 0 so it never refers to a destroyed plan.
 *
 * Returns 1 if a plan was obtained, 0 otherwise.
 */
int can_do(bench_problem *p)
{
     double tim;
     int ok;

     if (verbose > 2 && p->pstring)
          printf("Planning %s...\n", p->pstring);
     rdwisdom();

     timer_start(USER_TIMER);
     the_plan = mkplan(p, preserve_input_flags(p) | the_flags | FFTW_ESTIMATE);
     tim = timer_stop(USER_TIMER);
     if (verbose > 2) printf("estimate-planner time: %g s\n", tim);

     ok = (the_plan != 0);
     if (ok) {
          FFTW(destroy_plan)(the_plan);
          the_plan = 0; /* do not leave a dangling plan in the global */
     }
     return ok;
}
/*
 * Create the plan for problem p and store it in the_plan.
 *
 * In order: installs the FP-exception handler (a no-op unless
 * TRAP_FP_EXCEPTIONS is defined), forgets wisdom when the "amnesia"
 * option is set, exercises the FFTW allocator, runs rdwisdom(),
 * installs the planner hook, and then times the call to mkplan().
 * Asserts that a plan was obtained.  The flop counts and both cost
 * figures are always queried; they are printed only when verbose > 1.
 */
void setup(bench_problem *p)
{
     double plan_secs;
     double n_add, n_mul, n_fma;
     double est_cost, plan_cost;

     setup_sigfpe_handler();

     if (amnesia) {
          FFTW(forget_wisdom)();
          havewisdom = 0;
     }

     /* Regression test: check that fftw_malloc exists and links
      * properly */
     {
          void *ptr = FFTW(malloc)(42);
          BENCH_ASSERT(FFTW(alignment_of)((bench_real *)ptr) == 0);
          FFTW(free)(ptr);
     }

     rdwisdom();
     install_hook();

#ifdef HAVE_SMP
     if (nthreads > 1 && verbose > 1)
          printf("NTHREADS = %d\n", nthreads);
#endif

     timer_start(USER_TIMER);
     the_plan = mkplan(p, preserve_input_flags(p) | the_flags);
     plan_secs = timer_stop(USER_TIMER);

     if (verbose > 1)
          printf("planner time: %g s\n", plan_secs);

     BENCH_ASSERT(the_plan);

     /* queried unconditionally, reported only in verbose mode */
     FFTW(flops)(the_plan, &n_add, &n_mul, &n_fma);
     est_cost = FFTW(estimate_cost)(the_plan);
     plan_cost = FFTW(cost)(the_plan);

     if (verbose > 1) {
          FFTW(print_plan)(the_plan);
          printf("\n");
          printf("flops: %0.0f add, %0.0f mul, %0.0f fma\n",
                 n_add, n_mul, n_fma);
          printf("estimated cost: %f, pcost = %f\n", est_cost, plan_cost);
     }
}
/*
 * Execute the current plan (the_plan) `iter' times.
 * Nothing is executed when iter <= 0.  The problem argument is unused.
 */
void doit(int iter, bench_problem *p)
{
     FFTW(plan) q = the_plan;
     int remaining;

     UNUSED(p);

     for (remaining = iter; remaining > 0; --remaining)
          FFTW(execute)(q);
}
/*
 * Tear down after a problem has been run: destroy the current plan,
 * reset the_plan to 0 so the global never refers to a destroyed plan,
 * and remove the planner hook.  The problem argument is unused.
 */
void done(bench_problem *p)
{
     UNUSED(p);

     FFTW(destroy_plan)(the_plan);
     the_plan = 0; /* do not leave a dangling plan in the global */

     uninstall_hook();
}
/*
 * Final teardown.  In order: initial_cleanup(), write wisdom, release
 * FFTW's resources (the threads variant when built with HAVE_SMP),
 * optionally print allocator statistics when FFTW_DEBUG_MALLOC is
 * defined, then final_cleanup().
 */
void cleanup(void)
{
     initial_cleanup();
     wrwisdom();

#ifndef HAVE_SMP
     FFTW(cleanup)();
#else
     FFTW(cleanup_threads)();
#endif

#if defined(FFTW_DEBUG_MALLOC)
     {
          /* undocumented memory checker */
          FFTW_EXTERN void FFTW(malloc_print_minfo)(int v);
          FFTW(malloc_print_minfo)(verbose);
     }
#endif

     final_cleanup();
}