#include #include static void measure_pfft( const ptrdiff_t *n, MPI_Comm comm_cart_3d, int loops, unsigned pfft_opt_flags, int transposed, int inplace, int verbose, int print_timer); static void measure_fftw( const ptrdiff_t *n, int parallel, int loops, unsigned fftw_opt_flags, int transposed, int inplace, int verbose); static void loop_pfft_tests( ptrdiff_t *n, MPI_Comm comm, int loops, unsigned pfft_flags, int transposed, int inplace, int verbose, int cmp_flags, int print_timer); static void loop_fftw_tests( ptrdiff_t *n, int parallel, int loops, int transposed, int inplace, int verbose); static void init_parameters( int argc, char **argv, ptrdiff_t *n, int *np, unsigned *pfft_flags, int *loops, int *transposed, int *verbose, int *inplace, int *cmp_fftw, int *cmp_decomp, int *cmp_flags, int *print_timer); int main(int argc, char **argv) { int parallel; MPI_Comm comm_cart_1d, comm_cart_2d, comm_cart_3d; /* Set size of FFT and process mesh */ ptrdiff_t n[3] = {128,128,128}; int np[3] = {1,1,1}; int loops = 1; int verbose = 0; int inplace = 0; int cmp_fftw = 0; int cmp_decomp = 0; int cmp_flags = 0; int transposed = 0; int print_timer = 0; unsigned pfft_flags = 0; /* Initialize MPI and PFFT */ MPI_Init(&argc, &argv); pfft_init(); /* set parameters by command line */ init_parameters(argc, argv, n, np, &pfft_flags, &loops, &transposed, &verbose, &inplace, &cmp_fftw, &cmp_decomp, &cmp_flags, &print_timer); /* Create three-dimensional process grid of size np[0] x np[1] x np[2], if possible */ if( pfft_create_procmesh(3, MPI_COMM_WORLD, np, &comm_cart_3d) ){ pfft_fprintf(MPI_COMM_WORLD, stderr, "Error: Procmesh of size %d x %d x %d does not fit to number of allocated processes.\n", np[0], np[1], np[2]); pfft_fprintf(MPI_COMM_WORLD, stderr, " Please allocate %d processes (mpiexec -np %d ...) or change the procmesh (with -pfft_np * * *).\n", np[0]*np[1]*np[2], np[0]*np[1]*np[2]); MPI_Finalize(); return 1; } int num_serial_dims = (np[0]==1) + (np[1]==1) + (np[2]==1); if( cmp_decomp || num_serial_dims==0){ pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (3d data decomposition):\n"); loop_pfft_tests(n, comm_cart_3d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer); pfft_printf(MPI_COMM_WORLD, "\n"); MPI_Comm_free(&comm_cart_3d); } /* run 2d-data decomposition if possible */ if( num_serial_dims >= 1 ){ if( cmp_decomp || num_serial_dims==1){ /* move serial dims to the end */ if(np[1]==1){ np[1] = np[2]; np[2] = 1; } if(np[0]==1){ np[0] = np[1]; np[1] = 1; } if(np[1]==1){ np[1] = np[2]; np[2] = 1; } if( pfft_create_procmesh(2, MPI_COMM_WORLD, np, &comm_cart_2d) ) pfft_printf(MPI_COMM_WORLD, "Error in creation of 2d procmesh of size %d x %d\n", np[0], np[1]); pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (2d data decomposition):\n"); loop_pfft_tests(n, comm_cart_2d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer); pfft_printf(MPI_COMM_WORLD, "\n"); MPI_Comm_free(&comm_cart_2d); } } /* run 1d-data decomposition if possible */ if( num_serial_dims >= 2 ){ /* move serial dims to the end */ if(np[1]==1){ np[1] = np[2]; np[2] = 1; } if(np[0]==1){ np[0] = np[1]; np[1] = 1; } if( pfft_create_procmesh(1, MPI_COMM_WORLD, np, &comm_cart_1d) ) pfft_printf(MPI_COMM_WORLD, "Error in creation of 2d procmesh of size %d\n", np[0]); pfft_printf(MPI_COMM_WORLD, "* PFFT runtimes (1d data decomposition):\n"); loop_pfft_tests(n, comm_cart_1d, loops, pfft_flags, transposed, inplace, verbose, cmp_flags, print_timer); pfft_printf(MPI_COMM_WORLD, "\n"); MPI_Comm_free(&comm_cart_1d); if(cmp_fftw){ pfft_printf(MPI_COMM_WORLD, "* FFTW_MPI runtimes (1d data decomposition):\n"); loop_fftw_tests(n, parallel=1, loops, transposed, inplace, verbose); } } /* run serial if possible */ if( np[0]*np[1]*np[2] == 1 ){ if(cmp_fftw){ pfft_printf(MPI_COMM_WORLD, "* serial FFTW runtimes (no data decomposition at all):\n"); loop_fftw_tests(n, parallel=0, loops, transposed, inplace, verbose); pfft_printf(MPI_COMM_WORLD, "\n"); } } /* free mem and finalize */ MPI_Finalize(); return 0; } static void loop_pfft_tests( ptrdiff_t *n, MPI_Comm comm, int loops, unsigned pfft_flags, int transposed, int inplace, int verbose, int cmp_flags, int print_timer ) { unsigned tune, measure, destroy; if(!cmp_flags){ measure_pfft(n, comm, loops, pfft_flags, transposed, inplace, verbose, print_timer); return; } destroy = 0; for(int k=0; k<2; k++){ measure = PFFT_ESTIMATE; for(int l=0; l<2; l++){ tune = PFFT_NO_TUNE; for(int m=0; m<2; m++){ measure_pfft(n, comm, loops, tune | measure | destroy, transposed, inplace, verbose, print_timer); tune = PFFT_TUNE; } measure = PFFT_MEASURE; } if(inplace) break; destroy = PFFT_DESTROY_INPUT; } } static void loop_fftw_tests( ptrdiff_t *n, int parallel, int loops, int transposed, int inplace, int verbose ) { unsigned measure, destroy; destroy = 0; for(int k=0; k<2; k++){ measure = FFTW_ESTIMATE; for(int l=0; l<2; l++){ measure_fftw(n, parallel, loops, measure | destroy, transposed, inplace, verbose); measure = FFTW_MEASURE; } if(inplace) break; destroy = FFTW_DESTROY_INPUT; } } static void measure_pfft( const ptrdiff_t *n, MPI_Comm comm_cart, int loops, unsigned pfft_opt_flags, int transposed, int inplace, int verbose, int print_timer ) { ptrdiff_t alloc_local; ptrdiff_t local_ni[3], local_i_start[3]; ptrdiff_t local_no[3], local_o_start[3]; double err, timer[4]; pfft_complex *in, *out; pfft_plan plan_forw=NULL, plan_back=NULL; unsigned tr_in = (transposed) ? PFFT_TRANSPOSED_IN : PFFT_TRANSPOSED_NONE; unsigned tr_out = (transposed) ? PFFT_TRANSPOSED_OUT : PFFT_TRANSPOSED_NONE; /* Get parameters of data distribution */ alloc_local = pfft_local_size_dft_3d(n, comm_cart, tr_out, local_ni, local_i_start, local_no, local_o_start); /* Allocate memory */ in = pfft_alloc_complex(alloc_local); if(inplace) out = in; else out = pfft_alloc_complex(alloc_local); /* Plan parallel forward FFT */ timer[0] = -MPI_Wtime(); plan_forw = pfft_plan_dft_3d( n, in, out, comm_cart, PFFT_FORWARD, tr_out | pfft_opt_flags); timer[0] += MPI_Wtime(); /* Plan parallel backward FFT */ timer[1] = -MPI_Wtime(); plan_back = pfft_plan_dft_3d( n, out, in, comm_cart, PFFT_BACKWARD, tr_in | pfft_opt_flags); timer[1] += MPI_Wtime(); /* Initialize input with random numbers */ pfft_init_input_complex_3d(n, local_ni, local_i_start, in); if(verbose) pfft_apr_complex_3d(in, local_ni, local_i_start, "PFFT Input", comm_cart); /* execute parallel forward FFT */ timer[2] = -MPI_Wtime(); for(int t=0; t