/*
* Copyright (c) 2010-2013 Michael Pippig
*
* This file is part of PFFT.
*
* PFFT is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PFFT is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PFFT. If not, see .
*
*/
#include "pfft.h"
#include "ipfft.h"
static void get_global_transp_param(
int step, int rnk_pm, const INT *ni, const INT *no,
const INT *local_ni, const INT *local_no,
INT tuple_size, const INT *iblock, const INT *oblock,
unsigned trafo_flag,
INT *N0, INT *N1, INT *h0, INT *h1,
INT *hm, INT *blk0, INT *blk1);
static gtransp_plan gtransp_mkplan(
void);
#if PFFT_DEBUG_GTRANSP
static gtransp_dbg gtransp_mkdbg(
INT *N, INT hm, INT *blk, R *in, R *out,
MPI_Comm comm, unsigned fftw_flags);
static void gtransp_rmdbg(
gtransp_dbg ths);
static void print_dbg(
gtransp_dbg ths);
static void print_parameters(
INT N0, INT N1, INT hm, INT blk0, INT blk1, R *in, R *out,
INT local_N0, INT local_N0_start,
INT local_N1, INT local_N1_start,
MPI_Comm comm, unsigned fftw_flags);
#endif
/* MPI Transpose for real data. Use hm==2 for complex data. */
/* Input:
* (N0 x h0) x (N1/P x h1) x hm (default)
* (N1/P x h1) x (N0 x h0) x hm (TRANSPOSED_IN)
* Output:
* (N1 x h1) x (N0/P x h0) x hm (default)
* (N0/P x h0) x (N1 x h1) x hm (TRANSPOSED_OUT)
*
* This is a collective routine.
* N0, h0, N1, h1 must be equal on all calling processes.
* */
void PX(get_global_transp_param)(
int step, int rnk_pm, const INT *ni, const INT *no,
const INT *local_ni, const INT *local_no,
INT tuple_size, const INT *iblock, const INT *oblock,
unsigned trafo_flag, unsigned transp_flag,
INT *N0, INT *N1, INT *h0, INT *h1,
INT *hm, INT *blk0, INT *blk1
)
{
/* TRANSPOSED_IN is planned in backward direction.
* Therefore switch input and output parameters. */
if(transp_flag & PFFT_TRANSPOSED_IN)
get_global_transp_param(
step, rnk_pm, no, ni, local_no, local_ni,
tuple_size, oblock, iblock, trafo_flag,
N1, N0, h1, h0, hm, blk1, blk0);
else
get_global_transp_param(
step, rnk_pm, ni, no, local_ni, local_no,
tuple_size, iblock, oblock, trafo_flag,
N0, N1, h0, h1, hm, blk0, blk1);
}
static void get_global_transp_param(
int step, int rnk_pm, const INT *ni, const INT *no,
const INT *local_ni, const INT *local_no,
INT tuple_size, const INT *iblock, const INT *oblock,
unsigned trafo_flag,
INT *N0, INT *N1, INT *h0, INT *h1,
INT *hm, INT *blk0, INT *blk1
)
{
*N0 = no[rnk_pm - step];
*N1 = ni[rnk_pm - 1 - step];
*h0 = 1;
for(int t=0; tdbg = gtransp_mkdbg(N, hm, blk, in, out, comm, fftw_flags);
#endif
ths->plan.plan = XM(plan_many_transpose)(
N[0], N[1], hm, blk[0], blk[1], in, out,
comm, fftw_flags);
ths->plan.planned_in = in;
ths->plan.planned_out = out;
ths->plan.execute = (PX(fftw_execute))(XM(execute_r2r));
return ths;
}
static gtransp_plan gtransp_mkplan(
void
)
{
gtransp_plan ths = (gtransp_plan) malloc(sizeof(gtransp_plan_s));
/* initialize to NULL for easy checks */
ths->plan.plan=NULL;
/* initialize debug info */
#if PFFT_DEBUG_GTRANSP
ths->dbg = NULL;
#endif
return ths;
}
void PX(gtransp_rmplan)(
gtransp_plan ths
)
{
/* plan was already destroyed or never initialized */
if(ths==NULL)
return;
/* take care of unsuccessful FFTW planing */
if(ths->plan.plan != NULL)
X(destroy_plan)(ths->plan.plan);
#if PFFT_DEBUG_GTRANSP
if(ths->dbg != NULL)
gtransp_rmdbg(ths->dbg);
#endif
/* free memory */
free(ths);
/* ths=NULL; would be senseless, since we can not change the pointer itself */
}
void PX(execute_gtransp)(
gtransp_plan ths,
R *planned_in, R *planned_out,
R *executed_in, R *executed_out
)
{
#if PFFT_DEBUG_GTRANSP
static int counter=0;
int myrank;
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
R lsum, gsum;
INT n_total;
if(!myrank) fprintf(stderr, "\n");
if(!myrank){
if(ths != NULL){
if(ths->plan.plan != NULL){
fprintf(stderr, "PFFT_DBG_GTRANSP: counter = %d\n", counter);
print_dbg(ths->dbg);
} else
fprintf(stderr, "PFFT_DBG_GTRANSP: nothing to do for FFTW plan\n");
}
}
n_total = (ths != NULL) ? ths->dbg->local_N0 * ths->dbg->N1 * ths->dbg->hm : 0;
/* Checksum inputs */
lsum=0.0;
if(ths != NULL)
if(ths->plan.plan != NULL)
for(INT k=0; kdbg->in[k]);
MPI_Reduce(&lsum, &gsum, 1, PFFT_MPI_REAL_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
if(ths != NULL)
if(ths->plan.plan != NULL)
if(!myrank) fprintf(stderr, "PFFT_DBG_GTRANSP: counter = %d, Checksum(in) = %e\n", counter, gsum);
#endif
/* Global transposition */
if(ths != NULL)
if(ths->plan.plan != NULL) {
PX(execute_fftw_plan)(&ths->plan, planned_in, planned_out, executed_in, executed_out);
}
#if PFFT_DEBUG_GTRANSP
n_total = (ths != NULL) ? ths->dbg->N0 * ths->dbg->local_N1 * ths->dbg->hm : 0;
/* Checksum outputs */
lsum=0.0;
if(ths != NULL)
if(ths->plan.plan != NULL)
for(INT k=0; kdbg->out[k]);
MPI_Reduce(&lsum, &gsum, 1, PFFT_MPI_REAL_TYPE, MPI_SUM, 0, MPI_COMM_WORLD);
if(ths != NULL)
if(ths->plan.plan != NULL)
if(!myrank) fprintf(stderr, "PFFT_DBG_GTRANSP: counter = %d, Checksum(out) = %e\n", counter, gsum);
// if(counter==3){
// int np, rnk, np_world;
// MPI_Comm_size(ths->dbg->comm, &np);
// MPI_Comm_size(MPI_COMM_WORLD, &np_world);
// MPI_Comm_rank(ths->dbg->comm, &rnk);
// R *lsums = (R*) malloc(sizeof(R)*np);
// R *gsums = (R*) malloc(sizeof(R)*np);
//
// /* calculate horizontal checksums */
// for(int t=0; tdbg->local_N1_start/ths->dbg->blk1 ] = lsum;
//
// MPI_Allreduce(lsums, gsums, np, PFFT_MPI_REAL_TYPE, MPI_SUM, MPI_COMM_WORLD);
//
// for(int t=0; tdbg->comm);
// if(!rnk) fprintf(stderr, "local_N1 = %td, local_N1_start = %td, vertical gsum = %e\n",
// ths->dbg->local_N1, ths->dbg->local_N1_start, gsum);
// }
counter++;
#endif
}
#if PFFT_DEBUG_GTRANSP
static gtransp_dbg gtransp_mkdbg(
INT *N, INT hm, INT *blk, R *in, R *out,
MPI_Comm comm, unsigned fftw_flags
)
{
gtransp_dbg ths = (gtransp_dbg) malloc(sizeof(gtransp_dbg_s));
ths->N0 = N[0];
ths->N1 = N[1];
ths->hm = hm;
ths->blk0 = blk[0];
ths->blk1 = blk[1];
ths->in = in;
ths->out = out;
MPI_Comm_dup(comm, &ths->comm);
ths->fftw_flags = fftw_flags;
ths->mem = XM(local_size_many_transposed)(
2, N, hm, blk[0], blk[1], comm,
&ths->local_N0, &ths->local_N0_start,
&ths->local_N1, &ths->local_N1_start);
return ths;
}
static void gtransp_rmdbg(
gtransp_dbg ths
)
{
/* plan was already destroyed or never initialized */
if(ths==NULL)
return;
/* free memory */
free(ths);
/* ths=NULL; would be senseless, since we can not change the pointer itself */
}
static void print_dbg(
gtransp_dbg ths
)
{
print_parameters(
ths->N0, ths->N1, ths->hm, ths->blk0, ths->blk1, ths->in, ths->out,
ths->local_N0, ths->local_N0_start, ths->local_N1, ths->local_N1_start,
ths->comm, ths->fftw_flags);
}
static void print_parameters(
INT N0, INT N1, INT hm, INT blk0, INT blk1, R *in, R *out,
INT local_N0, INT local_N0_start,
INT local_N1, INT local_N1_start,
MPI_Comm comm, unsigned fftw_flags
)
{
int np;
MPI_Comm_size(comm, &np);
fprintf(stderr, "PFFT_DBG_GTRANSP: N[0] = %td; N[1] = %td; hm = %td; blk[0] = %td; blk[1] = %td; np = %d;\n",
N0, N1, hm, blk0, blk1, np);
fprintf(stderr, "PFFT_DBG_GTRANSP: local_N0 = %td; local_N0_start = %td; local_N1 = %td; local_N1_start = %td;\n",
local_N0, local_N0_start, local_N1, local_N1_start);
fprintf(stderr, "PFFT_DBG_GTRANSP: fftw_flags = 0");
if(fftw_flags & FFTW_PRESERVE_INPUT)
fprintf(stderr, "| FFTW_PRESERVE_INPUT");
if(fftw_flags & FFTW_ESTIMATE)
fprintf(stderr, "| FFTW_ESTIMATE");
if(fftw_flags & FFTW_MEASURE)
fprintf(stderr, "| FFTW_MEASURE");
if(fftw_flags & FFTW_PATIENT)
fprintf(stderr, "| FFTW_PATIENT");
if(fftw_flags & FFTW_EXHAUSTIVE)
fprintf(stderr, "| FFTW_EXHAUSTIVE");
if(fftw_flags & FFTW_MPI_TRANSPOSED_IN)
fprintf(stderr, "| FFTW_MPI_TRANSPOSED_IN");
if(fftw_flags & FFTW_MPI_TRANSPOSED_OUT)
fprintf(stderr, "| FFTW_MPI_TRANSPOSED_OUT");
fprintf(stderr, ";\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: mem = XM(local_size_many_transposed)(\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: 2, N, hm, blk[0], blk[1], comm,\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: &dummy0, &dummy1, &dummy2, &dummy3);\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: ths = XM(plan_many_transpose)(\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: N[0], N[1], hm, blk[0], blk[1], in,");
if(in==out)
fprintf(stderr, " in,\n");
else
fprintf(stderr, " out,\n");
fprintf(stderr, "PFFT_DBG_GTRANSP: comm, fftw_flags);\n");
}
#endif