/*
* Copyright (c) 2010-2013 Michael Pippig
*
* This file is part of PFFT.
*
* PFFT is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* PFFT is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with PFFT. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "pfft.h"
#include "ipfft.h"
#include "util.h"
/* Forward declarations of the file-local (static) helpers.
 * Being static, they are defined elsewhere in this translation unit. */

/* Yields the value stored as Nb by its caller, where it is described as a
 * "parameter of canonicalized trafo". Takes the process-mesh rank, the local
 * input/output sizes and the transposition flag. */
static INT initialize_Nb(
int rnk_pm, const INT *local_ni, const INT *local_no, unsigned transp_flag);
/* Yields the value stored as tuple_size by its caller. Takes the global
 * input/output sizes, the process-mesh rank, howmany and both flag words.
 * NOTE(review): exact definition of "tuple size" is not visible here. */
static INT calculate_tuple_size(
int rnk_n, const INT *ni, const INT *no, int rnk_pm, INT howmany,
unsigned trafo_flag, unsigned transp_flag);
/* Used by the public routines to "get initial and final data decomposition":
 * writes the local input size/start (local_ni, local_i_start) and the local
 * output size/start (local_no, local_o_start). */
static void local_size_transposed(
int rnk_n, const INT *ni, const INT *no,
const INT *iblock, const INT *oblock,
int rnk_pm, const int *coords_pm,
unsigned trafo_flag, unsigned transp_flag,
INT *local_ni, INT *local_i_start,
INT *local_no, INT *local_o_start);
/* Writes local_n and local_start from the sizes n, block sizes blk and the
 * process-mesh coordinates.
 * NOTE(review): presumably the decomposition for non-transposed data layout,
 * judging by the name only - confirm against the definition. */
static void decompose_nontransposed(
int rnk_n, const INT *n, const INT *blk,
int rnk_pm, const int *coords_pm,
unsigned trafo_flag,
INT *local_n, INT *local_start);
/* Same parameter list as decompose_nontransposed.
 * NOTE(review): presumably the counterpart for transposed data layout,
 * judging by the name only - confirm against the definition. */
static void decompose_transposed(
int rnk_n, const INT *n, const INT *blk,
int rnk_pm, const int *coords_pm,
unsigned trafo_flag,
INT *local_n, INT *local_start);
/* call these routines with transp_flag
* PFFT_TRANSPOSED_IN or PFFT_TRANSPOSED_OUT */
/* Determine the local input and output blocks (sizes and start offsets) for
 * the process at mesh coordinates coords_pm.
 *
 * The decomposition itself is delegated to local_size_transposed(). After
 * that, the extent of the last dimension is rewritten for real-data
 * transforms, because the PFFT user interface reports the number of real
 * values:
 *   - r2c: adjusts local_ni[rnk_n-1]
 *   - c2r: adjusts local_no[rnk_n-1]
 * In the padded case the computed extent is doubled; otherwise it is
 * replaced by the corresponding global extent (ni / no). */
void PX(local_block_partrafo_transposed)(
    int rnk_n, const INT *ni, const INT *no,
    const INT *iblock, const INT *oblock,
    int rnk_pm, int *coords_pm,
    unsigned transp_flag, unsigned trafo_flag,
    INT *local_ni, INT *local_i_start,
    INT *local_no, INT *local_o_start
    )
{
  const int last = rnk_n - 1;
  const int is_padded = (trafo_flag & PFFTI_TRAFO_PADDED) != 0;

  /* initial and final data decomposition */
  local_size_transposed(
      rnk_n, ni, no, iblock, oblock,
      rnk_pm, coords_pm, trafo_flag, transp_flag,
      local_ni, local_i_start, local_no, local_o_start);

  /* r2c input: report the physical size in units of real inputs */
  if (trafo_flag & PFFTI_TRAFO_R2C) {
    local_ni[last] = is_padded ? local_ni[last] * 2 : ni[last];
  }

  /* c2r output: report the physical size in units of real outputs */
  if (trafo_flag & PFFTI_TRAFO_C2R) {
    local_no[last] = is_padded ? local_no[last] * 2 : no[last];
  }
}
INT PX(local_size_partrafo_transposed)(
int rnk_n, const INT *n, const INT *ni, const INT *no,
INT howmany, const INT *iblock, const INT *oblock,
int rnk_pm, MPI_Comm *comms_pm,
unsigned transp_flag, const unsigned *trafo_flags,
INT *local_ni, INT *local_i_start,
INT *local_no, INT *local_o_start
)
{
unsigned trafo_flag = trafo_flags[rnk_pm];
INT mem=1, mem_tmp;
INT Nb, tuple_size, N0, N1, h0, h1, hm, blk0, blk1, N, Ni, No;
INT *pni, *pno;
X(r2r_kind) kind, *kinds=NULL;
int *coords_pm = PX(malloc_int)(rnk_pm);
PX(get_coords)(rnk_pm, comms_pm,
coords_pm);
/* get initial and final data decomposition */
local_size_transposed(rnk_n, ni, no, iblock, oblock,
rnk_pm, coords_pm, trafo_flag, transp_flag,
local_ni, local_i_start, local_no, local_o_start);
/* parameter of canonicalized trafo */
Nb = initialize_Nb(rnk_pm, local_ni, local_no, transp_flag);
tuple_size = calculate_tuple_size(
rnk_n, ni, no, rnk_pm, howmany, trafo_flag, transp_flag);
/* plan forward trafo of last dims */
mem_tmp = PX(local_size_outrafo)(
Nb, rnk_n - rnk_pm,
&n[rnk_pm], &ni[rnk_pm], &no[rnk_pm],
howmany, trafo_flag);
mem = MAX(mem, mem_tmp);
pni = PX(malloc_INT)(rnk_n);
pno = PX(malloc_INT)(rnk_n);
PX(physical_dft_size)(rnk_n, ni, trafo_flag,
pni);
PX(physical_dft_size)(rnk_n, no, trafo_flag,
pno);
/* only trafo of last dimensions is r2c or c2r */
if(trafo_flag & PFFTI_TRAFO_RDFT)
trafo_flag = PFFTI_TRAFO_C2C;
for(int t=0; t