/*
 * Copyright (c) 2010-2013 Michael Pippig
 *
 * This file is part of PFFT.
 *
 * PFFT is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PFFT is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PFFT.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include "pfft.h"
#include "ipfft.h"
#include "util.h"

static INT initialize_Nb(
    int rnk_pm, const INT *local_ni, const INT *local_no,
    unsigned transp_flag);
static INT calculate_tuple_size(
    int rnk_n, const INT *ni, const INT *no,
    int rnk_pm, INT howmany,
    unsigned trafo_flag, unsigned transp_flag);
static void local_size_transposed(
    int rnk_n, const INT *ni, const INT *no,
    const INT *iblock, const INT *oblock,
    int rnk_pm, const int *coords_pm,
    unsigned trafo_flag, unsigned transp_flag,
    INT *local_ni, INT *local_i_start,
    INT *local_no, INT *local_o_start);
static void decompose_nontransposed(
    int rnk_n, const INT *n, const INT *blk,
    int rnk_pm, const int *coords_pm,
    unsigned trafo_flag,
    INT *local_n, INT *local_start);
static void decompose_transposed(
    int rnk_n, const INT *n, const INT *blk,
    int rnk_pm, const int *coords_pm,
    unsigned trafo_flag,
    INT *local_n, INT *local_start);


/* call these routines with transp_flag
 * PFFT_TRANSPOSED_IN or PFFT_TRANSPOSED_OUT */

/*
 * Obtain the local input/output decomposition from local_size_transposed()
 * and then express the last dimension of real-valued data in units of
 * real numbers.
 *
 * All arguments are forwarded unchanged to local_size_transposed(), which
 * receives local_ni, local_i_start, local_no and local_o_start as its
 * output arrays.  Afterwards only entry rnk_n-1 is touched:
 *
 *   - PFFTI_TRAFO_R2C set in trafo_flag:
 *       local_ni[rnk_n-1] is doubled when PFFTI_TRAFO_PADDED is also set,
 *       otherwise it is replaced by ni[rnk_n-1].
 *   - PFFTI_TRAFO_C2R set in trafo_flag:
 *       local_no[rnk_n-1] is doubled when PFFTI_TRAFO_PADDED is also set,
 *       otherwise it is replaced by no[rnk_n-1].
 *
 * The overwrite is needed because the PFFT user interface returns the
 * number of real inputs (r2c) respectively real outputs (c2r).
 *
 * Parameters read directly by this routine:
 *   rnk_n       rank of the transform; rnk_n-1 indexes the last dimension
 *   ni, no      input / output sizes, read at index rnk_n-1
 *   trafo_flag  bit set tested against PFFTI_TRAFO_R2C, PFFTI_TRAFO_C2R
 *               and PFFTI_TRAFO_PADDED
 *   local_ni, local_no
 *               arrays whose entry rnk_n-1 may be rewritten as described
 *
 * iblock, oblock, rnk_pm, coords_pm, transp_flag, local_i_start and
 * local_o_start are only passed through to local_size_transposed().
 */
void PX(local_block_partrafo_transposed)(
    int rnk_n, const INT *ni, const INT *no,
    const INT *iblock, const INT *oblock,
    int rnk_pm, int *coords_pm,
    unsigned transp_flag, unsigned trafo_flag,
    INT *local_ni, INT *local_i_start,
    INT *local_no, INT *local_o_start
    )
{
  const int last_dim = rnk_n - 1;
  const int is_padded = (trafo_flag & PFFTI_TRAFO_PADDED) != 0;

  /* initial and final data decomposition */
  local_size_transposed(
      rnk_n, ni, no, iblock, oblock,
      rnk_pm, coords_pm, trafo_flag, transp_flag,
      local_ni, local_i_start, local_no, local_o_start);

  /* r2c input: report the physical size as a count of reals;
   * padded and non-padded layouts are handled differently. */
  if (trafo_flag & PFFTI_TRAFO_R2C) {
    local_ni[last_dim] = is_padded ? local_ni[last_dim] * 2 : ni[last_dim];
  }

  /* c2r output: report the physical size as a count of reals;
   * padded and non-padded layouts are handled differently. */
  if (trafo_flag & PFFTI_TRAFO_C2R) {
    local_no[last_dim] = is_padded ? local_no[last_dim] * 2 : no[last_dim];
  }
}

INT PX(local_size_partrafo_transposed)(
    int rnk_n, const INT *n, const INT *ni, const INT *no,
    INT howmany, const INT *iblock, const INT *oblock,
    int rnk_pm, MPI_Comm *comms_pm,
    unsigned transp_flag, const unsigned *trafo_flags,
    INT *local_ni, INT *local_i_start,
    INT *local_no, INT *local_o_start
    )
{
  unsigned trafo_flag = trafo_flags[rnk_pm];
  INT mem=1, mem_tmp;
  INT Nb, tuple_size, N0, N1, h0, h1, hm, blk0, blk1, N, Ni, No;
  INT *pni, *pno;
  X(r2r_kind) kind, *kinds=NULL;
  int *coords_pm = PX(malloc_int)(rnk_pm);

  PX(get_coords)(rnk_pm, comms_pm, coords_pm);

  /* get initial and final data decomposition */
  local_size_transposed(rnk_n, ni, no, iblock, oblock, rnk_pm, coords_pm, trafo_flag, transp_flag,
      local_ni, local_i_start, local_no, local_o_start);

  /* parameter of canonicalized trafo */
  Nb = initialize_Nb(rnk_pm, local_ni, local_no, transp_flag);
  tuple_size = calculate_tuple_size(
      rnk_n, ni, no, rnk_pm, howmany, trafo_flag, transp_flag);

  /* plan forward trafo of last dims */
  mem_tmp = PX(local_size_outrafo)(
      Nb, rnk_n - rnk_pm, &n[rnk_pm], &ni[rnk_pm], &no[rnk_pm],
      howmany, trafo_flag);
  mem = MAX(mem, mem_tmp);

  pni = PX(malloc_INT)(rnk_n);
  pno = PX(malloc_INT)(rnk_n);
  PX(physical_dft_size)(rnk_n, ni, trafo_flag, pni);
  PX(physical_dft_size)(rnk_n, no, trafo_flag, pno);

  /* only trafo of last dimensions
is r2c or c2r */ if(trafo_flag & PFFTI_TRAFO_RDFT) trafo_flag = PFFTI_TRAFO_C2C; for(int t=0; t