/* * Copyright (c) 2011-2013 Michael Pippig * * This file is part of PFFT. * * PFFT is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * PFFT is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with PFFT. If not, see . * */ #include "pfft.h" #include "ipfft.h" #include "util.h" #include /* memcpy, memset */ static ousam_plan_dd ousam_dd_mkplan( int rnk); static ousam_plan_1d ousam_1d_mkplan(void); static void ousam_1d_rmplan( ousam_plan_1d ths); static void execute_ousam_1d( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out); static int illegal_params( int rnk, const INT *ni, const INT *no, unsigned ousam_flag); static ousam_plan_1d plan_ousam_1d( INT n0, INT n1i, INT n1o, INT howmany, R *in, R *out, unsigned trafo_flag, unsigned si_flag, unsigned ousam_flag); static int is_complex_data( unsigned trafo_flag, unsigned ousam_flag); static void execute_embed( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out); static void execute_trunc( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out); INT PX(local_size_ousam_dd)( INT nb, int rnk, const INT *ni, const INT *no, INT howmany, unsigned trafo_flag ) { INT *pni = (INT*) malloc(sizeof(INT) * (size_t) rnk); INT *pno = (INT*) malloc(sizeof(INT) * (size_t) rnk); PX(physical_dft_size)(rnk, ni, trafo_flag, pni); PX(physical_dft_size)(rnk, no, trafo_flag, pno); INT memi = nb * PX(prod_INT)(rnk, pni) * howmany; INT memo = nb * PX(prod_INT)(rnk, pno) * howmany; free(pni); free(pno); return MAX(memi, memo); } /* generalizes 'plan_ousam_1d' to multiple dimensions */ /* default: * nb x ni0 x ni1 x ... nir x hm -> nb x no0 x no1 x ... nor x hm * transposed (PFFTI_OUSAM_TRANSPOSED): * ni0 x nb x ni1 x ... nir x hm -> no0 x nb x no1 x ... nor x hm * * */ ousam_plan_dd PX(plan_ousam_dd)( INT nb, int rnk, const INT *ni, const INT *no, INT howmany, R *in, R *out, unsigned trafo_flag_user, unsigned si_flag, unsigned ousam_flag ) { INT n0, tuple_size, n1i, n1o; INT *pni, *pno; unsigned trafo_flag = trafo_flag_user; ousam_plan_dd ths; if( illegal_params(rnk, ni, no, ousam_flag) ) return NULL; ths = ousam_dd_mkplan(rnk); pni = (INT*) malloc(sizeof(INT) * (size_t) rnk); pno = (INT*) malloc(sizeof(INT) * (size_t) rnk); PX(physical_dft_size)(rnk, ni, trafo_flag, pni); PX(physical_dft_size)(rnk, no, trafo_flag, pno); /* Index 'r' determines the order of plan execution. * Index 't' determines the dimensional order 1d embed/trunc. * Note that 't' runs backward for embed and forward for trunc. */ for(int r=0; rousam_1d[r] = plan_ousam_1d( n0, n1i, n1o, tuple_size, in, out, trafo_flag, si_flag, ousam_flag); } free(pni); free(pno); return ths; } static int illegal_params( int rnk, const INT *ni, const INT *no, unsigned ousam_flag ) { if( ousam_flag & PFFTI_OUSAM_EMBED ){ /* check ni < no for all dims */ for(int t=0; t no[t] ) return 1; } else if( ousam_flag & PFFTI_OUSAM_TRUNC ){ /* check ni > no for all dims */ for(int t=0; t n0 x n1o x h */ /* case n1i < n1o: * map array of size n0 x n1i x h into bigger array * of size n0 x n1o x h and fill the gaps with zeros */ /* case n1i > n1o: * truncate array of size n0 x n1i x h to smaller array * n0 x n1o x h and put it contiguous into memory */ static ousam_plan_1d plan_ousam_1d( INT n0, INT n1i, INT n1o, INT howmany, R *in, R *out, unsigned trafo_flag, unsigned si_flag, unsigned ousam_flag ) { INT pni, pno; ousam_plan_1d ths; /* Return empty plan if nothing to do and inplace. * Attention: ousam generates the padding for r2c and c2r trafos also for n1i == n1o. */ if( (n1i == n1o) && (in == out) ) if( !work_on_r2c_input(trafo_flag, ousam_flag) ) if( !work_on_c2r_output(trafo_flag, ousam_flag) ) return NULL; ths = ousam_1d_mkplan(); pni = PX(physical_dft_size_1d)(n1i, trafo_flag); pno = PX(physical_dft_size_1d)(n1o, trafo_flag); /* double 'howmany' for complex data type */ if( is_complex_data(trafo_flag, ousam_flag) ) howmany *= 2; ths->N0 = n0; ths->N1i = n1i; ths->N1o = n1o; /* default: no padding (only necessary for r2c and c2r) */ ths->Pi = 0; ths->Po = 0; if(ousam_flag & PFFTI_OUSAM_EMBED) { ths->Zl = 0; ths->D = ths->N1i; ths->Zr = ths->N1o - ths->N1i; if(si_flag & PFFT_SHIFTED_IN) ths->Zl = ths->Zr = ths->Zr/2; } if(ousam_flag & PFFTI_OUSAM_TRUNC) { ths->Zl = 0; ths->D = ths->N1o; ths->Zr = ths->N1i - ths->N1o; if(si_flag & PFFT_SHIFTED_OUT) ths->Zl = ths->Zr = ths->Zr/2; } /* special case r2c */ if( trafo_flag & PFFTI_TRAFO_R2C ) { /* use physical array size for r2c output */ if( ousam_flag & PFFTI_OUSAM_TRUNC ) { ths->N1i = pni; ths->N1o = pno; ths->Zl = pni - pno; ths->D = pno; ths->Zr = 0; } /* generate padding for r2c input */ if( ousam_flag & PFFTI_OUSAM_EMBED ){ ths->Zl = 0; ths->D = n1i; ths->Zr = n1o - n1i; ths->Po = 2*pno-n1o; ths->N1o = 2*pno; /* skip padding generation if the inputs are already padded */ if( trafo_flag & PFFTI_TRAFO_PADDED ){ ths->Pi = 2*pni-n1i; ths->N1i = 2*pni; } if( si_flag & PFFT_SHIFTED_IN ) ths->Zl = ths->Zr = ths->Zr/2; } } /* special case c2r */ if( trafo_flag & PFFTI_TRAFO_C2R ) { /* use physical array size for c2r input */ if( ousam_flag & PFFTI_OUSAM_EMBED ) { ths->N1i = pni; ths->N1o = pno; ths->Zl = pno - pni; ths->D = pni; ths->Zr = 0; } /* truncate padding for c2r output */ if( ousam_flag & PFFTI_OUSAM_TRUNC ){ ths->Zl = 0; ths->D = n1o; ths->Zr = n1i - n1o; ths->Pi = 2*pni-n1i; ths->N1i = 2*pni; /* do not truncate padding, if the user explicitly wants it */ if( trafo_flag & PFFTI_TRAFO_PADDED ){ ths->Po = 2*pno-n1o; ths->N1o = 2*pno; } if( si_flag & PFFT_SHIFTED_OUT ) ths->Zl = ths->Zr = ths->Zr/2; } } /* include howmany into parameters */ ths->N1i *= howmany; ths->N1o *= howmany; ths->Zl *= howmany; ths->D *= howmany; ths->Zr *= howmany; ths->Pi *= howmany; ths->Po *= howmany; ths->in = in; ths->out = out; ths->ousam_flag = ousam_flag; return ths; } static int is_complex_data( unsigned trafo_flag, unsigned ousam_flag ) { if( trafo_flag & PFFTI_TRAFO_C2C ) return 1; if( trafo_flag & PFFTI_TRAFO_R2C) if( ousam_flag & PFFTI_OUSAM_TRUNC ) return 1; if( trafo_flag & PFFTI_TRAFO_C2R ) if( ousam_flag & PFFTI_OUSAM_EMBED ) return 1; return 0; } void PX(execute_ousam_dd)( ousam_plan_dd ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out ) { if(ths == NULL) return; for(int t=0; trnk; t++) execute_ousam_1d(ths->ousam_1d[t], planned_in, planned_out, executed_in, executed_out); } static void execute_ousam_1d( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out ) { if(ths == NULL) return; if( ths->ousam_flag & PFFTI_OUSAM_EMBED ) execute_embed(ths, planned_in, planned_out, executed_in, executed_out); else execute_trunc(ths, planned_in, planned_out, executed_in, executed_out); } /* oversampling in one local dimension: * split the array at Cl and add Cm zeros in the gap */ static void execute_embed( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out ) { INT mi, mo, k0, k1; const INT N0 = ths->N0, N1i = ths->N1i, N1o = ths->N1o; const INT Zl = ths->Zl, Zr = ths->Zr; const INT D = ths->D; const INT Pi = ths->Pi, Po = ths->Po; R *in = (ths->in == planned_in) ? executed_in : executed_out; R *out = (ths->out == planned_out) ? executed_out : executed_in; if( (Zl == 0) && (Zr == 0) && (Pi == 0) && (Po == 0) ){ if( in != out ) if(N0*N1i > 0) memcpy(in, out, sizeof(R) * (size_t) (N0*N1i)); return; } if( in != out ){ mi = mo = 0; for(k0 = 0; k0 < N0; k0++){ /* fill in left zero block */ if(Zl > 0) memset(&out[mo], 0, sizeof(R) * (size_t) Zl); mo += Zl; /* copy data block */ for(k1 = 0; k1 < D; k1++) out[mo++] = in[mi++]; /* fill in right zero block */ if(Zr > 0) memset(&out[mo], 0, sizeof(R) * (size_t) Zr); mo += Zr; /* padding for r2c/c2r */ mi += Pi; mo += Po; } } else { /* copy from last to first element to allow inplace execute */ mi = N0 * N1i - 1; mo = N0 * N1o - 1; for(k0 = 0; k0 < N0; k0++){ /* padding for r2c/c2r */ mi -= Pi; mo -= Po; /* fill in right zero block */ mo -= Zr; if(Zr > 0) memset(&out[mo+1], 0, sizeof(R) * (size_t) Zr); /* special case: N0 == 1 and Zl == 0 * left data block stays where it is */ if( (N0 != 1) || (Zl != 0) ){ /* copy data block */ for(k1 = 0; k1 < D; k1++) out[mo--] = in[mi--]; /* fill in left zero block */ mo -= Zl; if(Zl > 0) memset(&out[mo+1], 0, sizeof(R) * (size_t) Zl); } } } } /* undersampling in one local dimension: * skip Zl coefficients starting at 0 and Zr coefficients starting at D */ static void execute_trunc( ousam_plan_1d ths, R *planned_in, R *planned_out, R *executed_in, R *executed_out ) { INT mi, mo, k0, k1; const INT N0 = ths->N0, N1i = ths->N1i; const INT Zl = ths->Zl, Zr = ths->Zr; const INT D = ths->D; const INT Pi = ths->Pi, Po = ths->Po; R *in = (ths->in == planned_in) ? executed_in : executed_out; R *out = (ths->out == planned_out) ? executed_out : executed_in; if( (Zl == 0) && (Zr == 0) && (Pi == 0) && (Po == 0) ){ if( in != out ) if(N0*N1i > 0) memcpy(in, out, sizeof(R) * (size_t) (N0*N1i)); return; } mi = mo = 0; if( (in == out) && (N0 == 1) && (Zl == 0) ){ /* special case: (N0 == 1) and (Zl == 0) and inplace * data block stays where it is */ mi += D; mo += D; /* skip right zero block */ mi += Zr; } else { for(k0 = 0; k0 < N0; k0++){ /* skip left zero block */ mi += Zl; /* copy data block */ for(k1 = 0; k1 < D; k1++) out[mo++] = in[mi++]; /* skip right zero block */ mi += Zr; /* skip padding for inplace r2c */ mi += Pi; mo += Po; } } } static ousam_plan_dd ousam_dd_mkplan( int rnk ) { ousam_plan_dd ths = (ousam_plan_dd) malloc(sizeof(ousam_plan_dd_s)); ths->ousam_1d = (ousam_plan_1d*) malloc(sizeof(ousam_plan_1d) * (size_t)rnk); ths->rnk = rnk; return ths; } static ousam_plan_1d ousam_1d_mkplan(void) { return (ousam_plan_1d) malloc(sizeof(ousam_plan_1d_s)); } void PX(ousam_dd_rmplan)( ousam_plan_dd ths ) { /* plan was already destroyed or never initialized */ if(ths==NULL) return; /* destroy all ousam plans and free the array of pointers */ if(ths->ousam_1d != NULL){ for(int t=0; trnk; t++) ousam_1d_rmplan(ths->ousam_1d[t]); free(ths->ousam_1d); } /* free struct */ free(ths); } static void ousam_1d_rmplan( ousam_plan_1d ths ) { /* plan was already destroyed or never initialized */ if(ths==NULL) return; free(ths); }