Commit 1851a9f7 authored by Stan Gerald Moore
Browse files

Refactor Kokkos FFT_DATA

parent 0d0c5e29
Loading
Loading
Loading
Loading
+48 −46
Original line number Diff line number Diff line
@@ -88,8 +88,8 @@ FFT3dKokkos<DeviceType>::~FFT3dKokkos()
// Entry point that runs the 3d FFT for `plan` on scalar views.
//   d_in, d_out : FFT_SCALAR views holding the input and output buffers
//   flag        : forwarded unchanged to fft_3d_kokkos; elsewhere in this
//                 file flag == -1 selects the forward transform and any
//                 other value the backward one
template<class DeviceType>
void FFT3dKokkos<DeviceType>::compute(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, int flag)
{
  // View the scalar buffers as complex data: each wrapper starts at the
  // same data pointer and has size()/2 FFT_DATA elements.
  typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size()/2);
  typename AT::t_FFT_DATA_1d d_out_data(d_out.data(),d_out.size()/2);
  typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2);
  typename AT::t_FFT_DATA_1d d_out_data((FFT_DATA*)d_out.data(),d_out.size()/2);

  // All of the actual work happens in fft_3d_kokkos.
  fft_3d_kokkos(d_in_data,d_out_data,flag,plan);
}
@@ -99,7 +99,7 @@ void FFT3dKokkos<DeviceType>::compute(typename AT::t_FFT_SCALAR_1d d_in, typenam
// Wraps d_in's data pointer as an FFT_DATA view and hands it to
// fft_3d_1d_only_kokkos together with nsize, flag and plan.
//   d_in  : FFT_SCALAR view holding the data to transform
//   nsize : passed through unchanged to fft_3d_1d_only_kokkos
//   flag  : passed through unchanged (-1 = forward in the FFT calls below)
template<class DeviceType>
void FFT3dKokkos<DeviceType>::timing1d(typename AT::t_FFT_SCALAR_1d d_in, int nsize, int flag)
{
  // NOTE(review): the two wrapper lines differ in element count
  // (size() vs size()/2); the (FFT_DATA*) form uses size()/2, which matches
  // the wrappers built in compute().
  typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size());
  typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2);

  fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan);
}
@@ -151,7 +151,9 @@ public:
    FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
    *(out_ptr++) *= norm;
    *(out_ptr++) *= norm;
#else    /* FFT_MKL or FFT_KISS */
#elif defined(FFT_MKL)
    d_out(i) *= norm;
#else // FFT_KISS
    d_out(i,0) *= norm;
    d_out(i,1) *= norm;
#endif
@@ -200,9 +202,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
    if (plan->pre_target == 0) d_copy = d_out;
    else d_copy = plan->d_copy;

     d_in_scalar = typename AT::t_FFT_SCALAR_1d(d_in.data(),d_in.size());
     d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size());
     d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
     d_in_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2);
     d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
     d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);

    remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar,
             d_scratch_scalar, plan->pre_plan);
@@ -217,16 +219,16 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ

  #if defined(FFT_MKL)
    if (flag == -1)
      DftiComputeForward(plan->handle_fast,(FFT_DATA *)d_data.data());
      DftiComputeForward(plan->handle_fast,d_data.data());
    else
      DftiComputeBackward(plan->handle_fast,(FFT_DATA *)d_data.data());
      DftiComputeBackward(plan->handle_fast,d_data.data());
  #elif defined(FFT_FFTW3)
    if (flag == -1)
      FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data());
  #elif defined(FFT_CUFFT)
    cufftExec(plan->plan_fast,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
    cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
  #else
    typename AT::t_FFT_DATA_1d d_tmp =
     typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
@@ -247,9 +249,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
  if (plan->mid1_target == 0) d_copy = d_out;
  else d_copy = plan->d_copy;

  d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size()*2);
  d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size()*2);
  d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()*2);
  d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
  d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
  d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);

  remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
           d_scratch_scalar, plan->mid1_plan);
@@ -263,16 +265,16 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ

  #if defined(FFT_MKL)
    if (flag == -1)
      DftiComputeForward(plan->handle_mid,(FFT_DATA *)d_data.data());
      DftiComputeForward(plan->handle_mid,d_data.data());
    else
      DftiComputeBackward(plan->handle_mid,(FFT_DATA *)d_data.data());
      DftiComputeBackward(plan->handle_mid,d_data.data());
  #elif defined(FFT_FFTW3)
    if (flag == -1)
      FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data());
  #elif defined(FFT_CUFFT)
    cufftExec(plan->plan_mid,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
    cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
  #else
    if (flag == -1)
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
@@ -289,9 +291,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
  if (plan->mid2_target == 0) d_copy = d_out;
  else d_copy = plan->d_copy;

  d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size());
  d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size());
  d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
  d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
  d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
  d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);

  remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
           d_scratch_scalar, plan->mid2_plan);
@@ -305,16 +307,16 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ

  #if defined(FFT_MKL)
    if (flag == -1)
      DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data());
      DftiComputeForward(plan->handle_slow,d_data.data());
    else
      DftiComputeBackward(plan->handle_slow,(FFT_DATA *)d_data.data());
      DftiComputeBackward(plan->handle_slow,d_data.data());
  #elif defined(FFT_FFTW3)
    if (flag == -1)
      FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data());
    else
      FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data());
      FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data());
  #elif defined(FFT_CUFFT)
    cufftExec(plan->plan_slow,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
    cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
  #else
    if (flag == -1)
      f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
@@ -329,9 +331,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
  // destination is always out

  if (plan->post_plan) {
    d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size());
    d_out_scalar = typename AT::t_FFT_SCALAR_1d(d_out.data(),d_out.size());
    d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
    d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
    d_out_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2);
    d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);

    remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar,
             d_scratch_scalar, plan->post_plan);
@@ -839,28 +841,28 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d

#if defined(FFT_MKL)
  if (flag == -1) {
    DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data());
    DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data());
    DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data());
    DftiComputeForward(plan->handle_fast,d_data.data());
    DftiComputeForward(plan->handle_mid,d_data.data());
    DftiComputeForward(plan->handle_slow,d_data.data());
  } else {
    DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data());
    DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data());
    DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data());
    DftiComputeBackward(plan->handle_fast,d_data.data());
    DftiComputeBackward(plan->handle_mid,d_data.data());
    DftiComputeBackward(plan->handle_slow,d_data.data());
  }
#elif defined(FFT_FFTW3)
  if (flag == -1) {
    FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data());
    FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data());
    FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data());
  } else {
    FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
    FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data());
    FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data());
    FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data());
  }
#elif defined(FFT_CUFFT)
  cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
  cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
  cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
  cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
  cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
  cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
#else
  kiss_fft_functor<DeviceType> f;
  typename AT::t_FFT_DATA_1d d_tmp = typename AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0());
+0 −67
Original line number Diff line number Diff line
@@ -19,73 +19,6 @@
#include "remap_kokkos.h"
#include "fftdata_kokkos.h"

// with KOKKOS in CUDA mode we can only have
// CUFFT or KISSFFT, thus undefine all other
// FFTs here, since they may be valid in fft3d.cpp

#if defined(KOKKOS_ENABLE_CUDA)
# if defined(FFT_FFTW)
#  undef FFT_FFTW
# endif
# if defined(FFT_FFTW3)
#  undef FFT_FFTW3
# endif
# if defined(FFT_MKL)
#  undef FFT_MKL
# endif
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
#  define FFT_KISSFFT
# endif
#else
# if defined(FFT_CUFFT)
#  error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
# endif
// if user set FFTW, it means FFTW3
# ifdef FFT_FFTW
#  define FFT_FFTW3
# endif
# ifdef FFT_FFTW_THREADS
#  if !defined(FFT_FFTW3)
#   error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
#  endif
# endif
#endif

#if defined(FFT_MKL)
  #include "mkl_dfti.h"
  #if defined(FFT_SINGLE)
    typedef float _Complex FFT_DATA;
    #define FFT_MKL_PREC DFTI_SINGLE
  #else
    typedef double _Complex FFT_DATA;
    #define FFT_MKL_PREC DFTI_DOUBLE
  #endif
#elif defined(FFT_FFTW3)
  #include "fftw3.h"
  #if defined(FFT_SINGLE)
    typedef fftwf_complex FFT_DATA;
    #define FFTW_API(function)  fftwf_ ## function
  #else
    typedef fftw_complex FFT_DATA;
    #define FFTW_API(function) fftw_ ## function
  #endif
#elif defined(FFT_CUFFT)
  #include "cufft.h"
  #if defined(FFT_SINGLE)
    #define cufftExec cufftExecC2C
    #define CUFFT_TYPE CUFFT_C2C
    typedef cufftComplex FFT_DATA;
  #else
    #define cufftExec cufftExecZ2Z
    #define CUFFT_TYPE CUFFT_Z2Z
    typedef cufftDoubleComplex FFT_DATA;
  #endif
#else
  #include "kissfft_kokkos.h"
  #ifndef FFT_KISSFFT
  #define FFT_KISSFFT
  #endif
#endif

namespace LAMMPS_NS {

+78 −0
Original line number Diff line number Diff line
@@ -43,4 +43,82 @@ typedef double FFT_SCALAR;
#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)"
#endif


// with KOKKOS in CUDA mode we can only have
// CUFFT or KISSFFT, thus undefine all other
// FFTs here, since they may be valid in fft3d.cpp

#if defined(KOKKOS_ENABLE_CUDA)
// CUDA build: discard any FFTW / FFTW3 / MKL selection so that the
// backend chain further down can only pick CUFFT or KISSFFT
# if defined(FFT_FFTW)
#  undef FFT_FFTW
# endif
# if defined(FFT_FFTW3)
#  undef FFT_FFTW3
# endif
# if defined(FFT_MKL)
#  undef FFT_MKL
# endif
// neither CUDA-compatible backend was requested: default to KISSFFT
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
#  define FFT_KISSFFT
# endif
#else
// non-CUDA build: CUFFT is rejected at compile time
# if defined(FFT_CUFFT)
#  error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
# endif
// if user set FFTW, it means FFTW3
# ifdef FFT_FFTW
#  define FFT_FFTW3
# endif
// FFT_FFTW_THREADS is only accepted together with FFT_FFTW3
# ifdef FFT_FFTW_THREADS
#  if !defined(FFT_FFTW3)
#   error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
#  endif
# endif
#endif

#if defined(FFT_MKL)
  #include "mkl_dfti.h"
  #if defined(FFT_SINGLE)
    typedef float _Complex FFT_DATA;
    #define FFT_MKL_PREC DFTI_SINGLE
  #else
    typedef double _Complex FFT_DATA;
    #define FFT_MKL_PREC DFTI_DOUBLE
  #endif
#elif defined(FFT_FFTW3)
  #include "fftw3.h"
  #if defined(FFT_SINGLE)
    typedef fftwf_complex FFT_DATA;
    #define FFTW_API(function)  fftwf_ ## function
  #else
    typedef fftw_complex FFT_DATA;
    #define FFTW_API(function) fftw_ ## function
  #endif
#elif defined(FFT_CUFFT)
  #include "cufft.h"
  #if defined(FFT_SINGLE)
    #define cufftExec cufftExecC2C
    #define CUFFT_TYPE CUFFT_C2C
    typedef cufftComplex FFT_DATA;
  #else
    #define cufftExec cufftExecZ2Z
    #define CUFFT_TYPE CUFFT_Z2Z
    typedef cufftDoubleComplex FFT_DATA;
  #endif
#else
  #include "kissfft_kokkos.h"
  // Scalar type of one real/imaginary component for the KISS FFT fallback:
  // float when FFT_SINGLE is defined, double otherwise.
  #if defined(FFT_SINGLE)
    #define kiss_fft_scalar float
  #else
    #define kiss_fft_scalar double
  #endif
  // Complex element type for the KISS FFT fallback (the other backends
  // typedef FFT_DATA from their own library types). The typedef must live
  // outside the precision #if so that FFT_SINGLE builds also get an
  // FFT_DATA type; nested in the double branch it would be missing there.
  typedef struct {
      kiss_fft_scalar re;
      kiss_fft_scalar im;
  } FFT_DATA;
  #ifndef FFT_KISSFFT
  #define FFT_KISSFFT
  #endif
#endif

#endif
+3 −17
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#define LMP_LMPTYPE_KOKKOS_H

#include "pointers.h"
#include "fftdata_kokkos.h"

#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
@@ -29,21 +30,6 @@ enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
#define ISFINITE(x) std::isfinite(x)
#endif

// User-settable FFT precision

// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)

#ifdef FFT_SINGLE
#define FFT_PRECISION 1
#define MPI_FFT_SCALAR MPI_FLOAT
typedef float FFT_SCALAR;
#else
#define FFT_PRECISION 2
#define MPI_FFT_SCALAR MPI_DOUBLE
typedef double FFT_SCALAR;
#endif

#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8

@@ -769,7 +755,7 @@ typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_
typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d;

typedef Kokkos::
  DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
  DualView<FFT_DATA*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um;

@@ -1030,7 +1016,7 @@ typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_
typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d;

typedef Kokkos::
  DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
  DualView<FFT_DATA*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um;