Unverified Commit 795f2fd8 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer
Browse files

properly support threaded FFTs with MKL and document it. remove commented out FFTW2 code

parent 4134d7fd
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -739,7 +739,7 @@ if(PKG_KSPACE)
  else()
    message(STATUS "Using double precision FFTs")
  endif()
  if(FFT_FFTW_THREADS)
  if(FFT_FFTW_THREADS OR FFT_MKL_THREADS)
    message(STATUS "Using threaded FFTs")
  else()
    message(STATUS "Using non-threaded FFTs")
+5 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ if(PKG_KSPACE)
    include_directories(${${FFTW}_INCLUDE_DIRS})
    list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
    if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
      option(FFT_FFTW_THREADS "Use threaded FFT library" ON)
      option(FFT_FFTW_THREADS "Use threaded FFTW library" ON)
    else()
      option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
    endif()
@@ -38,6 +38,10 @@ if(PKG_KSPACE)
  elseif(FFT STREQUAL "MKL")
    find_package(MKL REQUIRED)
    add_definitions(-DFFT_MKL)
    option(FFT_MKL_THREADS "Use threaded MKL FFT" ON)
    if(FFT_MKL_THREADS)
      add_definitions(-DFFT_MKL_THREADS)
    endif()
    include_directories(${MKL_INCLUDE_DIRS})
    list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
  else()
+8 −4
Original line number Diff line number Diff line
@@ -106,6 +106,7 @@ to assist:
   -D FFTW3_LIBRARIES=path     # path to FFTW3 libraries
   -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
   -D MKL_INCLUDE_DIRS=path    # ditto for Intel MKL library
   -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
   -D MKL_LIBRARIES=path

**Makefile.machine settings**\ :
@@ -117,6 +118,7 @@ to assist:
                                 # default is KISS if not specified
   FFT_INC = -DFFT_SINGLE        # do not specify for double precision
   FFT_INC = -DFFT_FFTW_THREADS  # enable using threaded FFTW3 libraries
   FFT_INC = -DFFT_MKL_THREADS   # enable using threaded FFTs with MKL libraries
   FFT_INC = -DFFT_PACK_ARRAY    # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY

# default is FFT\_PACK\_ARRAY if not specified
@@ -129,12 +131,14 @@ to assist:
   FFT_LIB =       -lfftw3             # FFTW3 double precision
   FFT_LIB =       -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
   FFT_LIB =       -lfftw3 -lfftw3f    # FFTW3 single precision
   FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core  # MKL with Intel compiler
   FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core     # MKL with GNU compier
   FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core   # MKL with Intel compiler, serial interface
   FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core      # MKL with GNU compier, serial interface
   FFT_LIB =       -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
   FFT_LIB =       -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core      # MKL with GNU compiler, threaded interface

As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if
make can find the FFT header and library files.  You must specify
FFT\_LIB with the appropriate FFT libraries to include in the link.
the compiler can find the FFT header and library files in its default search path.
You must specify FFT\_LIB with the appropriate FFT libraries to include in the link.

**CMake and make info**\ :

+38 −13
Original line number Diff line number Diff line
@@ -147,12 +147,10 @@ public:

  KOKKOS_INLINE_FUNCTION
  void operator() (const int &i) const {
#if defined(FFT_FFTW3) || defined(FFT_CUFFT)
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_MKL)
    FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
    *(out_ptr++) *= norm;
    *(out_ptr++) *= norm;
#elif defined(FFT_MKL)
    d_out(i) *= norm;
#else
    d_out(i,0) *= norm;
    d_out(i,1) *= norm;
@@ -607,7 +605,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
  DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
  DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
  //DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_fast);

  DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@@ -617,7 +617,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
  DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
  DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
  //DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_mid);

  DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@@ -627,7 +629,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
  DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
  DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
  //DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_slow);

  if (scaled == 0)
@@ -684,6 +688,16 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
                       NULL,&nslow,1,plan->length3,
                       NULL,&nslow,1,plan->length3,
                       FFTW_BACKWARD,FFTW_ESTIMATE);

  if (scaled == 0)
    plan->scaled = 0;
  else {
    plan->scaled = 1;
    plan->norm = 1.0/(nfast*nmid*nslow);
    plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
      (out_khi-out_klo+1);
  }

#elif defined(FFT_CUFFT)
  cufftPlanMany(&(plan->plan_fast), 1, &nfast,
    &nfast,1,plan->length1,
@@ -699,6 +713,16 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
    &nslow,1,plan->length3,
    &nslow,1,plan->length3,
    CUFFT_TYPE,plan->total3/plan->length3);

  if (scaled == 0)
    plan->scaled = 0;
  else {
    plan->scaled = 1;
    plan->norm = 1.0/(nfast*nmid*nslow);
    plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
      (out_khi-out_klo+1);
  }

#else
  kissfftKK = new KissFFTKokkos<DeviceType>();

@@ -726,7 +750,6 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
    plan->cfg_slow_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL);
    plan->cfg_slow_backward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL);
  }
#endif

  if (scaled == 0)
    plan->scaled = 0;
@@ -737,6 +760,8 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
      (out_khi-out_klo+1);
  }

#endif

  return plan;
}

@@ -839,13 +864,13 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d

#if defined(FFT_MKL)
  if (flag == -1) {
    DftiComputeForward(plan->handle_fast,data);
    DftiComputeForward(plan->handle_mid,data);
    DftiComputeForward(plan->handle_slow,data);
    DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data());
    DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data());
    DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data());
  } else {
    DftiComputeBackward(plan->handle_fast,data);
    DftiComputeBackward(plan->handle_mid,data);
    DftiComputeBackward(plan->handle_slow,data);
    DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data());
    DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data());
    DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data());
  }
#elif defined(FFT_FFTW3)
  if (flag == -1) {
+9 −91
Original line number Diff line number Diff line
@@ -107,13 +107,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
    DftiComputeForward(plan->handle_fast,data);
  else
    DftiComputeBackward(plan->handle_fast,data);
  /*
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
  else
   fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
  */
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_fast_forward;
@@ -148,13 +141,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
    DftiComputeForward(plan->handle_mid,data);
  else
    DftiComputeBackward(plan->handle_mid,data);
  /*
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
  */
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_mid_forward;
@@ -189,13 +175,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
    DftiComputeForward(plan->handle_slow,data);
  else
    DftiComputeBackward(plan->handle_slow,data);
  /*
#elif defined(FFT_FFTW2)
  if (flag == -1)
    fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
  else
    fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
  */
#elif defined(FFT_FFTW3)
  if (flag == -1)
    theplan=plan->plan_slow_forward;
@@ -508,6 +487,9 @@ struct fft_plan_3d *fft_3d_create_plan(
  DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
  DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_fast);

  DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@@ -517,6 +499,9 @@ struct fft_plan_3d *fft_3d_create_plan(
  DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
  DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_mid);

  DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@@ -526,6 +511,9 @@ struct fft_plan_3d *fft_3d_create_plan(
  DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
  DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
  DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
#if defined(FFT_MKL_THREADS)
  DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
  DftiCommitDescriptor(plan->handle_slow);

  if (scaled == 0)
@@ -537,50 +525,6 @@ struct fft_plan_3d *fft_3d_create_plan(
      (out_khi-out_klo+1);
  }

  /*
#elif defined(FFT_FFTW2)

  plan->plan_fast_forward =
    fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
  plan->plan_fast_backward =
    fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);

  if (nmid == nfast) {
    plan->plan_mid_forward = plan->plan_fast_forward;
    plan->plan_mid_backward = plan->plan_fast_backward;
  }
  else {
    plan->plan_mid_forward =
      fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
    plan->plan_mid_backward =
      fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
  }

  if (nslow == nfast) {
    plan->plan_slow_forward = plan->plan_fast_forward;
    plan->plan_slow_backward = plan->plan_fast_backward;
  }
  else if (nslow == nmid) {
    plan->plan_slow_forward = plan->plan_mid_forward;
    plan->plan_slow_backward = plan->plan_mid_backward;
  }
  else {
    plan->plan_slow_forward =
      fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
    plan->plan_slow_backward =
      fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
  }

  if (scaled == 0)
    plan->scaled = 0;
  else {
    plan->scaled = 1;
    plan->norm = 1.0/(nfast*nmid*nslow);
    plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
      (out_khi-out_klo+1);
  }
  */

#elif defined(FFT_FFTW3)
#if defined(FFT_FFTW_THREADS)
  if (nthreads > 1) {
@@ -686,20 +630,6 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan)
  DftiFreeDescriptor(&(plan->handle_fast));
  DftiFreeDescriptor(&(plan->handle_mid));
  DftiFreeDescriptor(&(plan->handle_slow));
  /*
#elif defined(FFT_FFTW2)
  if (plan->plan_slow_forward != plan->plan_fast_forward &&
      plan->plan_slow_forward != plan->plan_mid_forward) {
    fftw_destroy_plan(plan->plan_slow_forward);
    fftw_destroy_plan(plan->plan_slow_backward);
  }
  if (plan->plan_mid_forward != plan->plan_fast_forward) {
    fftw_destroy_plan(plan->plan_mid_forward);
    fftw_destroy_plan(plan->plan_mid_backward);
  }
  fftw_destroy_plan(plan->plan_fast_forward);
  fftw_destroy_plan(plan->plan_fast_backward);
  */
#elif defined(FFT_FFTW3)
  FFTW_API(destroy_plan)(plan->plan_slow_forward);
  FFTW_API(destroy_plan)(plan->plan_slow_backward);
@@ -840,18 +770,6 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
    DftiComputeBackward(plan->handle_mid,data);
    DftiComputeBackward(plan->handle_slow,data);
  }
  /*
#elif defined(FFT_FFTW2)
  if (flag == -1) {
    fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0);
    fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0);
    fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0);
  } else {
    fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0);
    fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0);
    fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0);
  }
  */
#elif defined(FFT_FFTW3)
  FFTW_API(plan) theplan;
  if (flag == -1)
Loading