Unverified Commit 968ac3d8 authored by Axel Kohlmeyer's avatar Axel Kohlmeyer Committed by GitHub
Browse files

Merge pull request #1367 from mkanski/reax_better_errors

Removing calls to MPI_Abort from reax/c + a fix for a memory leak
parents f73ccc30 6ff1fee6
Loading
Loading
Loading
Loading
+33 −41
Original line number Diff line number Diff line
@@ -447,23 +447,23 @@ int PairReaxCKokkos<DeviceType>::Init_Lookup_Tables()
  num_atom_types = atom->ntypes;
  dr = control->nonb_cut / control->tabulate;
  h = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:h", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:h");
  fh = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:fh", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:fh");
  fvdw = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:fvdw", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:fvdw");
  fCEvd = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:fCEvd", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:fCEvd");
  fele = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:fele", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:fele");
  fCEclmb = (double*)
    smalloc( (control->tabulate+2) * sizeof(double), "lookup:fCEclmb", world );
    smalloc( control->error_ptr, (control->tabulate+2) * sizeof(double), "lookup:fCEclmb");

  LR = (LR_lookup_table**)
    scalloc( num_atom_types+1, sizeof(LR_lookup_table*), "lookup:LR", world );
    scalloc( control->error_ptr, num_atom_types+1, sizeof(LR_lookup_table*), "lookup:LR");
  for( i = 0; i < num_atom_types+1; ++i )
    LR[i] = (LR_lookup_table*)
      scalloc( num_atom_types+1, sizeof(LR_lookup_table), "lookup:LR[i]", world );
      scalloc( control->error_ptr, num_atom_types+1, sizeof(LR_lookup_table), "lookup:LR[i]");

  for( i = 1; i <= num_atom_types; ++i ) {
    for( j = i; j <= num_atom_types; ++j ) {
@@ -473,22 +473,18 @@ int PairReaxCKokkos<DeviceType>::Init_Lookup_Tables()
      LR[i][j].dx = dr;
      LR[i][j].inv_dx = control->tabulate / control->nonb_cut;
      LR[i][j].y = (LR_data*)
        smalloc( LR[i][j].n * sizeof(LR_data), "lookup:LR[i,j].y", world );
        smalloc( control->error_ptr, LR[i][j].n * sizeof(LR_data), "lookup:LR[i,j].y");
      LR[i][j].H = (cubic_spline_coef*)
        smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].H" ,
                 world );
        smalloc( control->error_ptr, LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].H");
      LR[i][j].vdW = (cubic_spline_coef*)
        smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].vdW",
                 world);
        smalloc( control->error_ptr, LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].vdW");
      LR[i][j].CEvd = (cubic_spline_coef*)
        smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].CEvd",
                 world);
        smalloc( control->error_ptr, LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].CEvd");
      LR[i][j].ele = (cubic_spline_coef*)
        smalloc( LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].ele",
                 world );
        smalloc( control->error_ptr, LR[i][j].n*sizeof(cubic_spline_coef),"lookup:LR[i,j].ele");
      LR[i][j].CEclmb = (cubic_spline_coef*)
        smalloc( LR[i][j].n*sizeof(cubic_spline_coef),
                 "lookup:LR[i,j].CEclmb", world );
        smalloc( control->error_ptr, LR[i][j].n*sizeof(cubic_spline_coef),
                 "lookup:LR[i,j].CEclmb");

      for( r = 1; r <= control->tabulate; ++r ) {
        LR_vdW_Coulomb(i, j, r * dr, &(LR[i][j].y[r]) );
@@ -512,24 +508,20 @@ int PairReaxCKokkos<DeviceType>::Init_Lookup_Tables()
      vlast_vdw = fCEvd[r-1];
      vlast_ele = fele[r-1];

      Natural_Cubic_Spline( &h[1], &fh[1],
                            &(LR[i][j].H[1]), control->tabulate+1, world );
      Natural_Cubic_Spline( control->error_ptr, &h[1], &fh[1],
                            &(LR[i][j].H[1]), control->tabulate+1 );

      Complete_Cubic_Spline( &h[1], &fvdw[1], v0_vdw, vlast_vdw,
                             &(LR[i][j].vdW[1]), control->tabulate+1,
                             world );
      Complete_Cubic_Spline( control->error_ptr, &h[1], &fvdw[1], v0_vdw, vlast_vdw,
                             &(LR[i][j].vdW[1]), control->tabulate+1 );

      Natural_Cubic_Spline( &h[1], &fCEvd[1],
                            &(LR[i][j].CEvd[1]), control->tabulate+1,
                            world );
      Natural_Cubic_Spline( control->error_ptr, &h[1], &fCEvd[1],
                            &(LR[i][j].CEvd[1]), control->tabulate+1 );

      Complete_Cubic_Spline( &h[1], &fele[1], v0_ele, vlast_ele,
                             &(LR[i][j].ele[1]), control->tabulate+1,
                             world );
      Complete_Cubic_Spline( control->error_ptr, &h[1], &fele[1], v0_ele, vlast_ele,
                             &(LR[i][j].ele[1]), control->tabulate+1 );

      Natural_Cubic_Spline( &h[1], &fCEclmb[1],
                            &(LR[i][j].CEclmb[1]), control->tabulate+1,
                            world );
      Natural_Cubic_Spline( control->error_ptr, &h[1], &fCEclmb[1],
                            &(LR[i][j].CEclmb[1]), control->tabulate+1 );
    }
  }
  free(h);
@@ -555,16 +547,16 @@ void PairReaxCKokkos<DeviceType>::Deallocate_Lookup_Tables()
  for( i = 0; i <= ntypes; ++i ) {
    for( j = i; j <= ntypes; ++j )
      if (LR[i][j].n) {
        sfree( LR[i][j].y, "LR[i,j].y" );
        sfree( LR[i][j].H, "LR[i,j].H" );
        sfree( LR[i][j].vdW, "LR[i,j].vdW" );
        sfree( LR[i][j].CEvd, "LR[i,j].CEvd" );
        sfree( LR[i][j].ele, "LR[i,j].ele" );
        sfree( LR[i][j].CEclmb, "LR[i,j].CEclmb" );
        sfree( control->error_ptr, LR[i][j].y, "LR[i,j].y" );
        sfree( control->error_ptr, LR[i][j].H, "LR[i,j].H" );
        sfree( control->error_ptr, LR[i][j].vdW, "LR[i,j].vdW" );
        sfree( control->error_ptr, LR[i][j].CEvd, "LR[i,j].CEvd" );
        sfree( control->error_ptr, LR[i][j].ele, "LR[i,j].ele" );
        sfree( control->error_ptr, LR[i][j].CEclmb, "LR[i,j].CEclmb" );
      }
    sfree( LR[i], "LR[i]" );
    sfree( control->error_ptr, LR[i], "LR[i]" );
  }
  sfree( LR, "LR" );
  sfree( control->error_ptr, LR, "LR" );
}

/* ---------------------------------------------------------------------- */
+5 −5
Original line number Diff line number Diff line
@@ -113,7 +113,7 @@ PairReaxCOMP::~PairReaxCOMP()
  if (setup_flag) {
    reax_list * bonds = lists+BONDS;
    for (int i=0; i<bonds->num_intrs; ++i)
      sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction");
      sfree(error, bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction");
  }
  memory->destroy(num_nbrs_offset);

@@ -209,7 +209,7 @@ void PairReaxCOMP::compute(int eflag, int vflag)

  setup();

  Reset( system, control, data, workspace, &lists, world );
  Reset( system, control, data, workspace, &lists );

  // Why not update workspace like in MPI-only code?
  // Using the MPI-only way messes up the hb energy
@@ -410,12 +410,12 @@ void PairReaxCOMP::setup( )

    // initialize my data structures

    PreAllocate_Space( system, control, workspace, world );
    PreAllocate_Space( system, control, workspace );
    write_reax_atoms();

    int num_nbrs = estimate_reax_lists();
    if(!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR,
                  lists+FAR_NBRS, world))
                  lists+FAR_NBRS))
      error->all(FLERR,"Pair reax/c problem in far neighbor list");

    write_reax_lists();
@@ -445,7 +445,7 @@ void PairReaxCOMP::setup( )

    // check if I need to shrink/extend my data-structs

    ReAllocate( system, control, data, workspace, &lists, mpi_data );
    ReAllocate( system, control, data, workspace, &lists );
  }
}

+7 −5
Original line number Diff line number Diff line
@@ -289,9 +289,10 @@ void Validate_ListsOMP( reax_system *system, storage * /*workspace */, reax_list
      else comp = bonds->num_intrs;

      if (End_Index(i, bonds) > comp) {
        fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n",
        char errmsg[256];
        snprintf(errmsg, 256, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n",
                  step, i, End_Index(i,bonds), comp );
        MPI_Abort( comm, INSUFFICIENT_MEMORY );
        system->error_ptr->one(FLERR,errmsg);
      }
    }
  }
@@ -315,9 +316,10 @@ void Validate_ListsOMP( reax_system *system, storage * /*workspace */, reax_list
        else comp = hbonds->num_intrs;

        if (End_Index(Hindex, hbonds) > comp) {
          fprintf(stderr,"step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n",
          char errmsg[256];
          snprintf(errmsg, 256, "step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n",
                  step, Hindex, End_Index(Hindex,hbonds), comp );
          MPI_Abort( comm, INSUFFICIENT_MEMORY );
          system->error_ptr->one(FLERR, errmsg);
        }
      }
    }
+30 −43
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@
extern int Init_MPI_Datatypes(reax_system*, storage*, mpi_datatypes*, MPI_Comm, char*);
extern int Init_System(reax_system*, control_params*, char*);
extern int Init_Simulation_Data(reax_system*, control_params*, simulation_data*, char*);
extern int Init_Workspace(reax_system*, control_params*, storage*, MPI_Comm, char*);
extern int Init_Workspace(reax_system*, control_params*, storage*, char*);

/* ---------------------------------------------------------------------- */

@@ -63,7 +63,7 @@ int Init_ListsOMP( reax_system *system, control_params *control,
  bond_top = (int*) calloc( system->total_cap, sizeof(int) );
  hb_top = (int*) calloc( system->local_cap, sizeof(int) );
  Estimate_Storages( system, control, lists,
                     &Htop, hb_top, bond_top, &num_3body, comm );
                     &Htop, hb_top, bond_top, &num_3body );

  if (control->hbond_cut > 0) {
    /* init H indexes */
@@ -75,9 +75,8 @@ int Init_ListsOMP( reax_system *system, control_params *control,
    total_hbonds = (int)(MAX( total_hbonds*saferzone, mincap*MIN_HBONDS ));

    if( !Make_List( system->Hcap, total_hbonds, TYP_HBOND,
                    *lists+HBONDS, comm ) ) {
      fprintf( stderr, "not enough space for hbonds list. terminating!\n" );
      MPI_Abort( comm, INSUFFICIENT_MEMORY );
                    *lists+HBONDS ) ) {
      system->error_ptr->one( FLERR, "Not enough space for hbonds list. Terminating!" );
    }
  }

@@ -89,9 +88,8 @@ int Init_ListsOMP( reax_system *system, control_params *control,
  bond_cap = (int)(MAX( total_bonds*safezone, mincap*MIN_BONDS ));

  if( !Make_List( system->total_cap, bond_cap, TYP_BOND,
                  *lists+BONDS, comm ) ) {
    fprintf( stderr, "not enough space for bonds list. terminating!\n" );
    MPI_Abort( comm, INSUFFICIENT_MEMORY );
                  *lists+BONDS ) ) {
    system->error_ptr->one( FLERR, "Not enough space for bonds list. Terminating!\n" );
  }

  int nthreads = control->nthreads;
@@ -99,15 +97,14 @@ int Init_ListsOMP( reax_system *system, control_params *control,

  for (i = 0; i < bonds->num_intrs; ++i)
    bonds->select.bond_list[i].bo_data.CdboReduction =
      (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm);
      (double*) smalloc(system->error_ptr, sizeof(double)*nthreads, "CdboReduction");

  /* 3bodies list */
  cap_3body = (int)(MAX( num_3body*safezone, MIN_3BODIES ));
  if( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY,
                  *lists+THREE_BODIES, comm ) ){
                  *lists+THREE_BODIES ) ){

    fprintf( stderr, "Problem in initializing angles list. Terminating!\n" );
    MPI_Abort( comm, INSUFFICIENT_MEMORY );
    system->error_ptr->one( FLERR, "Problem in initializing angles list. Terminating!" );
  }

  free( hb_top );
@@ -125,60 +122,50 @@ void InitializeOMP( reax_system *system, control_params *control,
                 mpi_datatypes *mpi_data, MPI_Comm comm )
{
  char msg[MAX_STR];
  char errmsg[512];


  if (Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE) {
    fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n",
             system->my_rank );
    fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n",
             system->my_rank );
    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
    system->error_ptr->one( FLERR, "init_mpi_datatypes: could not create datatypes. "
    "Mpi_data couldn't be initialized! Terminating.");
  }

  if (Init_System(system, control, msg) == FAILURE) {
    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
    fprintf( stderr, "p%d: system could not be initialized! terminating.\n",
             system->my_rank );
    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
    snprintf( errmsg, 512, "Error on: %s. "
    "System could not be initialized! Terminating.", msg );
    system->error_ptr->one(FLERR, errmsg);
  }

  if (Init_Simulation_Data( system, control, data, msg ) == FAILURE) {
    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
    fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n",
             system->my_rank );
    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
    snprintf( errmsg, 512, "Error on: %s. "
    "Sim_data couldn't be initialized! Terminating.", msg );
    system->error_ptr->one(FLERR, errmsg);
  }

  if (Init_Workspace( system, control, workspace, mpi_data->world, msg ) ==
  if (Init_Workspace( system, control, workspace, msg ) ==
      FAILURE) {
    fprintf( stderr, "p%d:init_workspace: not enough memory\n",
             system->my_rank );
    fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n",
             system->my_rank );
    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
    system->error_ptr->one(FLERR, "init_workspace: not enough memory. "
    "Workspace couldn't be initialized! Terminating.");
  }

  if (Init_ListsOMP( system, control, data, workspace, lists, mpi_data, msg ) ==
      FAILURE) {
      fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
      fprintf( stderr, "p%d: system could not be initialized! terminating.\n",
               system->my_rank );
      MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
      snprintf( errmsg, 512, "Error on: %s. "
      "System could not be initialized! Terminating.", msg );
      system->error_ptr->one(FLERR, errmsg);
    }

  if (Init_Output_Files(system,control,out_control,mpi_data,msg)== FAILURE) {
    fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
    fprintf( stderr, "p%d: could not open output files! terminating...\n",
             system->my_rank );
    MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
    snprintf( errmsg, 512, "Error on: %s"
    "Could not open output files! Terminating.", msg );
    system->error_ptr->one(FLERR, errmsg);
  }

  if (control->tabulate) {
    if (Init_Lookup_Tables( system, control, workspace, mpi_data, msg ) == FAILURE) {
      fprintf( stderr, "p%d: %s\n", system->my_rank, msg );
      fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n",
               system->my_rank );
      MPI_Abort( mpi_data->world, CANNOT_INITIALIZE );
      snprintf( errmsg, 512, "Error on: %s."
      " Couldn't create lookup table! Terminating.", msg );
      system->error_ptr->one(FLERR, errmsg);
    }
  }

+10 −9
Original line number Diff line number Diff line
@@ -237,12 +237,12 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,

      // Confirm that thb_intrs->num_intrs / nthreads is enough to hold all angles from a single atom
      if(my_offset >= (tid+1)*per_thread) {
        int me;
        MPI_Comm_rank(MPI_COMM_WORLD,&me);
        fprintf( stderr, "step%d-ran out of space on angle_list on proc %i for atom %i:", data->step, me, j);
        fprintf( stderr, " nthreads= %d, tid=%d, my_offset=%d, per_thread=%d\n", nthreads, tid, my_offset, per_thread);
        fprintf( stderr, " num_intrs= %i  N= %i\n",thb_intrs->num_intrs , system->N);
        MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
        char errmsg[512];
        snprintf( errmsg, 512, "step%d-ran out of space on angle_list for atom %i:\n"
        " nthreads= %d, tid=%d, my_offset=%d, per_thread=%d\n"
        " num_intrs= %i  N= %i\n"
        , data->step, j, nthreads, tid, my_offset, per_thread,thb_intrs->num_intrs , system->N);
        control->error_ptr->one(FLERR, errmsg);
      }

      // Number of angles owned by this atom
@@ -601,9 +601,10 @@ void Valence_AnglesOMP( reax_system *system, control_params *control,
  if (num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE) {
    workspace->realloc.num_3body = num_thb_intrs * TWICE;
    if (num_thb_intrs > thb_intrs->num_intrs) {
      fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d",
      char errmsg[128];
      snprintf(errmsg, 128, "step%d-ran out of space on angle_list: top=%d, max=%d",
               data->step, num_thb_intrs, thb_intrs->num_intrs);
      MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
      control->error_ptr->one(FLERR, errmsg);
    }
  }

Loading