Commit 6669b523 authored by Yutong Zhao's avatar Yutong Zhao
Browse files

Add additional types and constraints

parent 0566d2c9
Loading
Loading
Loading
Loading
+16 −4
Original line number Diff line number Diff line
@@ -171,7 +171,7 @@ class ANIRegression(TensorGraph):
      feed_dict[self.atom_feats] = np.array(X[:upper_lim, :, :], dtype=float)
      return self.session.run([self.grad], feed_dict=feed_dict)

  def pred_one(self, X, atomic_nums):
  def pred_one(self, X, atomic_nums, constraints=None):
    """
    Makes an energy prediction for a set of atomic coordinates.

@@ -182,6 +182,8 @@ class ANIRegression(TensorGraph):
      dtype is float-like
    atomic_nums: np.array
      numpy array of shape (a,) where a is the same as that of X. 
    constraints: unused
      This parameter is mainly for compatibility purposes for scipy optimize

    Returns
    -------
@@ -201,7 +203,7 @@ class ANIRegression(TensorGraph):
    dd = dc.data.NumpyDataset(np.array(X).reshape((1, self.max_atoms, 4)), np.array(0), np.array(1))
    return self.predict(dd)[0]

  def grad_one(self, X, atomic_nums):
  def grad_one(self, X, atomic_nums, constraints=None):
    """
    Computes gradients for that of a single structure.

@@ -212,6 +214,9 @@ class ANIRegression(TensorGraph):
      dtype is float-like
    atomic_nums: np.array
      numpy array of shape (a,) where a is the same as that of X. 
    constraints: np.array
      numpy array of indices of X used for constraining a subset
      of the atoms of the molecule.

    Returns
    -------
@@ -230,9 +235,16 @@ class ANIRegression(TensorGraph):
    dd = dc.data.NumpyDataset(inp, np.array([1]), np.array([1]))
    res = self.compute_grad(dd)[0][0][0]
    res = res[:num_atoms, 1:]

    if constraints is not None:
      for idx in constraints:
        res[idx][0] = 0
        res[idx][1] = 0
        res[idx][2] = 0

    return res.reshape((num_atoms*3,))

  def minimize_structure(self, X, atomic_nums):
  def minimize_structure(self, X, atomic_nums, constraints=None):
    """
    Minimizes a structure, as defined by a set of coordinates and their atomic
    numbers.
@@ -271,7 +283,7 @@ class ANIRegression(TensorGraph):
    res = scipy.optimize.minimize(
      self.pred_one,
      X,
      args=(atomic_nums,),
      args=(atomic_nums, constraints),
      jac=self.grad_one,
      method="BFGS",
      tol=1e-6,
+29 −0
Original line number Diff line number Diff line
@@ -68,5 +68,34 @@ class TestANIRegression(unittest.TestCase):

    np.testing.assert_array_almost_equal(grad_approx, grad_exact, decimal=3)

    grad_exact_constrained = model.grad_one(new_x, new_atomic_nums, constraints=[0, 2])

    assert grad_exact_constrained[0] == 0
    assert grad_exact_constrained[1] == 0
    assert grad_exact_constrained[2] == 0

    assert grad_exact_constrained[3] == grad_exact[3]
    assert grad_exact_constrained[4] == grad_exact[4]
    assert grad_exact_constrained[5] == grad_exact[5]

    assert grad_exact_constrained[6] == 0
    assert grad_exact_constrained[7] == 0
    assert grad_exact_constrained[8] == 0

    min_coords = model.minimize_structure(new_x, new_atomic_nums, constraints=[0,2])

    assert min_coords[0][0] == new_x[0]
    assert min_coords[0][1] == new_x[1]
    assert min_coords[0][2] == new_x[2]

    # assert min_coords[1][0] != new_x[3]
    # assert min_coords[1][1] != new_x[4]
    # assert min_coords[1][2] != new_x[5]

    assert min_coords[2][0] == new_x[6]
    assert min_coords[2][1] == new_x[7]
    assert min_coords[2][2] == new_x[8]


if __name__ == '__main__':
  unittest.main()
 No newline at end of file
+13 −2
Original line number Diff line number Diff line
@@ -37,11 +37,22 @@ def minimize():
  if not content or not 'X' in content:
    abort(400)
  X = np.array(content['X'])

  constraints = None

  if 'constraints' in content:
    constraints = content['constraints']
    print('setting constraints')


  num_atoms = X.shape[0]
  x0 = X[:, 1:]
  a0 = X[:, :1]

  res = webapp.model.minimize_structure(x0, a0)


  res = webapp.model.minimize_structure(x0, a0, constraints)
  res = res.reshape((num_atoms, 3))
  y = webapp.model.pred_one(res, a0).tolist()[0]

  return flask.jsonify({'X': res.tolist()}), 200
 No newline at end of file
  return flask.jsonify({'X': res.tolist(), 'y': y}), 200
 No newline at end of file
+50 −10
Original line number Diff line number Diff line
@@ -12,8 +12,26 @@ def convert_species_to_atomic_nums(s):
    res.append(PERIODIC_TABLE[k])
  return np.array(res, dtype=np.float32)


def load_roiterberg_ANI(relative=True):
def load_roiterberg_ANI(mode="atomization"):
  """
  Load the ANI dataset.

  Parameters
  ----------
  mode: str
    Accepted modes are "relative", "atomization", or "absolute". These settings are used
    to adjust the dynamic range of the model, with absolute having the greatest and relative
    having the lowest. Note that for atomization we approximate the single atom energy
    using a different level of theory


  Returns
  -------
  tuples
    Elements returned are 3-tuple (a,b,c) where and b are the train and test datasets, respectively,
    and c is an array of indices denoting the group of each 

  """
  if "ROITBERG_ANI" not in os.environ:
    raise ValueError(
        "Please set environment variable ROITBERG_ANI to where the ani_dgb_s0x.h5 files are."
@@ -28,9 +46,9 @@ def load_roiterberg_ANI(relative=True):
      'ani_gdb_s01.h5',
      'ani_gdb_s02.h5',
      'ani_gdb_s03.h5',
      # 'ani_gdb_s04.h5',
      # 'ani_gdb_s05.h5',
      # 'ani_gdb_s06.h5',
      'ani_gdb_s04.h5',
      'ani_gdb_s05.h5',
      'ani_gdb_s06.h5',
      # 'ani_gdb_s07.h5',
      # 'ani_gdb_s08.h5'
  ]
@@ -77,10 +95,29 @@ def load_roiterberg_ANI(relative=True):
        nonpadded = convert_species_to_atomic_nums(S)
        Z_padded[:nonpadded.shape[0]] = nonpadded

        if relative:
        if mode == "relative":
          offset = np.amin(E)
        else:
        elif mode == "atomization":

          # approximation via B3LYP/6-31G* in Jaguar
          # to shrink the dynamic range         
          atomizationMapInHartrees = {
              0: 0,
              1: -0.50027278266,
              6: -37.77568079030,
              7: -54.47752723207,
              8: -74.95668301867,
              16: -398.04142581461
          }

          offset = 0

          for z in nonpadded:
            offset -= atomizationMapInHartrees[z]
        elif mode == "absolute":
          offset = 0
        else:
          raise Exception("Unsupported mode: ", mode)

        for k in range(len(E)):
          R_padded = np.zeros((23, 3), dtype=np.float32)
@@ -88,7 +125,7 @@ def load_roiterberg_ANI(relative=True):

          X = np.concatenate([np.expand_dims(Z_padded, 1), R_padded], axis=1)

          y = E[k] - offset  # offset is zero if we're not computing relative
          y = E[k] - offset

          if len(X_cache) == shard_size:

@@ -149,7 +186,7 @@ if __name__ == "__main__":
      dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
  ]

  model_dir = "/tmp/ani.pkl"
  model_dir = "/tmp/ani3.pkl"

  if os.path.exists(model_dir):
    print("Restoring existing model...")
@@ -157,7 +194,7 @@ if __name__ == "__main__":
  else:
    print("Fitting new model...")

    train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(relative=True)
    train_valid_dataset, test_dataset, all_groups = load_roiterberg_ANI(mode="relative")

    splitter = dc.splits.RandomGroupSplitter(broadcast(train_valid_dataset, all_groups))

@@ -172,6 +209,9 @@ if __name__ == "__main__":
            transform_y=True, dataset=train_dataset)
    ]

    print("Total training set shape: ", train_dataset.get_shape())


    for transformer in transformers:
      train_dataset = transformer.transform(train_dataset)
      valid_dataset = transformer.transform(valid_dataset)