Commit a38ee74f authored by miaecle's avatar miaecle
Browse files

dag transformer

parent 77b9e075
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -63,6 +63,7 @@ hps['dag'] = {
    'nb_epoch': 50,
    'learning_rate': 0.001,
    'n_graph_feat': 30,
    'default_max_atoms': 60,
    'seed': 123
}
hps['weave'] = {
@@ -137,6 +138,7 @@ hps['dag_regression'] = {
    'nb_epoch': 50,
    'learning_rate': 0.001,
    'n_graph_feat': 30,
    'default_max_atoms': 60,
    'seed': 123
}
hps['weave_regression'] = {
+6 −2
Original line number Diff line number Diff line
@@ -227,12 +227,14 @@ def benchmark_classification(train_dataset,
    nb_epoch = hyper_parameters['nb_epoch']
    learning_rate = hyper_parameters['learning_rate']
    n_graph_feat = hyper_parameters['n_graph_feat']
    default_max_atoms = hyper_parameters['default_max_atoms']

    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    max_atoms = min([max_atoms, default_max_atoms])
    print('Maximum number of atoms: %i' % max_atoms)
    reshard_size = 256
    transformer = deepchem.trans.DAGTransformer(max_atoms=max_atoms)
    train_dataset.reshard(reshard_size)
@@ -560,12 +562,14 @@ def benchmark_regression(train_dataset,
    nb_epoch = hyper_parameters['nb_epoch']
    learning_rate = hyper_parameters['learning_rate']
    n_graph_feat = hyper_parameters['n_graph_feat']
    default_max_atoms = hyper_parameters['default_max_atoms']

    max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
    max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
    max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
    max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

    max_atoms = min([max_atoms, default_max_atoms])
    print('Maximum number of atoms: %i' % max_atoms)
    reshard_size = 512
    transformer = deepchem.trans.DAGTransformer(max_atoms=max_atoms)
    train_dataset.reshard(reshard_size)
+10 −5
Original line number Diff line number Diff line
@@ -865,14 +865,14 @@ class DAGTransformer(Transformer):
      # DAG starts from the target atom, calculation should go in reverse
      for edge in reversed(DAG):
        # `edge[1]` is the parent of `edge[0]`
        parent[edge[0]].append(edge[1])
        parent[edge[0]].append(edge[1]%max_atoms)
        # all the parents of `edge[1]` are also parents of `edge[0]`
        parent[edge[0]].extend(parent[edge[1]])
      # after this loop, `parent[i]` includes all parents of atom i

      for ids, atom in enumerate(parent):
        # manually adding the atom index into its parents list
        parent[ids].insert(0, ids)
        parent[ids].insert(0, ids%max_atoms)
      # after this loop, `parent[i][0]` is i, `parent[i][1:]` are all parents of atom i

      # atoms with fewer parents (farther from the target atom) come first.
@@ -885,8 +885,13 @@ class DAGTransformer(Transformer):
      for ids, atom in enumerate(parent):
        n_par = len(atom)
        # padding with `max_atoms`
        if n_par < max_atoms:
          parent[ids].extend([max_atoms for i in range(max_atoms - n_par)])
        if n_par > max_atoms:
          parent[ids] = parent[ids][:max_atoms]
      
      if len(parent) > max_atoms:
        parent = parent[-max_atoms:]
      while len(parent) < max_atoms:
        # padding
        parent.insert(0, [max_atoms] * max_atoms)