Unverified commit faab4d73, authored by Bharath Ramsundar and committed by GitHub
Browse files

Merge pull request #1003 from rbharath/dragonn-cleanup

Removing some commented out dragonn code
parents 4de77b5a eb67d0c3
Loading
Loading
Loading
Loading
+0 −63
Original line number Diff line number Diff line
@@ -7,13 +7,7 @@ import sys
import tempfile
matplotlib.use('pdf')
import matplotlib.pyplot as plt
from abc import abstractmethod, ABCMeta
from dragonn.metrics import ClassificationResult
from sklearn.svm import SVC as scikit_SVC
from sklearn.tree import DecisionTreeClassifier as scikit_DecisionTree
from sklearn.ensemble import RandomForestClassifier
from keras.models import model_from_json
from keras.models import Sequential
from keras.layers.core import (Activation, Dense, Dropout, Flatten, Permute,
                                Reshape, TimeDistributedDense)
from keras.layers.convolutional import Convolution2D, MaxPooling2D
@@ -25,28 +19,6 @@ from keras.layers.recurrent import GRU
from keras.callbacks import EarlyStopping


#class Model(object):
#  __metaclass__ = ABCMeta
#
#  @abstractmethod
#  def __init__(self, **hyperparameters):
#    pass
#
#  @abstractmethod
#  def train(self, X, y, validation_data):
#    pass
#
#  @abstractmethod
#  def predict(self, X):
#    pass
#
#  def test(self, X, y):
#    return ClassificationResult(y, self.predict(X))
#
#  def score(self, X, y, metric):
#    return self.test(X, y)[metric]


#class SequenceDNN(Model):
#  """
#  Sequence DNN models.
@@ -453,38 +425,3 @@ class gkmSVM(Model):
    y = np.array([line.split()[-1] for line in temp_ofp], dtype=float)
    temp_ofp.close()
    return np.expand_dims(y, 1)


class SVC(Model):
  """Model wrapper around scikit-learn's SVC.

  The underlying classifier is built with a linear kernel and with
  probability estimates enabled so that `predict` can return probabilities.
  """

  def __init__(self):
    self.classifier = scikit_SVC(kernel='linear', probability=True)

  def train(self, X, y, validation_data=None):
    """Fit the wrapped classifier on (X, y).

    `validation_data` is accepted for interface compatibility and is not
    used by this model.
    """
    self.classifier.fit(X, y)

  def predict(self, X):
    """Return the classifier's `predict_proba` output minus its first column.

    The `1:` slice keeps the result two-dimensional.
    """
    probabilities = self.classifier.predict_proba(X)
    return probabilities[:, 1:]


class DecisionTree(Model):
  """Model wrapper around scikit-learn's DecisionTreeClassifier."""

  def __init__(self):
    self.classifier = scikit_DecisionTree()

  def train(self, X, y, validation_data=None):
    """Fit the wrapped classifier on (X, y).

    `validation_data` is accepted for interface compatibility and is not
    used by this model.
    """
    self.classifier.fit(X, y)

  def predict(self, X):
    """Return the index-1 entry of `predict_proba` along its last axis.

    A 2-D intermediate result (the multitask case) is transposed; any other
    result (the single-task case) gains a trailing axis of length one.
    """
    probabilities = np.asarray(self.classifier.predict_proba(X))
    selected = probabilities[..., 1]
    if selected.ndim == 2:
      # Multitask: flip the two axes before returning.
      return selected.T
    # Single-task: append a length-one axis.
    return np.expand_dims(selected, 1)


class RandomForest(DecisionTree):
  """DecisionTree variant backed by a 100-estimator RandomForestClassifier.

  Only construction differs; `train` and `predict` are inherited from
  DecisionTree.
  """

  def __init__(self):
    forest = RandomForestClassifier(n_estimators=100)
    self.classifier = forest
+0 −93
Original line number Diff line number Diff line
@@ -15,99 +15,6 @@ from dragonn.plot import add_letters_to_axis, plot_motif
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

#Data = namedtuple(
#    'Data',
#    ('X_train', 'X_valid', 'X_test', 'train_embeddings', 'valid_embeddings',
#     'test_embeddings', 'y_train', 'y_valid', 'y_test', 'motif_names'))


#def get_available_simulations():
#  return [
#      function_name for function_name in dir(simulations)
#      if "simulate" in function_name
#  ]


#def print_available_simulations():
#  for function_name in get_available_simulations():
#    print(function_name)


#def get_simulation_function(simulation_name):
#  if simulation_name in get_available_simulations():
#    return getattr(simulations, simulation_name)
#  else:
#    print("%s is not available. Available simulations are:" % (simulation_name))
#    print_available_simulations()


#def print_simulation_info(simulation_name):
#  simulation_function = get_simulation_function(simulation_name)
#  if simulation_function is not None:
#    print(simulation_function.__doc__)


#def get_simulation_data(simulation_name,
#                        simulation_parameters,
#                        test_set_size=4000,
#                        validation_set_size=3200):
#  simulation_function = get_simulation_function(simulation_name)
#  sequences, y, embeddings = simulation_function(**simulation_parameters)
#  if simulation_name == "simulate_heterodimer_grammar":
#    motif_names = [
#        simulation_parameters["motif1"], simulation_parameters["motif2"]
#    ]
#  elif simulation_name == "simulate_multi_motif_embedding":
#    motif_names = simulation_parameters["motif_names"]
#  else:
#    motif_names = [simulation_parameters["motif_name"]]
#
#  train_sequences, test_sequences, train_embeddings, test_embeddings, y_train, y_test = \
#      train_test_split(sequences, embeddings, y, test_size=test_set_size)
#  train_sequences, valid_sequences, train_embeddings, valid_embeddings, y_train, y_valid = \
#      train_test_split(train_sequences, train_embeddings, y_train, test_size=validation_set_size)
#  X_train = one_hot_encode(train_sequences)
#  X_valid = one_hot_encode(valid_sequences)
#  X_test = one_hot_encode(test_sequences)
#
#  print("X_train.shape")
#  print(X_train.shape)
#  print("X_valid.shape")
#  print(X_valid.shape)
#  print("X_test.shape")
#  print(X_test.shape)
#  print("y_train.shape")
#  print(y_train.shape)
#  print("y_valid.shape")
#  print(y_valid.shape)
#  print("y_test.shape")
#  print(y_test.shape)
#
#  return Data(X_train, X_valid, X_test, train_embeddings, valid_embeddings,
#              test_embeddings, y_train, y_valid, y_test, motif_names)


#def inspect_SequenceDNN():
#  print(inspect.getdoc(SequenceDNN))
#  print("\nAvailable methods:\n")
#  for (method_name, _) in inspect.getmembers(
#      SequenceDNN, predicate=inspect.ismethod):
#    if method_name != "__init__":
#      print(method_name)


#def get_SequenceDNN(SequenceDNN_parameters):
#  return SequenceDNN(**SequenceDNN_parameters)


#def train_SequenceDNN(dnn, simulation_data):
#  assert issubclass(type(simulation_data), tuple)
#  random.seed(1)
#  np.random.seed(1)
#  dnn.train(simulation_data.X_train, simulation_data.y_train,
#            (simulation_data.X_valid, simulation_data.y_valid))


def SequenceDNN_learning_curve(dnn):
  if dnn.valid_metrics is not None:
    train_losses, valid_losses = [