Commit 709f0faf authored by DavidRFB's avatar DavidRFB
Browse files

New Comments in Tutorial

parent fea2fad5
Loading
Loading
Loading
Loading
+1489 −0

File added.

Preview size limit exceeded, changes collapsed.

+345 −0
Original line number Diff line number Diff line
# Environment setup (notebook shell commands exported as get_ipython() calls).
# Download DeepChem's Colab install script, saving it as conda_installer.py ...
get_ipython().getoutput("curl -Lo conda_installer.py https://raw.githubusercontent.com/deepchem/deepchem/master/scripts/colab_install.py")
# ... then import the downloaded module and run its installer.
import conda_installer
conda_installer.install()
# Query the conda installation under /root/miniconda for its environments.
get_ipython().getoutput("/root/miniconda/bin/conda info -e")


# Install the pre-release build of DeepChem with pip.
get_ipython().getoutput("pip install --pre deepchem")


# Install propy3 (imported further down as `propy`).
get_ipython().getoutput("pip install propy3 ")


#imports 
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt


# Load the tab-separated ProThermDB export into a DataFrame.
data =pd.read_csv("./all_measurementsProtherDB.tsv",delimiter="\t")


# Notebook display: (rows, columns) of the raw table.
data.shape


# Rows whose MEASURE is 'DSC' and whose MUTATION is 'wild-type'.
df = data[ (data['MEASURE'] == 'DSC') & (data['MUTATION'] == 'wild-type')]


df.shape


# One wild-type row per PDB_wild value (the first occurrence is kept).
no_mutants_dataset = df.drop_duplicates(['PDB_wild'])


no_mutants_dataset.shape


# All rows whose MEASURE is 'DSC', mutants included.
df2 = data[ (data['MEASURE'] == 'DSC')]


df2.shape


# Keep the first row for every (MUTATION, PDB_wild) pair; later repeats of the
# same pair are dropped.
no_wild_type_repeated  =df2.drop_duplicates(['MUTATION','PDB_wild'],keep='first')


# Notebook display: identifiers and protein names ordered by UniProt_ID.
no_wild_type_repeated.sort_values('UniProt_ID')[['UniProt_ID','PDB_wild','PROTEIN']]


# Table of mutations indexed by UniProt_ID; this is what the curation loop iterates.
Mutation_list = no_wild_type_repeated[['UniProt_ID','MUTATION']].set_index('UniProt_ID')


Mutation_list


import requests, sys
def Seq_From_AccNum(num):
    '''
    Request the protein sequence for a UniProt accession number from the
    EBI Proteins API.

    Parameters
    ----------
    num : str
        UniProt accession number; it is interpolated into the request URL.

    Returns
    -------
    str or None
        The ``sequence`` field of the JSON response, or None when the request
        could not be completed (connection error, timeout) or the server
        answered with a non-success status.
    '''
    requestURL = "https://www.ebi.ac.uk/proteins/api/features/{}".format(num)
    try:
        # A timeout keeps one stalled connection from hanging a loop that
        # issues one request per accession number.
        r = requests.get(requestURL, headers={ "Accept" : "application/json"}, timeout=30)
    except requests.RequestException:
        # Report network failures the same way as HTTP errors so that the
        # caller only has to handle the None case.
        print("Failure in Uniprot request")
        return None
    if not r.ok:
        print("Failure in Uniprot request")
        return None
    responseBody = r.json()
    return responseBody['sequence']


import re
def MutateSeq(seq,Mutant):
    '''
    Apply the point substitutions described in ``Mutant`` to ``seq``.

    Each substitution uses the notation A###B, where A is the wild-type
    amino acid, ### the 1-based position and B the replacement. Every A###B
    pattern found in ``Mutant`` is applied, in order of appearance.

    Parameters
    ----------
    seq : str
        Wild-type sequence.
    Mutant : str
        Text containing zero or more A###B substitutions.

    Returns
    -------
    str or None
        The mutated sequence. None when a position lies outside the sequence
        or the residue found at that position is not the stated wild-type
        amino acid (a message is printed in both cases). When ``Mutant``
        contains no A###B pattern, ``seq`` is returned unchanged.
    '''
    listseq = list(seq)
    for wildAA, position, MutAA in re.findall(r'([A-Z])([0-9]+)([A-Z])', Mutant):
        pos = int(position) - 1
        # Position 0 would give pos == -1, which Python accepts as "last
        # residue"; reject it together with positions past the end.
        if pos < 0 or pos >= len(listseq):
            print("Mutation not in the range of the protein")
            return None
        if listseq[pos] != wildAA:
            print("WildType AA does not match")
            return None
        listseq[pos] = MutAA
    return "".join(listseq)



Mutation_list.sort_values('UniProt_ID',inplace=True)


Mutation_list


import time
t0 = time.time()
Sequences = {}   # "<accession>-<mutation>" -> sequence (None when the mutation could not be applied)
fail  = {}       # accession -> mutations that could not be applied
AccNum = []      # accessions already requested, in request order
seq_cache = {}   # accession -> downloaded wild-type sequence (None when the download failed)
count = 0        # not used by the loop; kept so the name stays defined
print("Perfoming data curation : ")

for accnumber, row in Mutation_list.iterrows():
    # Drop the trailing "(Based on ...)" annotation from the mutation text.
    mutation = row['MUTATION'].split("(")[0].strip()
    print("{} - {}".format(accnumber,mutation), end =" ")
    name ="{}-{}".format(accnumber,mutation)
    if accnumber == '-':
        # No accession number: nothing to download. Accumulate the mutation
        # instead of overwriting the previous entry.
        fail.setdefault(accnumber, []).append(mutation)
        continue
    if accnumber not in seq_cache:
        AccNum.append(accnumber)
        seq_cache[accnumber] = Seq_From_AccNum(accnumber)
    seq = seq_cache[accnumber]
    if seq is None:
        # The download failed for this accession; skip every row that needs
        # it rather than passing None on to MutateSeq.
        continue
    if mutation == 'wild-type':
        name ="{}-{}".format(accnumber,"WT")
        Sequences[name]=seq
    else:
        mutseq = MutateSeq(seq,mutation)
        if mutseq is None:
            fail.setdefault(accnumber, []).append(mutation)
        # None is stored for failed mutations; the CSV export skips them.
        Sequences[name] = mutseq

print("Total time analyzing all the dataFrame {} s".format(time.time() - t0))


fail


temp_accnum = no_wild_type_repeated[['UniProt_ID','MUTATION','Tm_(C)','PDB_wild']]
arr_acc_temp = temp_accnum.to_numpy()

# "<accession>-<mutation>" -> [Tm value, upper-cased PDB code]; the keys are
# built the same way as the keys of `Sequences`.
temp_dic={}

for i in arr_acc_temp:
    acc = i[0]
    if i[1] == 'wild-type':
        mut ='WT'
    else:
        # Drop the trailing "(Based on ...)" annotation.
        mut = i[1].split('(')[0].strip()

    name="{}-{}".format(acc,mut)
    temp_dic[name]=[i[-2],i[-1].upper()]

with open('sequences_protherm.csv','w') as file:
    # The header is kept byte-for-byte (including the space after "Sequence")
    # because the loading code indexes the column as 'Sequence '.
    file.write("AccNumber,T_m,PDBID,Sequence \n")
    for k,v in Sequences.items():
        if v is None:
            # Mutation could not be applied; nothing to export.
            continue
        temp = temp_dic[k][0]
        code_pdb = temp_dic[k][1]

        # No space before the newline: it would become part of the sequence
        # field when the file is read back.
        text = "{},{},{},{}\n".format(k,temp,code_pdb,v)
        file.write(text)


import deepchem as dc
from rdkit import Chem
import pandas as pd 


Final_Data  = pd.read_csv("sequences_protherm.csv")


# The column name carries a trailing space in the file header. Strip stray
# whitespace around each sequence so it is neither counted in the maximum
# length nor passed to the featurizers as an extra character.
seq_list = [s.strip() for s in Final_Data['Sequence ']]


# One-letter codes of the 20 standard amino acids.
codes = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
         'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']


# Length of the longest sequence (0 when there are no sequences).
max_seq = max((len(s) for s in seq_list), default=0)
max_seq


OneHotFeaturizer = dc.feat.OneHotFeaturizer(codes,max_length=max_seq)


features = OneHotFeaturizer.featurize(seq_list)


# Keep the number in front of any "(" suffix. str() makes this work whether
# pandas parsed the column as text or as numbers.
temp = [float(str(x).split("(")[0]) for x in Final_Data['T_m']]


dc_dataset = dc.data.NumpyDataset(X=features,y=temp)


dc_dataset.X.shape


from deepchem import splits
splitter = splits.RandomSplitter()
train, test  = splitter.train_test_split(dc_dataset,seed=42)


dc_dataset.X.shape[1:]


from tensorflow import keras
from tensorflow.keras import layers
# Fully connected regressor: three hidden layers of 32 ReLU units, each
# followed by 20% dropout, and a single linear output.
model = keras.Sequential([
    # Dense only transforms the last axis of its input. Flatten first so that
    # a per-residue feature matrix is reduced to one vector per protein and
    # the network emits one value per sample. Flatten is a no-op when the
    # features are already one-dimensional.
    layers.Flatten(input_shape=dc_dataset.X.shape[1:]),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=1),
])
model.compile(loss='mae', optimizer='adam')

# summary() prints the table itself; wrapping it in print() adds a stray "None".
model.summary()

history = model.fit(
    train.X, train.y,
    validation_data=(test.X,test.y),
    batch_size=24,
    epochs=10,
)

## perform a plot of loss vs epochs
import matplotlib.pyplot as plt
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()


# Wrap the Keras network in a DeepChem model object.
# NOTE(review): the second positional argument of KerasModel is presumably its
# loss; dc.metrics.mae_score is a metric function -- confirm this is intended.
# dc_model is not used anywhere else in the visible code.
dc_model = dc.models.KerasModel(model,dc.metrics.mae_score)


# Split the one-hot dataset again with the same seed as before.
from deepchem import splits
splitter = splits.RandomSplitter()
train, test  = splitter.train_test_split(dc_dataset,seed=42)


train.X.shape


# Random forest regressor fitted on the one-hot training split.
# NOTE(review): if train.X has more than two dimensions the scikit-learn
# estimator may reject it -- confirm whether the features need flattening.
# No score is computed for this model in the visible code.
from sklearn.ensemble import RandomForestRegressor
from deepchem.utils.evaluate import Evaluator
import pandas as pd
seed = 42 # Set a random seed to get stable results
sklearn_model = RandomForestRegressor(n_estimators=100, max_features='sqrt')
sklearn_model.random_state = seed
# `model` is rebound here: from now on it is the scikit-learn wrapper, not the
# Keras network.
model = dc.models.SklearnModel(sklearn_model)
model.fit(train)


from propy import PyPro


def _descriptor_vector(descriptors):
    """Return the values of a propy descriptor dict as a NumPy array."""
    return np.array(list(descriptors.values()))


# Two descriptor sets per sequence, both computed from the same propy object:
# amino acid composition (GetAAComp) and CTD (GetCTD).
aaComplist, CTDList = [], []
for seq in seq_list:
    Obj = PyPro.GetProDes(seq)
    aaComplist.append(_descriptor_vector(Obj.GetAAComp()))
    CTDList.append(_descriptor_vector(Obj.GetCTD()))


# One DeepChem dataset per descriptor set, sharing the same labels.
dc_dataset_aacomp = dc.data.NumpyDataset(X=aaComplist, y=temp)
dc_dataset_ctd = dc.data.NumpyDataset(X=CTDList, y=temp)


from deepchem import splits
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from deepchem.utils.evaluate import Evaluator
import pandas as pd

seed = 42 # Set a random seed to get stable results
metric = dc.metrics.Metric(dc.metrics.mae_score)

# Same protocol for both descriptor sets (amino acid composition first, then
# CTD): random train/test split, fit a random forest and a polynomial SVR,
# and report the mean absolute error on both splits.
for descriptor_dataset in (dc_dataset_aacomp, dc_dataset_ctd):
    splitter = splits.RandomSplitter()
    train, test  = splitter.train_test_split(descriptor_dataset,seed=seed)

    sklearn_model = RandomForestRegressor(n_estimators=100, max_features='sqrt',
                                          random_state=seed)
    # SVR takes no random_state parameter, so none is set here.
    svr_sklearn = SVR(kernel="poly",degree=4)

    for title, estimator in (("RandomForestRegressor", sklearn_model),
                             ("SupportVectorMachineRegressor", svr_sklearn)):
        print(title)
        model = dc.models.SklearnModel(estimator)
        model.fit(train)
        train_score = model.evaluate(train, [metric])
        test_score = model.evaluate(test, [metric])
        print("Train score is : {}".format(train_score))
        print("Test score is : {}".format(test_score))
+61 −48
Original line number Diff line number Diff line
%% Cell type:markdown id: tags:

# Tutorial 30: Protein Deep Learning
In this tutorial we will explore differents featurization of protein sequences, including one hot encoders, aminoacids composition and 3D contact maps. We will use some tools of DeepChem and additional packages to create a model to predict melting temperature of proteins ( a good measurement of protein stability )
In this tutorial we will compare featurizations of protein sequences, such as one-hot encoding and amino acid composition. We will use some DeepChem tools and additional packages to create a model that predicts the melting temperature of proteins (a good measure of protein stability).

The melting temperature (MT) of a protein is a measure of protein stability. This measurement can vary across a wide range of experimental conditions; however, databases such as ProThermDB (https://web.iitm.ac.in/bioinfo2/prothermdb/index.html) contain a lot of thermodynamic information on proteins and are therefore a great resource for the study of protein stability. Another quantity related to protein stability is the change in Gibbs free energy, $\Delta \Delta G^{\circ}$, due to a mutation.

The study of protein stability is important in areas such as protein engineering and biocatalysis

%% Cell type:markdown id: tags:

# TODO
- include colab installer
- improve Melting temperature description and importance
- include 3D contact map
# Setup

%% Cell type:markdown id: tags:

To run DeepChem within Colab, you'll need to run the following installation commands. This will take about 5 minutes to run to completion and install your environment. You can of course run this tutorial locally if you prefer. In that case, don't run these cells since they will download and install Anaconda on your local machine.

%% Cell type:code id: tags:

``` python
!curl -Lo conda_installer.py https://raw.githubusercontent.com/deepchem/deepchem/master/scripts/colab_install.py
import conda_installer
conda_installer.install()
!/root/miniconda/bin/conda info -e
```

%% Cell type:code id: tags:

``` python
!pip install --pre deepchem
```

%% Cell type:code id: tags:

``` python
!pip install propy3
```

%% Cell type:markdown id: tags:

# Melting temperature  MT
the MT of a protein is a measurement of protein stability. This measure could vary from a big variety of experimental conditions, however, databases as  ProthermDB https://web.iitm.ac.in/bioinfo2/prothermdb/index.html contains a lot of thermodynamicall information of proteins and therefore a a big resource for the study of protein stability
# Dataset Extraction

%% Cell type:code id: tags:

``` python
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
```

%% Cell type:markdown id: tags:

The file all_measurementsProtherDB.tsv was extracted from ProThermDB by querying for $T_m$ in the range of -52 to 220 °C. Additionally, the experimental conditions and UniProt accession numbers were requested.

%% Cell type:code id: tags:

``` python
data =pd.read_csv("./all_measurementsProtherDB.tsv",delimiter="\t")
```

%% Cell type:code id: tags:

``` python
data.shape
```

%% Output

    (15629, 17)

%% Cell type:markdown id: tags:

# Data only with Measurement of Differential Scanning Calorymetry (DSC)
Data only with measurements from Differential Scanning Calorimetry (DSC)

%% Cell type:code id: tags:

``` python
df = data[ (data['MEASURE'] == 'DSC') & (data['MUTATION'] == 'wild-type')]
```

%% Cell type:code id: tags:

``` python
df.shape
```

%% Output

    (3310, 17)

%% Cell type:code id: tags:

``` python
no_mutants_dataset = df.drop_duplicates(['PDB_wild'])
```

%% Cell type:code id: tags:

``` python
no_mutants_dataset.shape
```

%% Output

    (298, 17)

%% Cell type:markdown id: tags:

# Separate the only entries performed with Differential Scanning calorimetry
Select only the entries measured with Differential Scanning Calorimetry

%% Cell type:code id: tags:

``` python
df2 = data[ (data['MEASURE'] == 'DSC')]
```

%% Cell type:code id: tags:

``` python
df2.shape
```

%% Output

    (5797, 17)

%% Cell type:markdown id: tags:

# ProThermDB contains wild type entries of the same protein with differente value of melting temperature. In this tutorial we will keep the first wild type entry. ( Variations in $T_m$ could be caused by buffer selection or pH )
ProThermDB contains wild-type entries of the same protein with different values of melting temperature. In this tutorial we will keep the first wild-type entry. (Variations in $T_m$ could be caused by buffer selection or pH.)

%% Cell type:code id: tags:

``` python
no_wild_type_repeated  =df2.drop_duplicates(['MUTATION','PDB_wild'],keep='first')
```

%% Cell type:code id: tags:

``` python
no_wild_type_repeated.sort_values('UniProt_ID')[['UniProt_ID','PDB_wild','PROTEIN']]
```

%% Output

          UniProt_ID PDB_wild            PROTEIN
    14049          -     3NA9  Fab15 light chain
    14116          -     3HC0     anti-LTbR scFv
    14115          -     3HC0     anti-LTbR scFv
    14114          -     3HC0     anti-LTbR scFv
    14113          -     3HC0     anti-LTbR scFv
    ...          ...      ...                ...
    7451      R9S082     1BVC          Myoglobin
    7452      R9S082     1BVC          Myoglobin
    7453      R9S082     1BVC          Myoglobin
    7439      R9S082     1BVC          Myoglobin
    7448      R9S082     1BVC          Myoglobin
    
    [1443 rows x 3 columns]

%% Cell type:code id: tags:

``` python
Mutation_list = no_wild_type_repeated[['UniProt_ID','MUTATION']].set_index('UniProt_ID')
```

%% Cell type:code id: tags:

``` python
Mutation_list
```

%% Output

                                        MUTATION
    UniProt_ID
    P0A877       P28S (Based on UniProt and PDB)
    P0A877       S33L (Based on UniProt and PDB)
    P0A877                             wild-type
    P0A877      M101V (Based on UniProt and PDB)
    P0A877      M101T (Based on UniProt and PDB)
    ...                                      ...
    P0AA25                             wild-type
    O00189      D190A (Based on UniProt and PDB)
    O00189      R283D (Based on UniProt and PDB)
    O49003                             wild-type
    O49003      C450A (Based on UniProt and PDB)
    
    [1443 rows x 1 columns]

%% Cell type:markdown id: tags:

# based on the UniprotAcc Numbers, we will use the python request module to access information of sequence in each entry
Based on the UniProt accession numbers, we will use the Python requests module to retrieve the sequence for each entry.

%% Cell type:code id: tags:

``` python
import requests, sys
def Seq_From_AccNum(num):
    '''
    Request the protein sequence for a UniProt accession number from the
    EBI Proteins API.

    Parameters
    ----------
    num : str
        UniProt accession number; it is interpolated into the request URL.

    Returns
    -------
    str or None
        The ``sequence`` field of the JSON response, or None when the request
        could not be completed (connection error, timeout) or the server
        answered with a non-success status.
    '''
    requestURL = "https://www.ebi.ac.uk/proteins/api/features/{}".format(num)
    try:
        # A timeout keeps one stalled connection from hanging a loop that
        # issues one request per accession number.
        r = requests.get(requestURL, headers={ "Accept" : "application/json"}, timeout=30)
    except requests.RequestException:
        # Report network failures the same way as HTTP errors so that the
        # caller only has to handle the None case.
        print("Failure in Uniprot request")
        return None
    if not r.ok:
        print("Failure in Uniprot request")
        return None
    responseBody = r.json()
    return responseBody['sequence']
```

%% Cell type:code id: tags:

``` python
import re
def MutateSeq(seq,Mutant):
    '''
    Apply the point substitutions described in ``Mutant`` to ``seq``.

    Each substitution uses the notation A###B, where A is the wild-type
    amino acid, ### the 1-based position and B the replacement. Every A###B
    pattern found in ``Mutant`` is applied, in order of appearance.

    Parameters
    ----------
    seq : str
        Wild-type sequence.
    Mutant : str
        Text containing zero or more A###B substitutions.

    Returns
    -------
    str or None
        The mutated sequence. None when a position lies outside the sequence
        or the residue found at that position is not the stated wild-type
        amino acid (a message is printed in both cases). When ``Mutant``
        contains no A###B pattern, ``seq`` is returned unchanged.
    '''
    listseq = list(seq)
    for wildAA, position, MutAA in re.findall(r'([A-Z])([0-9]+)([A-Z])', Mutant):
        pos = int(position) - 1
        # Position 0 would give pos == -1, which Python accepts as "last
        # residue"; reject it together with positions past the end.
        if pos < 0 or pos >= len(listseq):
            print("Mutation not in the range of the protein")
            return None
        if listseq[pos] != wildAA:
            print("WildType AA does not match")
            return None
        listseq[pos] = MutAA
    return "".join(listseq)
```

%% Cell type:code id: tags:

``` python
Mutation_list.sort_values('UniProt_ID',inplace=True)
```

%% Cell type:code id: tags:

``` python
Mutation_list
```

%% Output

                                                         MUTATION
    UniProt_ID
    -           3na9_H:V34I 3na9_H:G35S 3na9_H:E95Q 3na9_L:S95...
    -                                  3HC0_A:V56G (Based on PDB)
    -                                  3HC0_A:V68L (Based on PDB)
    -                                  3HC0_A:V68I (Based on PDB)
    -                                   3hc0_H:Q3G (Based on PDB)
    ...                                                       ...
    R9S082                                L70A (Based on UniProt)
    R9S082                               L136I (Based on UniProt)
    R9S082                               L136M (Based on UniProt)
    R9S082                               I112A (Based on UniProt)
    R9S082                                L70I (Based on UniProt)
    
    [1443 rows x 1 columns]

%% Cell type:markdown id: tags:

# The next cell will download from the EBI-API the sequence and perform all the reported mutations by ProthermDB that are consistent with Uniprot sequence .
The next cell will download the sequences from the EBI API and apply all the mutations reported by ProThermDB that are consistent with the UniProt sequence.

%% Cell type:code id: tags:

``` python
import time

# Build the mutated sequences for every (accession, mutation) row.
#
# Results left in the notebook namespace:
#   Sequences : "<accession>-<mutation>" -> sequence string, or None when the
#               mutation could not be applied ("<accession>-WT" for wild-type).
#   fail      : accession -> list of mutations that could not be applied
#               (rows without an accession are collected under the key '-').
#   AccNum    : accessions that were requested, in first-seen order.
t0 = time.time()
Sequences = {}
fail = {}
AccNum = []
# accession -> downloaded wild-type sequence (None when the download failed).
# Caching the result, including failures, means every accession is requested
# only once and a failed download is never reused as if it were a sequence.
wild_type_by_acc = {}
count = 0
print("Perfoming data curation : ")

for accnumber, row in Mutation_list.iterrows():
    # Uncomment to process only the first 100 rows while debugging:
    # if count == 100:
    #     break
    # count += 1

    # Drop the trailing "(Based on ...)" annotation from the mutation label.
    mutation = row['MUTATION'].split("(")[0].strip()
    print("{} - {}".format(accnumber, mutation), end=" ")

    # Rows without a UniProt accession cannot be mapped to a sequence.
    if accnumber == '-':
        fail.setdefault(accnumber, []).append(mutation)
        continue

    if accnumber not in wild_type_by_acc:
        AccNum.append(accnumber)
        wild_type_by_acc[accnumber] = Seq_From_AccNum(accnumber)
    seq = wild_type_by_acc[accnumber]
    if seq is None:
        # The download failed for this accession; skip all of its rows.
        continue

    if mutation == 'wild-type':
        Sequences["{}-{}".format(accnumber, "WT")] = seq
        continue

    name = "{}-{}".format(accnumber, mutation)
    mutseq = MutateSeq(seq, mutation)
    if mutseq is None:
        fail.setdefault(accnumber, []).append(mutation)
    # Failed mutations are stored as None; the export cell filters them out.
    Sequences[name] = mutseq

print("Total time analyzing all the dataFrame {} s".format(time.time() - t0))
```

%% Output

    Perfoming data curation :
    - - 3na9_H:V34I 3na9_H:G35S 3na9_H:E95Q 3na9_L:S95P - - 3HC0_A:V56G - - 3HC0_A:V68L - - 3HC0_A:V68I - - 3hc0_H:Q3G - - 3hc0_H:Q3D - - 3hc0_H:Q3V - - wild-type - - 3hc0_H:S16Q 3hc0_L:S46L - - 3hc0_H:Q3S - - 3na9_H:V34I 3na9_H:G35S - - 3na9_H:F50R - - 3na9_H:A60S 3na9_H:Q66H - - 3na9_H:V34I 3na9_H:G35S 3na9_H:E95Q - - wild-type - - 3hc0_H:M48I - - 3na9_H:V34I 3na9_H:G35S 3na9_H:F50R 3na9_H:A60S 3na9_H:Q66H - - 3na9_H:V34H - - 3hc0_H:M48G - - 3hc0_H:S16E 3hc0_L:S46L - - - - - wild-type - - 3hc0_L:S46L - - 3hc0_H:V20I - - 3hc0_H:S16E - - 3hc0_H:S16Q - - 3hc0_H:Q3A - - wild-type - - - - - 3na9_H:V34I 3na9_H:G35S 3na9_H:F50R 3na9_H:E95Q 3na9_H:A60S 3na9_H:Q66H A0A1D5PBP6 - wild-type A0QWG6 - wild-type A4QUT2 - Y273F A4QUT2 - M299A A4QUT2 - wild-type A4QUT2 - W140F B8YLY0 - S227G Mutation not in the range of the protein
    B8YLY0 - wild-type D0WVP7 - L272R D0WVP7 - L261Q D0WVP7 - L272P O00189 - D190A O00189 - R283D O15350 - wild-type O25103 - wild-type O25776 - wild-type O26652 - wild-type O26652 - D54R O26652 - R85E O26652 - A159E O26652 - Y102A O26652 - W138A O26652 - K143A O49003 - wild-type O49003 - C450A O58720 - wild-type O59170 - wild-type O66037 - wild-type O66037 - wild-type O66529 - wild-type O68541 - A26C A334V G348D L380C Mutation not in the range of the protein
    O68541 - A334V G348D O68541 - G348D O68541 - A334V O68541 - A26C L380C Mutation not in the range of the protein
    O74035 - D135A O74035 - D105A O74035 - E8Q O74035 - E8A O74035 - D7N O74035 - wild-type P00004 - wild-type P00044 - T102A C108T P00044 - C108A P00044 - N58I P00044 - G12A N58I P00044 - N58Q P00044 - N58S P00044 - N58H P00044 - N58T P00044 - N58A P00044 - N58V P00044 - N58M P00044 - N58L P00044 - F88Y L91A P00044 - wild-type P00044 - C108T P00044 - F88Y P00044 - T75E C108T P00044 - L91A P00044 - T75E T102A C108T P00099 - F56Y E65Y V100I P00099 - wild-type P00099 - F29A V35M F56Y E65Y V100I P00099 - F29A V35M P00099 - F56Y E65Y P00099 - V100I P00099 - F29A V35M F56Y E65Y P00099 - F29A V35M V100I P00169 - wild-type P00173 - R31H E36S WildType AA does not match
    P00183 - wild-type P00183 - T102V P00257 - H114Q P00257 - wild-type P00257 - H114R P00257 - T112S P00257 - T112A P00257 - Y140S P00257 - D134E P00257 - Y140L P00257 - Y140F P00257 - H114T P00257 - Y140W P00257 - C153S P00282 - wild-type P00282 - C23A C46A P00282 - D43E K148R P00282 - K44R P00282 - D43E K148R K44R P00282 - D82C K94C P00289 - wild-type P00362 - C152S P00362 - wild-type P00362 - N315T P00362 - H179N P00362 - Y285V P00362 - D284G P00362 - T36Q T40S L44Q P00362 - Y47G S49G P00362 - Y47G R53G P00362 - W312F P00383 - wild-type P00396 - wild-type P00439 - wild-type P00441 - C7A P00441 - C7A C112S P00441 - wild-type P00441 - C112S P00442 - wild-type P00560 - wild-type P00563 - wild-type P00590 - wild-type P00644 - E155G P00644 - D159G P00644 - G170V P00644 - E155G E157G P00644 - E157G D159G P00644 - E155G E157G D159G P00644 - V148L P00644 - E157V P00644 - D159A P00644 - V148A P00644 - wild-type P00644 - V148W P00644 - L219A P00644 - G170W P00644 - E157G P00644 - E157A P00644 - A151T P00644 - K106G P00644 - V105A P00644 - L89A P00644 - G161S P00644 - L107A P00644 - A172S P00644 - E155G D159G P00644 - H206L P00644 - V148L G170V P00644 - V148L G161S G170V P00648 - S132C H149C P00648 - D55A P00648 - D59G P00648 - wild-type P00648 - D69M P00648 - D101N P00648 - R130Q P00648 - I51A I98V P00648 - S139A P00648 - T73G P00648 - T63R P00648 - I98V P00648 - I143V P00648 - R116M P00648 - I135V P00648 - S138A P00648 - G100A P00648 - G99A P00648 - G100V P00648 - G99V P00649 - wild-type P00651 - D102S P00651 - F72E WildType AA does not match
    P00651 - F72K WildType AA does not match
    P00651 - H66T W85Y P00651 - H118A P00651 - W85Y H118A P00651 - A30C G114R V144C WildType AA does not match
    P00651 - G114R P00651 - A30C C91A C98A G114R V144C WildType AA does not match
    P00651 - C91A C98A G114R WildType AA does not match
    P00651 - C66A G114R C121A WildType AA does not match
    P00651 - P140G Mutation not in the range of the protein
    P00651 - D102A P00651 - N93D WildType AA does not match
    P00651 - Q51K P00651 - wild-type P00651 - W85Y P00651 - Y50W P00651 - Y50W W85Y P00651 - Y68W P00651 - Y71W P00651 - Y71W W85Y P00651 - H66T P00651 - D102N P00651 - P119G WildType AA does not match
    P00651 - Y68W W85Y P00651 - F72V WildType AA does not match
    P00651 - T117V T119A P00651 - wild-type P00651 - T117A P00651 - V42A P00651 - V42S P00651 - V42T P00651 - V42C P00651 - V104T P00651 - T117V P00651 - T119A P00651 - wild-type P00651 - T119V P00651 - T117A T119A P00651 - T117A T119V P00651 - T117V T119V P00655 - wild-type P00690 - wild-type P00698 - M30L L74F P00698 - G135A P00698 - M30F L74F P00698 - G120A P00698 - C48A C133A P00698 - M30F P00698 - C24S C145A P00698 - C82A C98A P00698 - G89A P00698 - wild-type P00698 - G67A P00698 - M30L P00698 - G85A P00698 - C94A C112A P00711 - wild-type P00712 - W79F W137F P00712 - wild-type P00712 - W137F P00712 - W79F P00720 - A82P P00720 - T157V P00720 - T157R P00720 - T157L P00720 - I3P P00720 - T157I P00720 - T157E P00720 - I3L P00720 - C54T C97A P00720 - G113A P00720 - A93P P00720 - T157N P00720 - I3T P00720 - T157A P00720 - A42K P00720 - S44A P00720 - R96H P00720 - wild-type P00720 - L84M L91M L99M L118M L121M L133M F153M V87M V111M P00720 - L84M L91M L99M L118M L121M V111M L133M P00720 - L84M L91M L99M L118M L121M L133M F153M P00720 - L84M L91M L99M L118M L121M P00720 - L84M L91M L99M P00720 - K48A P00720 - I3F P00720 - I3E P00722 - Y503F WildType AA does not match
    P00722 - wild-type P00722 - E417Q P00735 - wild-type P00747 - wild-type P00749 - W50F P00749 - wild-type P00749 - wild-type P00749 - W50Y P00750 - W277Y P00750 - W277S P00750 - Y290A P00750 - Y290E P00750 - Y290Q P00750 - W277H P00750 - W277F P00750 - Y290K P00750 - Y290L P00750 - Y290F P00750 - Y290W P00750 - V280M P00750 - wild-type P00750 - wild-type P00750 - W288Y P00750 - W288F P00750 - W288L P00750 - W288S P00750 - V280A P00750 - V280I P00750 - V280T P00750 - V280S P00750 - V280L P00766 - wild-type P00779 - wild-type P00782 - G276A P00782 - N325A P00782 - T129C S194C P00782 - S194C P00782 - M157F P00782 - N325S P00782 - Y324K P00782 - Q313C P00782 - N183D P00782 - M157F Y324K N325S S328C P00782 - T129C P00782 - wild-type P00784 - wild-type P00784 - wild-type P00791 - wild-type P00800 - wild-type P00883 - D129A P00883 - D129N P00883 - D129V P00883 - D129G P00883 - wild-type P00883 - D129Q P00918 - Y7S P00918 - Y7N P00918 - Y7D P00918 - Y7F P00918 - Y7W P00918 - Y7A P00918 - Y7I P00918 - wild-type P00918 - Y7R P00918 - K169P P00918 - E233P P00918 - wild-type P00921 - wild-type P00929 - wild-type P00942 - wild-type P00943 - K14G P00943 - H13N P00943 - H13N K14G P00943 - wild-type P00974 - C65V C86A P00974 - C65A C86A P00974 - N79G P00974 - C65G C86A P00974 - C65T C86A P00974 - G92A P00974 - G91A P00974 - M87A P00974 - R88A P00974 - C65S C86A P00974 - T89A P00974 - G47A P00974 - E84A P00974 - N59A P00974 - R55A P00974 - I54A P00974 - I53A P00974 - R52A P00974 - P48A P00974 - K50T P00974 - R36A P00974 - P37A P00974 - D38A P00974 - F39A P00974 - L41A P00974 - E42A P00974 - P43A P00974 - P44A P00974 - Y45A P00974 - T46A P00974 - K61A P00974 - D85A P00974 - G63A P00974 - T67A P00974 - S82A P00974 - K81A P00974 - N79A P00974 - R77A P00974 - wild-type P00974 - F57A P00974 - Y58A P00974 - Y70G P00974 - N78G P00974 - K76A P00974 - C65A C86S P00974 - F80A P00974 - R74A P00974 - G72A P00974 - G71A P00974 - Y70A P00974 - V69A P00974 - Q66A P00974 - L64A P00974 - K50S 
P00974 - R74M P00974 - K50M P00974 - K50V P00974 - K50L P00974 - K50Y P00974 - K50F P00974 - K50I P00974 - K50W P00974 - K81M P00974 - K50E P00974 - R88M P00974 - E84M P00974 - K50A P00974 - K50H P00974 - T89M P00974 - K50Q P00974 - K50G P00974 - K50D P00974 - K50N P00974 - M87L P00974 - T67M P00974 - L64M P00974 - L41M P00974 - T46M P00974 - F57M P00974 - K50R P01006 - M104E P01006 - V44M P01006 - V44L P01006 - V44I P01006 - M134G P01006 - M104L P01006 - M134L P01006 - V44G P01006 - V44A P01006 - M104K P01006 - M134V P01006 - M134A P01006 - M104A P01006 - M104V P01006 - wild-type P01006 - M104I P01006 - M104D P01006 - V44F P01006 - M134I P01006 - M104G P01006 - D114C P01006 - D114N P01012 - wild-type P01040 - wild-type P01051 - V54A P01051 - wild-type P01051 - V14A P01053 - E34A E35A P01053 - L69A P01053 - V71A P01053 - I77V P01053 - I77A P01053 - S32G E34A E35A P01053 - I49A I77V P01053 - I49A P01053 - wild-type P01053 - S32A P01053 - S32G P01053 - S32A E34A E35A P01053 - I40V P01053 - V39A P01053 - L28A P01053 - I49V P01053 - V67A P01092 - wild-type P01308 - S98D T51E P01308 - P52D P01308 - wild-type P01584 - wild-type P01588 - wild-type P01607 - wild-type Failure in Uniprot request
    P01704 - wild-type P01857 - K243Q Q301L Q321R P01857 - V146A P01857 - K243Q Q321R P01857 - Q230E P01857 - S237P P01857 - Q230E Q301L P01857 - Q321R P01857 - K243E P01857 - K243Q P01857 - K275R P01857 - S283F P01857 - Q301L P01857 - S307T P01857 - T320I P01857 - Q321K P01857 - N304D P01857 - wild-type P01857 - K243Q G329C P01857 - K243Q Q301L P01857 - S258C P279C P01857 - P226C A314C P01857 - P226C A314C S258C P279C P01868 - wild-type P02454 - wild-type P02526 - wild-type P02549 - R34W P02549 - R28C P02549 - R28H P02549 - R28L P02549 - I24T P02549 - I24S P02549 - wild-type P02549 - R45T P02549 - R41W P02549 - V31A P02549 - G46V P02549 - L49F P02549 - K48R P02549 - R28S P02549 - R45S P02625 - P22A WildType AA does not match
    P02625 - K81S P02625 - A22P K81S P02625 - P22A P27A WildType AA does not match
    P02625 - P27A WildType AA does not match
    P02625 - H27P P02625 - A22P P02625 - A22P H27P P02625 - E109S P02625 - wild-type P02625 - E109S P02625 - wild-type P02672 - wild-type P02699 - wild-type P02730 - K56E P02730 - P327R P02730 - G130R P02730 - wild-type P02730 - E40K P02751 - wild-type P02753 - wild-type P02754 - wild-type P02768 - wild-type P02768 - wild-type P02792 - wild-type P02872 - wild-type P02877 - wild-type P02924 - wild-type P02945 - wild-type P02945 - wild-type P02945 - E22Q E207Q E217Q P02966 - wild-type P03034 - Y89C G47A G49A P03034 - G47A P03034 - G49A P03034 - K5Q P03034 - Q34Y P03034 - Q45Y P03034 - A50V P03034 - Y23H P03034 - Y89C P03034 - A67T P03034 - I85S P03034 - wild-type P03034 - G47A G49A P03034 - G49S P03034 - G49N P03036 - wild-type P03036 - wild-type P03036 - V55C WildType AA does not match
    P03040 - wild-type P03051 - L41A P03051 - C38A C52V P03051 - wild-type P03051 - L41V P03069 - V278V P03069 - R249V Y265W P03069 - M250V Y265W P03069 - wild-type P03437 - wild-type P03695 - wild-type P04070 - wild-type P04080 - wild-type P04083 - wild-type P04114 - wild-type P04168 - Y12H P04168 - Y12W P04168 - wild-type P04168 - Y12F P04179 - Q167H P04179 - Q167S P04179 - Q167N P04179 - Y58F P04179 - Q167E P04179 - Q167A P04179 - Q167V P04179 - I82T P04179 - wild-type P04190 - wild-type P04268 - wild-type P04275 - G1324S P04275 - wild-type P04275 - G1324A P04391 - S56H P04391 - K87Q P04391 - R320A P04391 - wild-type P04391 - A326G P04525 - wild-type P04637 - M133L V203A N239Y N268D P04637 - M133L V203A N239Y N268D Y236F T253I P04637 - wild-type P04637 - wild-type P04637 - Y236F T253I P04745 - wild-type Failure in Uniprot request
    P05230 - L59P L88V V124L P05230 - C132S P05230 - C98S P05230 - wild-type P05230 - C31S H108G P05230 - H108G P05230 - C98S C132S P05230 - L59F F147W P05230 - F147W P05230 - L59P P05230 - F100W P05230 - L59F P05230 - L88V P05230 - L88V V124L P05230 - V124L P05798 - E74K P05798 - wild-type P05798 - Q38A P05798 - E41K P05798 - R65A P05798 - H85Q P05798 - E54Q P05819 - wild-type P06179 - wild-type P06278 - wild-type P06654 - wild-type P06876 - V103L P06876 - wild-type P06876 - V103I P06996 - R95G P06996 - wild-type P06996 - R95S P06996 - R95A P06996 - R58H P06996 - D126G P06996 - R95C P06996 - R95A D126G P07017 - V348M WildType AA does not match
    P07017 - S327L WildType AA does not match
    P07017 - T313I P07017 - E303K WildType AA does not match
    P07017 - S461) P07017 - A438V WildType AA does not match
    P07017 - wild-type P07017 - V435I WildType AA does not match
    P07313 - wild-type P07751 - A1019K P07751 - wild-type P07751 - N1010A P07751 - A1019G P07751 - A1019E P07751 - A1019K P07751 - wild-type P07751 - N1010A P07751 - A1019G P07751 - A1019E P07751 - D1011G P07751 - A974V V986L M988V V1007I V1021L P07980 - wild-type P08038 - N72K P08038 - N70Q P08038 - wild-type P08253 - wild-type P08709 - wild-type P08753 - wild-type P08753 - G203A P08753 - Q204L P08753 - T329A P08753 - A326S P08758 - E17G P08758 - wild-type P08799 - wild-type P08839 - wild-type P08839 - H189A P08839 - H189E P08877 - D69A G49E P09237 - wild-type P09353 - wild-type P09372 - wild-type P09803 - wild-type P09850 - wild-type P09850 - S128C N176C P09850 - V126C A180C P09871 - wild-type P09954 - wild-type P09955 - wild-type P0A1J1 - wild-type P0A379 - wild-type P0A3D9 - D127K P0A3E0 - wild-type P0A3E0 - E21K P0A3E0 - E41K P0A3E0 - E73K P0A3E0 - D76K P0A3E0 - D151K P0A3H0 - V42I P0A3H0 - K38N P0A3H0 - E34D P0A3H0 - A27S P0A3H0 - wild-type P0A3H0 - G15E P0A6Y8 - wild-type P0A759 - wild-type P0A877 - K109N P0A877 - G211E P0A877 - G211R P0A877 - P28S P0A877 - G51D P0A877 - T63K P0A877 - P53T P0A877 - E49S P0A877 - P28L P0A877 - D112G P0A877 - S6P P0A877 - P207A P0A877 - P96A P0A877 - P132G P0A877 - E49Q P0A877 - P132A P0A877 - P62A P0A877 - P57A P0A877 - C154V P0A877 - C154A P0A877 - C154S P0A877 - C118V P0A877 - C118A P0A877 - C118S P0A877 - C81V P0A877 - C81A P0A877 - C81G P0A877 - C81S P0A877 - P53H P0A877 - F114L P0A877 - P21S P0A877 - A67V P0A877 - T24M P0A877 - P78L P0A877 - Y102C P0A877 - Y102H P0A877 - F22S P0A877 - D112N P0A877 - Q65R P0A877 - P62Q P0A877 - M101T P0A877 - M101I P0A877 - P78S P0A877 - E49G P0A877 - D56G P0A877 - Y115C P0A877 - wild-type P0A877 - S33L P0A877 - D60G P0A877 - M101V P0A881 - wild-type P0A8U6 - wild-type P0A9B2 - wild-type P0A9X9 - wild-type P0AA25 - L80C T90C P0AA25 - wild-type P0AA25 - L79C K91C P0AA25 - C33S C36S L79C P0AA25 - M38L P41S P0AA25 - C33S C36S P0AA25 - M38L P0AA25 - wild-type P0AA25 - L79K P0AA25 - L79R 
P0AA25 - T78C V92C P0AA25 - wild-type P0ABQ4 - wild-type P0ABQ4 - G121V P0ABQ4 - D27F P0ABQ4 - T113V P0ABQ4 - S138Y P0ABQ4 - D87P P0ABQ4 - G67H P0ABQ4 - A6I P0ABQ4 - E139V P0ABQ4 - L156Y P0ABQ4 - Q108D P0ABQ4 - G15W P0ABQ4 - E157F P0ABQ4 - H141E P0ABQ4 - W74F P0ABQ4 - S49E P0ABQ4 - C152I P0ABQ4 - S135I P0ABQ4 - wild-type P0ABQ4 - G121L P0ABQ4 - V119F P0ABQ4 - E120P P0ABQ4 - T68N P0ABQ4 - D116F P0ABQ4 - H114R P0ABQ4 - G43P P0ACF0 - wild-type P0ACJ8 - wild-type P0AE67 - wild-type P0AEX9 - P185A P0AEX9 - P74A P0AEX9 - P185S P0AEX9 - P159S P0AEX9 - P74S P0AEX9 - wild-type P0AEX9 - P159A P0AFG6 - wild-type P0C1B3 - wild-type P0C1B3 - wild-type P0C1H3 - wild-type P0C2S3 - A6C R340C P0C2S3 - wild-type P0C2S3 - N220D T222A P0C2S3 - N9D P0C2S3 - E333G P0C2S3 - I293A P0C2S5 - wild-type P0C7U7 - wild-type P0CG48 - K641G P0CG48 - K641I P0CG48 - K641F P0CG48 - K641L P0CG48 - K641M P0CG48 - K641N P0CG48 - K641E P0CG48 - K641Q P0CG48 - K641S P0CG48 - K641T P0CG48 - K641V P0CL66 - wild-type P10408 - wild-type P10408 - E210Q P11073 - wild-type P11166 - wild-type P11540 - C41A P11540 - C83A P11540 - wild-type P11540 - C41A C83A P11961 - F166W P12528 - wild-type P12528 - G178R P12528 - E310V P12528 - G245R P12528 - R383S P12528 - G324D P12528 - R286K P12528 - T236I P13123 - W24A P13123 - wild-type P13123 - V30I P13123 - wild-type P13231 - wild-type P13284 - wild-type P13569 - F429S F494N Q673R L636E WildType AA does not match
    P13569 - F429S F494N Q673R WildType AA does not match
    P13569 - G550E R553Q R553K WildType AA does not match
    P13569 - F494N Q637R P13569 - V510D P14080 - wild-type P14090 - wild-type P14295 - wild-type P14711 - S63T P14711 - Y53H P14711 - T59I P14711 - Y53H T59I S63T P14711 - Y53H T59I P14711 - T59I S63T P14711 - Y53H S63T P15057 - wild-type P15452 - wild-type P16404 - wild-type P17778 - wild-type P18614 - 1mhp_L:A25H 1mhp_L:M32T WildType AA does not match
    P18614 - 1mhp_L:M32F WildType AA does not match
    P18614 - 1mhp_L:V29F 1mhp_L:Y70V WildType AA does not match
    P18614 - wild-type P18614 - 1mhp_L:V29T WildType AA does not match
    P18614 - 1mhp_L:V29F 1mhp_L:M32I 1mhp_L:Y70F WildType AA does not match
    P18614 - 1mhp_L:A25S 1mhp_L:Y70F WildType AA does not match
    P18614 - 1mhp_L:V29I 1mhp_L:M32L WildType AA does not match
    P18614 - 1mhp_L:V29S WildType AA does not match
    P18614 - 1mhp_L:M32L 1mhp_L:Y70F WildType AA does not match
    P18614 - 1mhp_L:A25F 1mhp_L:M32I WildType AA does not match
    P18614 - 1mhp_H:M34I WildType AA does not match
    P18614 - 1mhp_L:V29I 1mhp_L:M32L 1mhp_L:Y70F WildType AA does not match
    P18614 - 1mhp_H:F29L P18614 - 1mhp_H:A24V WildType AA does not match
    P18614 - 1mhp_H:A24F 1mhp_H:F29M 1mhp_H:M34F WildType AA does not match
    P18614 - 1mhp_H:M34H WildType AA does not match
    P18614 - 1mhp_L:A25S 1mhp_L:V29L 1mhp_L:M32L 1mhp_L:Y70F WildType AA does not match
    P18614 - 1mhp_H:F29W 1mhp_H:M34A WildType AA does not match
    P18614 - 1mhp_L:A25V WildType AA does not match
    P18614 - 1mhp_H:F29W 1mhp_H:M34F WildType AA does not match
    P18614 - 1mhp_H:A24S 1mhp_H:F29H 1mhp_H:M34F WildType AA does not match
    P18614 - 1mhp_H:M34V WildType AA does not match
    P19873 - R52Q P19873 - wild-type P19873 - C3S C48S P19873 - E38C W54C P19873 - V42C R52C P19873 - R50A P19873 - R52A P19873 - P4G P19873 - T43A P19873 - R50K P20058 - wild-type P21549 - S48H D52E I340M P21549 - P11L P21549 - P11L I340M P21549 - P11L I340M H83R P21549 - P11L I340M G170R P21549 - P11L I340M R197Q P21549 - P11L I340M I244T P21549 - P11L I340M A295T P21549 - P11L I340 M A368T P21549 - Q23R P21549 - S48H P21549 - S50H P21549 - I340M P21549 - V113A P21549 - S48H I340M P21549 - S48H D52E P21549 - D52E I340M P21549 - wild-type P21549 - G41R P21549 - G41V P21549 - D52E P21549 - Q23R S48H D52E V113A I340M P21549 - Q23R S48H D52E I340M P21549 - S48H D52E V113A I340M P22069 - F28T P22069 - wild-type P22069 - F36Y P22069 - F28A P22069 - C87A C104A P22069 - M23L P22914 - wild-type P23904 - wild-type P23907 - R171Q P23907 - A136V R171Q P23907 - 1uw3_A:H155R WildType AA does not match
    P24092 - F42L WildType AA does not match
    P24092 - wild-type P24565 - wild-type P24821 - L820K P24821 - D850K P24821 - V811C I821C P24821 - A885V P24821 - E887I P24821 - I833F P24821 - E887I T890K P24821 - E887I T890K D850K P24821 - E887I T890K D850K I833F P24821 - T888C E870C P24821 - I833C S875C P24821 - T888C E870C I833C S875C P24821 - Q808K  L820K  D850K  T890K P24821 - T890K P24821 - Q808K P24821 - wild-type P25984 - A177P P25984 - A22P P25984 - S24P P25984 - T149P P25984 - H222P P25984 - L275P P25984 - L316P P25984 - wild-type P25984 - A22P S24P P25984 - A347P P25984 - S24P L316P P26514 - wild-type P26747 - T294I P26747 - A108V P26747 - W48Q P26747 - G282D P26747 - D302G P26747 - G232D P26747 - D174N P26747 - Y411H P26747 - F353L P26747 - D174G P26882 - wild-type P26882 - K227A P26882 - K308A P26882 - K227A K308A P27064 - wild-type P27169 - wild-type P28366 - wild-type P28366 - E208Q P29392 - wild-type P29600 - wild-type P29957 - T256V P29957 - K324R N174D V220F Q188I T256V P29957 - N36R P29957 - L243R P29957 - R88E P29957 - M403V P29957 - N312V P29957 - Q188I P29957 - Q188I V220F P29957 - N174D V220F P29957 - E303W P29957 - N174D P29957 - K324R P29957 - V220F P29957 - wild-type P29957 - Q82C A123C P30533 - wild-type P30879 - wild-type P32081 - wild-type P32081 - K39E P32081 - E42Q P32081 - S48E P32081 - V20K P32081 - K5Q P32081 - E3Q P32081 - E3R P32081 - E53Q P32081 - N55D P32081 - K65E P33120 - wild-type Failure in Uniprot request
    P34093 - P105G WildType AA does not match
    P34093 - wild-type P35495 - wild-type P36217 - wild-type P36217 - T35C T61C K91R P37093 - wild-type P38909 - wild-type P38909 - N62I WildType AA does not match
    P38993 - N77A P38993 - N198A P38993 - N113A P39060 - wild-type P39476 - wild-type P39476 - I30) P39476 - F32A P39476 - F32Y P39476 - W24A P39940 - wild-type P39940 - E34A WildType AA does not match
    P39940 - E34C WildType AA does not match
    P39940 - wild-type P39940 - I334L I340V WildType AA does not match
    P39940 - I334L WildType AA does not match
    P39940 - E34P WildType AA does not match
    P39940 - E34H WildType AA does not match
    P39940 - E34G WildType AA does not match
    P39940 - E34K WildType AA does not match
    P39940 - E34Q WildType AA does not match
    P39940 - I334L I340F WildType AA does not match
    P39940 - E34N WildType AA does not match
    P39940 - E34T WildType AA does not match
    P39940 - E34I WildType AA does not match
    P39940 - E34W WildType AA does not match
    P39940 - E34D WildType AA does not match
    P39940 - E34F WildType AA does not match
    P39940 - E34V WildType AA does not match
    P39940 - E34Y WildType AA does not match
    P39940 - E34L WildType AA does not match
    P39940 - I340L WildType AA does not match
    P39940 - I334L I340L WildType AA does not match
    P39940 - I340V WildType AA does not match
    P39940 - I334V I340L WildType AA does not match
    P39940 - I334V I340V WildType AA does not match
    P39940 - I334V WildType AA does not match
    P39940 - I334V I340F WildType AA does not match
    P39940 - I334F I340V WildType AA does not match
    P39940 - I334F WildType AA does not match
    P39940 - I340F WildType AA does not match
    P39940 - I334F I340L WildType AA does not match
    P39940 - E34S WildType AA does not match
    P41016 - H29E P41016 - E46K P41016 - wild-type P41365 - wild-type P42790 - wild-type P43351 - wild-type P43895 - wild-type P46406 - wild-type P48781 - wild-type P48816 - wild-type P48842 - S201D Q204T P48842 - G120D A172R P48842 - D198N P48842 - D74N P48842 - wild-type P48842 - G322A P48842 - Y311F P53779 - wild-type P53779 - V1833M Mutation not in the range of the protein
    P53779 - M1775R Mutation not in the range of the protein
    P53779 - R1699W Mutation not in the range of the protein
    P56634 - wild-type P56740 - wild-type P60546 - wild-type P60953 - wild-type P60953 - T35A P61626 - I107A P61626 - D36N P61626 - Q76A P61626 - R68A P61626 - Y56G P61626 - Y56A P61626 - R39G P61626 - R39A P61626 - Y63F P61626 - I74M P61626 - V118T P61626 - I74L P61626 - V20A P61626 - I124V P61626 - I107V P61626 - V118F P61626 - H96A P61626 - I77G P61626 - N136A P61626 - Y72F P61626 - I74A P61626 - I41A P61626 - D67N P61626 - D85N P61626 - D120N P61626 - D138N P61626 - I77T P61626 - G147A P61626 - I77S P61626 - I77M P61626 - I77L P61626 - I74F P61626 - Y142F P61626 - A114M P61626 - I74G P61626 - Y81F P61626 - I77F P61626 - H96G P61626 - E53L P61626 - R68G P61626 - T61A P61626 - Y56F P61626 - Y38F P61626 - V148A P61626 - V143A P61626 - V139A P61626 - V128A P61626 - T70A P61626 - T58A P61626 - S100A P61626 - S98A P61626 - S79A P61626 - S69A P61626 - S54A P61626 - V111A P61626 - S42A P61626 - T29A P61626 - T88A P61626 - T29V P61626 - T58V P61626 - G55Q P61626 - I77Y P61626 - A50L P61626 - N45L P61626 - I77V P61626 - I74V P61626 - I41V P61626 - T88V P61626 - T70V P61626 - T61V P61626 - V117T P61626 - A114S P61626 - V111T P61626 - A110S P61626 - A50S P61626 - A27S P61626 - L26T P61626 - Q76G P61626 - N136G P61626 - I124A P61626 - V92M P61626 - V92G P61626 - K19M P61626 - K19A P61626 - V20F P61626 - V20M P61626 - V20L P61626 - V20I P61626 - V20G P61626 - V117A P61626 - V128P P61626 - A65P P61626 - G145A P61626 - G123A P61626 - D109P P61626 - G90A P61626 - I74T P61626 - G86A P61626 - G66A P61626 - G55A P61626 - P89G P121G P61626 - P121G P61626 - P89G P61626 - V92A P61626 - G40A P61626 - wild-type P61626 - E25Q P61626 - G37A P61626 - V92I P61626 - V92L P61626 - G34A P61626 - V92F P61626 - I77A P61626 - V118A P61626 - V128R P61626 - V128N P61626 - V128D P61626 - V128Y P61626 - V92R P61626 - V92N P61626 - V92D P61626 - V92Y P61626 - V92S P61626 - V20R P61626 - V20D P61626 - V20N P61626 - V20S P61626 - C95A C113A P61626 - V128F P61626 - V128M P61626 - V128L P61626 - V128I 
P61626 - V128G P61626 - V20Y P62166 - K36D G47E P62166 - K36D G47E P62166 - wild-type P62166 - wild-type P62894 - wild-type P62990 - wild-type Failure in Uniprot request
    P63284 - wild-type P67939 - C124W M133L C135I C141F WildType AA does not match
    P67939 - L145I V157I WildType AA does not match
    P67939 - wild-type P67939 - H115E W146R Q165K Q167E L206H F212K N268R WildType AA does not match
    P67939 - C124W M133L WildType AA does not match
    P67939 - M133L C141V Y236F T253L WildType AA does not match
    P67939 - L145V V157L WildType AA does not match
    P68135 - wild-type P68390 - wild-type P69168 - T31I P69168 - T31I T119I Q147H D227Y P69168 - wild-type P69168 - T119I D227Y P69168 - T31I T119I D227Y P69168 - T119I P69328 - wild-type P69441 - G85V P69441 - F86L P69441 - wild-type P69441 - R88G P69441 - D84H P69488 - E61V P69488 - S11V P69488 - wild-type P69488 - H72K P69488 - T88R P69488 - S11V E61V P69488 - T17K P69488 - C39D P69488 - S48K H72K S82K Q87K T88R P69488 - S48K P69488 - S48D P69488 - Q87K P69488 - S48K P69488 - S82R P69488 - S82K P69488 - T17K S48D P69488 - Y60T P69488 - S11V E59K E61T P69488 - wild-type P69488 - S11V P69488 - S11A P69488 - S11I P69488 - G45Q P69488 - E59L P69488 - E59K P69488 - E59Q P69488 - Q87K T88R P69488 - E61V P69488 - E61T P69488 - S11V E61T P69488 - S11V E61V Q73V P69488 - S11V E61T Q73V P69488 - E59K E61V P69488 - S11V E61V P69488 - Y86D P69488 - Q73V P69488 - S110R P69488 - E61H P69905 - wild-type P78352 - wild-type P78352 - D332P P78352 - D332G P78352 - E401R P78352 - E334Q P78352 - E334L P80025 - wild-type P80028 - wild-type P80319 - wild-type P80324 - wild-type P81708 - wild-type P81708 - A93S P81708 - I56L P81708 - H21G P81708 - V109K P96110 - N118R P96110 - S129R P96110 - S129R T159E P96110 - wild-type P96110 - S129R T159E N118R P96110 - S129R T159E S161E P96110 - T159E P96110 - S129R T159E N118R S161E P96988 - 1k42_A:D160A WildType AA does not match
    P96988 - 1k42_A:E23A WildType AA does not match
    P96988 - 1k42_A:E11A 1k42_A:D160A WildType AA does not match
    P96988 - 1k42_A:E11A WildType AA does not match
    P96988 - wild-type P96988 - 1k42_A:E26A WildType AA does not match
    P96988 - 1k42_A:D29A WildType AA does not match
    P98002 - wild-type Q00511 - wild-type Q1J0W0 - wild-type Q28198 - wild-type Q2XSK9 - N72K Q2XSK9 - N70Q Q2XSK9 - wild-type Q2XSL7 - wild-type Q2XSL7 - Q73N Q2XSL7 - K75N Q47108 - wild-type Q56304 - D168T Q56304 - wild-type Q56304 - T139E D168T Q56304 - T139E Q59196 - wild-type Q59962 - wild-type Q60675 - wild-type Q63610 - A74Q A78N A81Q WildType AA does not match
    Q63610 - A74L A78V A81L WildType AA does not match
    Q63610 - wild-type Q63K37 - wild-type Q6B184 - wild-type Failure in Uniprot request
    Q6VS32 - wild-type Q6VS32 - G133D Q6VS32 - L130R Q6VS32 - L130R G133D Q6VS32 - wild-type Q6VS32 - 4z3t_A:D133G WildType AA does not match
    Q6VS32 - 4z3t_A:R130L WildType AA does not match
    Q6VS32 - 4z3t_A:R130L 4z3t_A:D133G WildType AA does not match
    Q7BQ98 - wild-type Q7SIA8 - wild-type Q7SIB6 - wild-type Q7SIG1 - wild-type Q7SIG1 - K102Q Q7YUF0 - A416E R417K D420Q I422L Q7YUF0 - E411D K412N A414C Y416F WildType AA does not match
    Q8GB52 - N142P Q8GB52 - N142P I144P T404P Q8GB52 - N142P I144P N377P T404P Q8GB52 - N142P I144P Q8GB52 - T404P Q8GB52 - N377P Q8GB52 - I144P Q8GB52 - N142P I144P N377P Q8GB52 - wild-type Q8NBP7 - Q152N Q8NBP7 - S386A Q8NBP7 - Q152H Q8NBP7 - wild-type Q8RR56 - wild-type Q93P54 - wild-type Q97ZF4 - wild-type Q99983 - E303R Q99983 - E284R Q99983 - N278R Q99983 - D271R Q99983 - D254R Q99983 - E225R Q99983 - E173R Q99983 - wild-type Q99983 - E284R E303R Q9EYL5 - wild-type Q9EYL5 - P170A WildType AA does not match
    Q9NAV8 - wild-type Q9S8W1 - wild-type Q9ZEU2 - wild-type R9S082 - I143V R9S082 - I143A R9S082 - I143M R9S082 - wild-type R9S082 - L136V R9S082 - wild-type R9S082 - I29L WildType AA does not match
    R9S082 - I29M WildType AA does not match
    R9S082 - L30I R9S082 - L30M R9S082 - I29V WildType AA does not match
    R9S082 - I29A WildType AA does not match
    R9S082 - I112L R9S082 - L50I R9S082 - I112M R9S082 - I143L R9S082 - L30V R9S082 - L30A R9S082 - L70M R9S082 - L70V R9S082 - L70A R9S082 - L136I R9S082 - L136M R9S082 - I112A R9S082 - L70I Total time analyzing all the dataFrame 254.92953276634216 s

%% Cell type:markdown id: tags:

# The fail dictionary contains all the proteins with an accession number for which there is an inconsistency between UniProt and ProThermDB
The fail dictionary contains all the proteins with an accession number for which there is an inconsistency between UniProt and ProThermDB

%% Cell type:code id: tags:

``` python
# Inspect the accession -> failed-mutations mapping filled in by the curation loop.
fail
```

%% Output

    ---------------------------------------------------------------------------
    NameError                                 Traceback (most recent call last)
    <ipython-input-23-a44b30b36141> in <module>
    ----> 1 fail

    NameError: name 'fail' is not defined

%% Cell type:markdown id: tags:

# The following cell creates a file with ACCNumber-Mutation,T_m,PDBID,sequence
The following cell creates a file with ACCNumber-Mutation,T_m,PDBID,sequence

%% Cell type:code id: tags:

``` python
# Collect, for every "<accession>-<mutation>" key, the melting temperature and
# the upper-cased PDB id, then export one CSV row per successfully built sequence.
temp_accnum = no_wild_type_repeated[['UniProt_ID','MUTATION','Tm_(C)','PDB_wild']]
arr_acc_temp = temp_accnum.to_numpy()

temp_dic = {}
for acc, mutation_label, melting_temp, pdb_id in arr_acc_temp:
    # Wild-type rows are keyed as "WT"; otherwise strip the "(Based on ...)" suffix.
    if mutation_label == 'wild-type':
        mut = 'WT'
    else:
        mut = mutation_label.split('(')[0].strip()
    temp_dic["{}-{}".format(acc, mut)] = [melting_temp, pdb_id.upper()]

with open('sequences_protherm.csv','w') as out_file:
    out_file.write("AccNumber,T_m,PDBID,Sequence \n")
    # Entries whose sequence is None (mutation could not be applied) are skipped.
    out_file.writelines(
        "{},{},{},{} \n".format(key, temp_dic[key][0], temp_dic[key][1], sequence)
        for key, sequence in Sequences.items()
        if sequence is not None
    )
```

%% Cell type:markdown id: tags:

# DeepLearning and Machine Learning Models using protein sequences

%% Cell type:code id: tags:

``` python
import deepchem as dc
from rdkit import Chem
import pandas as pd
```

%% Cell type:code id: tags:

``` python
# Load the curated sequence table written by the export cell above.
Final_Data  = pd.read_csv("sequences_protherm.csv")
```

%% Cell type:code id: tags:

``` python
# The column name carries a trailing space because the CSV header was written
# as "Sequence " (with a space before the newline).
seq_list = Final_Data['Sequence '].tolist()
```

%% Cell type:code id: tags:

``` python
# One-letter codes used as the featurizer alphabet, in alphabetical order.
codes = list("ACDEFGHIKLMNPQRSTVWY")
```

%% Cell type:markdown id: tags:

# In order to build a OneHotFeaturizer for all the selected sequences, we will determine the maximum sequence length
In order to build a OneHotFeaturizer for all the selected sequences, we will determine the maximum sequence length

%% Cell type:code id: tags:

``` python
# Length of the longest sequence (0 when the list is empty).
max_seq = max((len(sequence) for sequence in seq_list), default=0)
max_seq
```

%% Output

    4564

%% Cell type:code id: tags:

``` python
# Configure DeepChem's one-hot featurizer with the `codes` alphabet and the
# longest sequence length as max_length.
# NOTE(review): the feature depth of 21 shown further below is one more than
# len(codes); presumably an extra slot for characters outside the alphabet —
# confirm against the DeepChem documentation.
OneHotFeaturizer = dc.feat.OneHotFeaturizer(codes,max_length=max_seq)
```

%% Cell type:code id: tags:

``` python
# Encode every sequence in seq_list with the featurizer configured above.
features = OneHotFeaturizer.featurize(seq_list)
```

%% Cell type:markdown id: tags:

The code ``` temp = [float(x.split("(")[0]) for x in list(Final_Data['T_m'])] ```
will extract the melting temperature from the ProthermDB and avoid the reported standard deviation enclosed in parenthesis

%% Cell type:code id: tags:

``` python
# Keep only the number before "(" in each T_m entry and convert it to float.
temp = [float(raw_tm.partition("(")[0]) for raw_tm in Final_Data['T_m']]
```

%% Cell type:code id: tags:

``` python
# Bundle the one-hot features (X) and the melting temperatures (y) into a
# DeepChem NumpyDataset.
dc_dataset = dc.data.NumpyDataset(X=features,y=temp)
```

%% Cell type:code id: tags:

``` python
# Show the shape of the full feature array.
dc_dataset.X.shape
```

%% Output

    (1266, 4564, 21)

%% Cell type:code id: tags:

``` python
from deepchem import splits

# Random train/test partition; the fixed seed keeps the split reproducible.
splitter = splits.RandomSplitter()
train, test = splitter.train_test_split(dataset=dc_dataset, seed=42)
```

%% Cell type:code id: tags:

``` python
# Per-sample shape; this is what the Keras model below receives as input_shape.
dc_dataset.X.shape[1:]
```

%% Output

    (4564, 21)

%% Cell type:markdown id: tags:

# We will create a Keras model with several Dense layers using ReLU activation. Finally, the last Dense layer will act as the regression output
We will create a Keras model with several Dense layers using ReLU activation. Finally, the last Dense layer will act as the regression output

%% Cell type:code id: tags:

``` python
from tensorflow import keras
from tensorflow.keras import layers

# Each sample is a 2-D one-hot matrix (sequence length x alphabet width).
# Dense only transforms the last axis, so without flattening the network would
# emit one value per residue position (output shape (None, length, 1)) instead
# of a single melting temperature per protein.
model = keras.Sequential([
    layers.Flatten(input_shape=dc_dataset.X.shape[1:]),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(units=1),  # linear output: one regressed T_m per sequence
])
model.compile(loss='mae', optimizer='adam')

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
model.summary()

history = model.fit(
    train.X, train.y,
    validation_data=(test.X, test.y),
    batch_size=24,
    epochs=10,
)

## perform a plot of loss vs epochs
import matplotlib.pyplot as plt
history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
```

%% Output

    Epoch 1/10
    43/43 [==============================] - 8s 138ms/step - loss: 59.1024 - val_loss: 58.9272
    Epoch 2/10
    43/43 [==============================] - 5s 125ms/step - loss: 56.5079 - val_loss: 47.7227
    Epoch 3/10
    43/43 [==============================] - 5s 126ms/step - loss: 39.1362 - val_loss: 13.2655
    Epoch 4/10
    43/43 [==============================] - 5s 127ms/step - loss: 16.5047 - val_loss: 13.3590
    Epoch 5/10
    43/43 [==============================] - 5s 127ms/step - loss: 16.1596 - val_loss: 13.2370
    Epoch 6/10
    43/43 [==============================] - 6s 131ms/step - loss: 15.7966 - val_loss: 13.4440
    Epoch 7/10
    43/43 [==============================] - 6s 129ms/step - loss: 16.0694 - val_loss: 13.1121
    Epoch 8/10
    43/43 [==============================] - 6s 133ms/step - loss: 15.8541 - val_loss: 13.3269
    Epoch 9/10
    43/43 [==============================] - 6s 129ms/step - loss: 16.1645 - val_loss: 13.0633
    Epoch 10/10
    43/43 [==============================] - 5s 127ms/step - loss: 16.1484 - val_loss: 13.2540
    Model: "sequential"
    _________________________________________________________________
    Layer (type)                 Output Shape              Param #
    =================================================================
    dense (Dense)                (None, 4564, 32)          704
    _________________________________________________________________
    dropout (Dropout)            (None, 4564, 32)          0
    _________________________________________________________________
    dense_1 (Dense)              (None, 4564, 32)          1056
    _________________________________________________________________
    dropout_1 (Dropout)          (None, 4564, 32)          0
    _________________________________________________________________
    dense_2 (Dense)              (None, 4564, 32)          1056
    _________________________________________________________________
    dropout_2 (Dropout)          (None, 4564, 32)          0
    _________________________________________________________________
    dense_3 (Dense)              (None, 4564, 1)           33
    =================================================================
    Total params: 2,849
    Trainable params: 2,849
    Non-trainable params: 0
    _________________________________________________________________
    None

    <AxesSubplot:>


%% Cell type:markdown id: tags:

# TODO
- How can a DeepChem KerasModel be used to perform the fit?
- How can a DeepChem KerasModel be used to perform the fit?
- Is there a better architecture for the NN?

%% Cell type:code id: tags:

``` python
# The second argument of KerasModel is the training loss, which has to be a
# DeepChem Loss object (or a loss function operating on the model tensors),
# not a metric function such as dc.metrics.mae_score. L1Loss is the
# absolute-error loss, matching the 'mae' loss the Keras model is compiled with.
dc_model = dc.models.KerasModel(model, dc.models.losses.L1Loss())
```

%% Cell type:markdown id: tags:

# Examples of classic ML models
## Examples of classic ML models

%% Cell type:code id: tags:

``` python
from deepchem import splits

# Re-create the seeded random train/test split of the one-hot dataset.
splitter = splits.RandomSplitter()
train, test = splitter.train_test_split(dataset=dc_dataset, seed=42)
```

%% Cell type:code id: tags:

``` python
# Shape of the training features: still 3-D (samples, sequence length, one-hot width).
train.X.shape
```

%% Output

    (1012, 4564, 21)

%% Cell type:markdown id: tags:

# TODO
- How should train.X be reshaped so that it can be fed into an ML model using DeepChem?
- How should train.X be reshaped so that it can be fed into an ML model using DeepChem?

%% Cell type:code id: tags:

``` python
from sklearn.ensemble import RandomForestRegressor
from deepchem.utils.evaluate import Evaluator
import pandas as pd

seed = 42 # Set a random seed to get stable results

# scikit-learn estimators only accept 2-D input (n_samples, n_features) and
# raise "Found array with dim 3" otherwise, so flatten each one-hot matrix
# into a single feature vector before fitting.
train_flat = dc.data.NumpyDataset(
    X=train.X.reshape(train.X.shape[0], -1),
    y=train.y,
    w=train.w,
    ids=train.ids,
)

# Pass the seed through the constructor rather than patching the attribute.
sklearn_model = RandomForestRegressor(
    n_estimators=100, max_features='sqrt', random_state=seed)
model = dc.models.SklearnModel(sklearn_model)
model.fit(train_flat)
```

%% Output

    ---------------------------------------------------------------------------
    ValueError                                Traceback (most recent call last)
    <ipython-input-47-79eb630e575b> in <module>
          6 sklearn_model.random_state = seed
          7 model = dc.models.SklearnModel(sklearn_model)
    ----> 8 model.fit(train)

    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/deepchem/models/sklearn_models/sklearn_model.py in fit(self, dataset)
         96     # Some scikit-learn models don't use weights.
         97     if self.use_weights:
    ---> 98       self.model.fit(X, y, w)
         99       return
        100     self.model.fit(X, y)
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/ensemble/_forest.py in fit(self, X, y, sample_weight)
        302                 "sparse multilabel-indicator for y is not supported."
        303             )
    --> 304         X, y = self._validate_data(X, y, multi_output=True,
        305                                    accept_sparse="csc", dtype=DTYPE)
        306         if sample_weight is not None:
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
        431                 y = check_array(y, **check_y_params)
        432             else:
    --> 433                 X, y = check_X_y(X, y, **check_params)
        434             out = X, y
        435
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
         61             extra_args = len(args) - len(all_args)
         62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
         64
         65             # extra_args > 0
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
        812         raise ValueError("y cannot be None")
        813
    --> 814     X = check_array(X, accept_sparse=accept_sparse,
        815                     accept_large_sparse=accept_large_sparse,
        816                     dtype=dtype, order=order, copy=copy,
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/utils/validation.py in inner_f(*args, **kwargs)
         61             extra_args = len(args) - len(all_args)
         62             if extra_args <= 0:
    ---> 63                 return f(*args, **kwargs)
         64
         65             # extra_args > 0
    ~/anaconda3/envs/deepchem-night/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
        657                     "into decimal numbers with dtype='numeric'") from e
        658         if not allow_nd and array.ndim >= 3:
    --> 659             raise ValueError("Found array with dim %d. %s expected <= 2."
        660                              % (array.ndim, estimator_name))
        661
    ValueError: Found array with dim 3. Estimator expected <= 2.

%% Cell type:markdown id: tags:

# Finally, we will compare other descriptors, such as the amino acid composition (AAComp) and the composition, transition and distribution (CTD) of amino acids (https://www.pnas.org/content/92/19/8700)
Finally, we will compare other descriptors, such as the amino acid composition (AAComp) and the composition, transition and distribution (CTD) of amino acids (https://www.pnas.org/content/92/19/8700)

%% Cell type:code id: tags:

``` python
from propy import PyPro
```

%% Cell type:markdown id: tags:

In the following cell, we create a PyPro object from each protein sequence. PyPro lets us calculate the amino acid composition and CTD descriptor vectors

%% Cell type:code id: tags:

``` python
# Build one propy descriptor object per sequence, then read out the two
# descriptor dictionaries as plain value vectors.
protein_descriptors = [PyPro.GetProDes(sequence) for sequence in seq_list]

# Amino acid composition values, one vector per sequence.
aaComplist = [
    np.array(list(descriptor.GetAAComp().values()))
    for descriptor in protein_descriptors
]
# CTD descriptor values, one vector per sequence.
CTDList = [
    np.array(list(descriptor.GetCTD().values()))
    for descriptor in protein_descriptors
]
```

%% Cell type:code id: tags:

``` python
# One DeepChem dataset per descriptor family; both use the same melting
# temperatures (temp) as the regression target.
dc_dataset_aacomp = dc.data.NumpyDataset(X=aaComplist,y= temp)
dc_dataset_ctd = dc.data.NumpyDataset(X=CTDList,y= temp)
```

%% Cell type:markdown id: tags:

# Using classic ML with the amino acid composition featurizer
Using classic ML with the amino acid composition featurizer

%% Cell type:code id: tags:

``` python
from deepchem import splits
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from deepchem.utils.evaluate import Evaluator
import pandas as pd

seed = 42 # Set a random seed to get stable results

# Seeded random split of the amino-acid-composition dataset.
splitter = splits.RandomSplitter()
train, test  = splitter.train_test_split(dc_dataset_aacomp,seed=seed)

# Mean absolute error is reported for every model on both partitions.
metric = dc.metrics.Metric(dc.metrics.mae_score)

# The forest takes its seed through the constructor. SVR has no random_state
# parameter, so none is set on it.
sklearn_model = RandomForestRegressor(
    n_estimators=100, max_features='sqrt', random_state=seed)
svr_sklearn = SVR(kernel="poly",degree=4)

# Fit and score each estimator with the same procedure.
for label, estimator in (("RandomForestRegressor", sklearn_model),
                         ("SupportVectorMachineRegressor", svr_sklearn)):
    print(label)
    model = dc.models.SklearnModel(estimator)
    model.fit(train)
    train_score = model.evaluate(train, [metric])
    test_score = model.evaluate(test, [metric])
    print("Train score is : {}".format(train_score))
    print("Test score is : {}".format(test_score))
```

%% Output

    RandomForestRegressor
    Train score is : {'mae_score': 2.618318289893296}
    Test score is : {'mae_score': 6.424065900477471}
    SupportVectorMachineRegressor
    Train score is : {'mae_score': 6.915117936487243}
    Test score is : {'mae_score': 8.42955281366753}

%% Cell type:code id: tags:

``` python
from deepchem import splits
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from deepchem.utils.evaluate import Evaluator
import pandas as pd

seed = 42 # Set a random seed to get stable results

# Seeded random split of the CTD-descriptor dataset.
splitter = splits.RandomSplitter()
train, test  = splitter.train_test_split(dc_dataset_ctd,seed=seed)

# Mean absolute error is reported for every model on both partitions.
metric = dc.metrics.Metric(dc.metrics.mae_score)

# The forest takes its seed through the constructor. SVR has no random_state
# parameter, so none is set on it.
sklearn_model = RandomForestRegressor(
    n_estimators=100, max_features='sqrt', random_state=seed)
svr_sklearn = SVR(kernel="poly",degree=4)

# Fit and score each estimator with the same procedure.
for label, estimator in (("RandomForestRegressor", sklearn_model),
                         ("SupportVectorMachineRegressor", svr_sklearn)):
    print(label)
    model = dc.models.SklearnModel(estimator)
    model.fit(train)
    train_score = model.evaluate(train, [metric])
    test_score = model.evaluate(test, [metric])
    print("Train score is : {}".format(train_score))
    print("Test score is : {}".format(test_score))
```

%% Output

    RandomForestRegressor
    Train score is : {'mae_score': 2.572226988614976}
    Test score is : {'mae_score': 6.987328441471036}
    SupportVectorMachineRegressor
    Train score is : {'mae_score': 12.2357520685622}
    Test score is : {'mae_score': 12.083711912061512}

%% Cell type:code id: tags:

``` python
```

%% Cell type:code id: tags:

``` python
```

%% Cell type:code id: tags:

``` python
```
%% Cell type:markdown id: tags:

%% Cell type:code id: tags:
# Congratulations! Time to join the Community!

``` python
```
Congratulations on completing this tutorial notebook! If you enjoyed working through the tutorial, and want to continue working with DeepChem, we encourage you to finish the rest of the tutorials in this series. You can also help the DeepChem community in the following ways:

%% Cell type:code id: tags:
## Star DeepChem on [GitHub](https://github.com/deepchem/deepchem)
This helps build awareness of the DeepChem project and the tools for open source drug discovery that we're trying to build.

``` python
```

%% Cell type:code id: tags:

``` python
```
## Join the DeepChem Gitter
The DeepChem [Gitter](https://gitter.im/deepchem/Lobby) hosts a number of scientists, developers, and enthusiasts interested in deep learning for the life sciences. Join the conversation!