Commit 28cdccbd authored by JinyuanSun's avatar JinyuanSun
Browse files

fix bugs and add protein design tools

parent 7a287d43
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -132,3 +132,5 @@ dmypy.json
test*
01d70f7c997c6a7d73e8fc592865b84f7371642b7afdba535726ba70f020183e*
ce73fb8a1b802e6746c58ac3bf915d79506e2b5edc36e83f1cbfa3f6071a9a92*
e4f51ecb92f1387f12e6a8f00025990b7c16198bf3643601dc66c55144f0b6f8*
9d58e5ce6d8c9ed839c7cb7d836581dbd48e4ba916403cbd9644f886fd070d2b*
 No newline at end of file
+2 −5
Original line number Diff line number Diff line
@@ -71,11 +71,9 @@ You are more than welcome to contribute any function to ChatMol copilot.
## TODO
### Analysis tools
- [ ] **Protein Docking Simulation**: Develop a simulation tool for docking small molecule ligands to protein targets, exploring potential binding modes.
  - AutoDock Vina (High priority)
  - DiffDock (Low priority)
  - DiffDock
  
- [ ] **Protein Structure and Sequence Comparison**: Build a tool for comparing the structures of multiple proteins, identifying similarities, differences, and motifs.
  - TM-align (High priority)
  - Kalign (Medium priority)
  - MMseqs2 (Low priority)

@@ -91,8 +89,7 @@ You are more than welcome to contribute any function to ChatMol copilot.


### Visualization tools
- [ ] **Protein-Ligand Interaction Visualization**: Create a tool for visualizing and analyzing protein-ligand interactions, focusing on key binding residues.

- [ ] **Protein-Ligand Interaction Visualization**: Create a tool for visualizing and analyzing protein-ligand interactions, focusing on key binding residues. (Mol* partially done)

## Online Version
We provided an online version for you. [Click here](https://chatmol.org/copilot/) to try it.  
 No newline at end of file
+12 −9
Original line number Diff line number Diff line
import json
import requests
import pymol
# import pymol
import os
import pandas as pd
import openai
import time
import types
import urllib.parse
# import pandas as pd
# import openai
# import time
# import types
# import urllib.parse
import pprint
from datetime import datetime
from pymol import cmd
from urllib.parse import quote
# from datetime import datetime
# from pymol import cmd
# from urllib.parse import quote


os.environ["REGISTRY_HOST_PORT"]="100.89.180.132:9999"
@@ -21,6 +21,9 @@ registry_host_port = os.getenv("REGISTRY_HOST_PORT")
#os.environ["REGISTRY_HOST_PORT"]="100.89.180.132:9999"

# Search for pymol service endpoint
# print("Registry Host Port = ",registry_host_port)
# print(requests.get("http://"+registry_host_port+"/registry"))
# print("END")
registry = requests.get("http://"+registry_host_port+"/registry").json()
pymol_endpoint = ""
for key in registry.keys():
+38 −1
Original line number Diff line number Diff line
@@ -246,6 +246,41 @@ class ConversationHandler:
                    "required": ["query", "type"],
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "call_proteinmpnn_api",
                    "description": "Calls the ProteinMPNN API to design protein sequences based on structures",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "path_to_pdb": {"type": "string", "description": "The path to the PDB file."},
                            "designed_chain": {"type": "string", "description": "The designed chain identifier."},
                            "num_seqs": {"type": "string", "description": "The number of sequences to generate."},
                            "homonomer": {"type": "string", "description": "Indicates whether the protein is a homomer or not."},
                            "sampling_temp": {"type": "string", "description": "The sampling temperature."},
                            "fixed_chain": {"type": "string", "description": "The fixed chain identifier, optional.", "default": None},
                        },
                    },
                    "required": ["path_to_pdb", "designed_chain","num_seqs", "homonomer", "sampling_temp","fixed_chain"],
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "compare_protein_structures",
                    "description": "Compare the structures of two proteins with TMAlign",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "pdb_file1": {"type": "string", "description": "The file path to a local pdb file of the first protein"},
                            "pdb_file2": {"type": "string", "description": "The file path to a local pdb file of the second protein"},
                        },
                    },
                    "required": ["pdb_file1", "pdb_file2"],
                }
            }
            
            
        ]
        self.available_functions = {
@@ -261,7 +296,9 @@ class ConversationHandler:
            "get_protein_sequence_from_pdb": self.cfn.get_protein_sequence_from_pdb,
            "search_rcsb": self.cfn.search_rcsb,
            "query_uniprot": self.cfn.query_uniprot,
            "blind_docking": self.cfn.blind_docking
            "blind_docking": self.cfn.blind_docking,
            "call_proteinmpnn_api": self.cfn.call_proteinmpnn_api,
            "compare_protein_structures": self.cfn.compare_protein_structures,
        }

    def setup_workdir(self, work_dir):
+48 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ from rdkit import Chem
from rdkit.Chem import AllChem
import time
import pandas as pd
from tool_utils import StructPair

from Bio.PDB import PDBParser

@@ -270,6 +271,53 @@ class ChatmolFN:
        # # print(log)
        # return res_df.to_string()
    
    def call_proteinmpnn_api(
            self,
            path_to_pdb: str,
            designed_chain: str = "A",
            num_seqs: str = "1", # int,
            homonomer: str ="false", #bool,
            sampling_temp: str = '0.9', #int,
            fixed_chain: str = None,
    ):
        """
        Call the ProteinMPNN web API to design sequences for a structure.

        The numeric and boolean options are declared as strings (presumably
        because the tool-calling layer hands arguments over as strings --
        confirm against the caller); they are converted here before being
        sent as query parameters.

        Parameters:
        - path_to_pdb (str): Path to the local PDB file to upload.
        - designed_chain (str): The designed chain identifier.
        - num_seqs (str or int): Number of sequences to generate; parsed with int().
        - homonomer (str or bool): The string "true" (case-insensitive) enables
          the flag and any other string disables it; non-string values are
          interpreted by truthiness.
        - sampling_temp (str or float): Sampling temperature; parsed with float().
        - fixed_chain (str, optional): The fixed chain identifier; only sent
          when it is not None.

        Returns:
        - str: The raw text body of the HTTP response.

        Raises:
        - ValueError: If num_seqs or sampling_temp cannot be converted.
        - OSError: If path_to_pdb cannot be opened (e.g. FileNotFoundError).
        """
        headers = {'accept': 'application/json'}
        num_seqs = int(num_seqs)
        # Accept a genuine bool as well as its string spelling; calling
        # .lower() on a bool would raise AttributeError.
        if isinstance(homonomer, str):
            homonomer = homonomer.lower() == "true"
        else:
            homonomer = bool(homonomer)
        sampling_temp = float(sampling_temp)

        # Build the query once; 'fixed_chain' is included only when supplied,
        # keeping the same key order as before.
        params = {'designed_chain': designed_chain}
        if fixed_chain is not None:
            params['fixed_chain'] = fixed_chain
        params['num_seqs'] = num_seqs
        params['homonomer'] = homonomer
        params['sampling_temp'] = sampling_temp

        # The context manager guarantees the upload handle is closed even if
        # the request raises.
        with open(path_to_pdb, 'rb') as pdb_handle:
            files = {'uploaded_file': pdb_handle}
            response = requests.post('https://api.cloudmol.org/protein/proteinmpnn/', params=params, headers=headers, files=files)
        return response.text

    def compare_protein_structures(self, pdb_file1, pdb_file2):
        """
        Align two protein structures with TMalign and summarize the result.

        Parameters:
        - pdb_file1 (str): The path to the first PDB file.
        - pdb_file2 (str): The path to the second PDB file.

        Returns:
        - str: A single comma-separated line reporting the aligned length,
          RMSD, identity, and one TM-score per input file, as exposed by the
          StructPair object after tmalign() has run.
        """
        pair = StructPair(pdb_file1, pdb_file2)
        # tmalign() is called for its side effect of populating the result
        # attributes read below.
        pair.tmalign()
        summary_fields = [
            f"Aligned length: {pair.aligned_length}",
            f"RMSD: {pair.rmsd}",
            f"Identity: {pair.identity}",
            f"TM-score of {pdb_file1}: {pair.tmscore_p1}",
            f"TM-score of {pdb_file2}: {pair.tmscore_p2}",
        ]
        return ", ".join(summary_fields)

    @handle_file_not_found_error
    def protein_single_point_mutation_prediction(self, pdb_file, mutations):
Loading