Merge pull request #29 from ivandon15/ivan (c2d615e8) · Commits · github_fork / ChatMol

copilot_public/new_function_template.py

+186 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,118 @@ def translate_to_protein(self, seq: str, pname=None):
		else:
		return f"The protein sequence of {seq} is `>protein\n{protein_seq}`"


		def get_smiles_feature(self, smiles):
		    """Summarize basic physicochemical descriptors of a molecule.

		    The SMILES string is parsed with RDKit and the descriptor tuple
		    returned by ``rdkit.Chem.QED.properties`` is formatted as text.

		    Args:
		        self: Unused; kept so the function matches the tool-call signature.
		        smiles: SMILES string describing the molecule.

		    Returns:
		        A one-line summary with molecular weight, ALOGP, hydrogen-bond
		        acceptors, hydrogen-bond donors and polar surface area, or
		        ``"Error: Not a valid SMILES string"`` when ``smiles`` is not a
		        non-blank string or cannot be parsed.
		    """
		    invalid = "Error: Not a valid SMILES string"
		    # Reject non-string and blank input before touching RDKit: a blank
		    # string would otherwise be treated as an empty molecule.
		    if not isinstance(smiles, str) or not smiles.strip():
		        return invalid

		    from rdkit import Chem
		    from rdkit.Chem.QED import properties

		    # MolFromSmiles reports a parse failure by returning None rather than
		    # raising, so the result has to be checked explicitly.
		    mol = Chem.MolFromSmiles(smiles)
		    if mol is None:
		        return invalid

		    p = properties(mol)
		    return (
		        f"Molecular Weight: {p.MW:.2f}, "
		        f"LOGP: {p.ALOGP:.2f}, "
		        f"HBA (Hydrogen Bond Acceptors): {p.HBA}, "
		        f"HBD (Hydrogen Bond Donors): {p.HBD}, "
		        f"PSA (Polar Surface Area): {p.PSA:.2f}"
		    )


		def capped(self, smiles):
		    """Cap one amino acid with an acetyl (ACE) and an N-methyl amide (NME) group.

		    The input molecule is combined with an acetamide fragment (``CC(=O)N``)
		    and a methylamine fragment (``NC``). Using the first match of the
		    backbone pattern ``NC[C:1](=[O:2])-[OD1]``:

		    * the hydroxyl oxygen is removed and the carbonyl carbon is bonded to
		      the methylamine nitrogen;
		    * the backbone nitrogen is removed and every atom that was bonded to it
		      is bonded to the acetamide nitrogen instead.

		    Args:
		        self: Unused; kept so the function matches the tool-call signature.
		        smiles: SMILES string of the amino acid to cap.

		    Returns:
		        A sentence containing the capped SMILES, or a string starting with
		        ``"Error:"`` when ``smiles`` is not a parsable, non-blank string or
		        contains no match for the backbone pattern.
		    """
		    invalid = "Error: Not a valid SMILES string"
		    if not isinstance(smiles, str) or not smiles.strip():
		        return invalid

		    from rdkit import Chem

		    def update_idx(removed, current):
		        # Atoms that sit after a removed atom move down by one position.
		        return current - 1 if current > removed else current

		    # MolFromSmiles returns None on a parse failure instead of raising.
		    mol = Chem.MolFromSmiles(smiles)
		    if mol is None:
		        return invalid

		    acetyl_smiles = "CC(=O)N"
		    methyl_amide_smiles = "NC"
		    ace_mol = Chem.MolFromSmiles(acetyl_smiles)
		    methyl_mol = Chem.MolFromSmiles(methyl_amide_smiles)

		    # Fragment order matters: the cap atoms are appended after the input
		    # atoms, and the cap lookups below rely on taking the LAST match.
		    combined_mol = Chem.CombineMols(mol, ace_mol)
		    combined_mol = Chem.CombineMols(combined_mol, methyl_mol)

		    backbone = Chem.MolFromSmarts("NC[C:1](=[O:2])-[OD1]")
		    cp_nh2 = Chem.MolFromSmarts(methyl_amide_smiles)
		    cp_cooh = Chem.MolFromSmarts('[C:1](=[O:2])-N')

		    backbone_hits = combined_mol.GetSubstructMatches(backbone)
		    if not backbone_hits:
		        return "Error: No amino acid backbone (N-C-C(=O)-OH) found in the SMILES string"

		    # Query atom order is N, C, C(=O), O, OH; only the first hit is capped.
		    first_hit = backbone_hits[0]
		    OH = first_hit[-1]
		    C1 = first_hit[-3]
		    NH2 = first_hit[0]
		    METH = combined_mol.GetSubstructMatches(cp_nh2)[-1][0]
		    ACE = combined_mol.GetSubstructMatches(cp_cooh)[-1][-1]

		    capped_mol = Chem.EditableMol(combined_mol)

		    # C-terminus: drop the hydroxyl oxygen, then re-index everything that
		    # was located after it before wiring the carbonyl carbon to the NME N.
		    capped_mol.RemoveAtom(OH)
		    C1 = update_idx(OH, C1)
		    NH2 = update_idx(OH, NH2)
		    METH = update_idx(OH, METH)
		    ACE = update_idx(OH, ACE)
		    capped_mol.AddBond(C1, METH, order=Chem.rdchem.BondType.SINGLE)

		    # N-terminus: record the neighbours of the backbone nitrogen using the
		    # indices they will have once that nitrogen has been removed.
		    snapshot = capped_mol.GetMol()
		    connected = []
		    for bond in snapshot.GetBonds():
		        begin_idx = bond.GetBeginAtomIdx()
		        end_idx = bond.GetEndAtomIdx()
		        if begin_idx == NH2:
		            connected.append(update_idx(NH2, end_idx))
		        elif end_idx == NH2:
		            connected.append(update_idx(NH2, begin_idx))

		    capped_mol.RemoveAtom(NH2)
		    ACE = update_idx(NH2, ACE)
		    for conn in connected:
		        capped_mol.AddBond(conn, ACE, order=Chem.rdchem.BondType.SINGLE)

		    capped_smi = Chem.MolToSmiles(capped_mol.GetMol())
		    return f"After capping (adding ace and nme), the smiles is `{capped_smi}`"


		def smiles_similarity(self, smiles1, smiles2, types="ECFP"):
		    """Compute the Tanimoto similarity between two molecules.

		    Both SMILES strings are parsed with RDKit, fingerprinted with the
		    method selected by ``types`` and compared with ``TanimotoSimilarity``.

		    Args:
		        self: Unused; kept so the function matches the tool-call signature.
		        smiles1: SMILES string of the first molecule.
		        smiles2: SMILES string of the second molecule.
		        types: Fingerprint method. ``"FCFP"`` (feature-based Morgan with
		            chirality, radius 2), ``"RDK"`` (RDKit fingerprint) and
		            ``"MACC"`` (MACCS keys) are recognised; ``"ECFP"`` and any
		            other value use the plain Morgan fingerprint with radius 2.

		    Returns:
		        A sentence reporting the similarity rounded to two decimals, or
		        ``"Error: Not a valid SMILES string"`` when either input is not a
		        parsable, non-blank string.
		    """
		    invalid = "Error: Not a valid SMILES string"
		    for candidate in (smiles1, smiles2):
		        if not isinstance(candidate, str) or not candidate.strip():
		            return invalid

		    from rdkit import Chem
		    from rdkit.Chem import AllChem
		    from rdkit.Chem import MACCSkeys
		    from rdkit.DataStructs import TanimotoSimilarity

		    def get_fingerprint(molecule):
		        # Only the requested fingerprint is computed.
		        if types == "FCFP":
		            return AllChem.GetMorganFingerprint(
		                molecule, 2,
		                useFeatures=True,
		                useChirality=True
		            )
		        if types == "RDK":
		            return AllChem.RDKFingerprint(molecule)
		        if types == "MACC":
		            return MACCSkeys.GenMACCSKeys(molecule)
		        # ECFP, and the fallback for unrecognised method names.
		        return AllChem.GetMorganFingerprint(molecule, 2)

		    fingerprints = []
		    for candidate in (smiles1, smiles2):
		        # MolFromSmiles returns None on a parse failure instead of raising;
		        # bail out here so no placeholder ever reaches TanimotoSimilarity.
		        molecule = Chem.MolFromSmiles(candidate)
		        if molecule is None:
		            return invalid
		        fingerprints.append(get_fingerprint(molecule))

		    fp1, fp2 = fingerprints
		    similarity = TanimotoSimilarity(fp1, fp2)

		    return f"Using {types} fingerprint and Tanimoto, the result is {similarity:.2f}"


		function_descriptions = [{ # This is the description of the function
		"type": "function",
		"function": {
		@@ -23,6 +135,56 @@ function_descriptions = [{ # This is the description of the function
		},
		"required": ["seq"],
		},
		},
		{
		"type": "function",
		"function": {
		"name": "get_smiles_feature",
		"description": "Input a smiles, and will return molecule weight, logp, \
		HBA(Hydrogen Bond Acceptors), HBD(Hydrogen Bond Donors) \
		and PSA(Polar Surface Area) values",
		"parameters": {
		"type": "object",
		"properties": {
		"smiles": {"type": "string", "description": "The smiles sequence"},
		},
		},
		"required": ["smiles"],
		}
		},
		{
		"type": "function",
		"function": {
		"name": "capped",
		"description": "Input a smiles, and will return a capped smiles, which \
		means adding ACE and NME to the smiles to prevent or \
		block unwanted reactions.",
		"parameters": {
		"type": "object",
		"properties": {
		"smiles": {"type": "string", "description": "The smiles sequence"},
		},
		},
		"required": ["smiles"],
		}
		},
		{
		"type": "function",
		"function": {
		"name": "smiles_similarity",
		"description": "Input two smiles and a fingerprint method (if not provided, \
		ECFP - the default morgan fingerprint will be used) and \
		return the TanimotoSimilarity",
		"parameters": {
		"type": "object",
		"properties": {
		"smiles1": {"type": "string", "description": "The smiles sequence"},
		"smiles2": {"type": "string", "description": "The smiles sequence"},
		"types": {"type": "string", "description": "The fingerprint method"},
		},
		},
		"required": ["smiles1", "smiles2"],
		}
		}]

		test_data = {
		@@ -32,6 +194,30 @@ test_data = {
		"seq": "ATGCGAATTTGGGCCC",
		},
		"output": "The protein sequence of ATGCGAATTTGGGCCC is `>protein\nMRFL`",
		},
		"get_smiles_feature": {
		"input": {
		"self": None,
		"smiles": "C[C@H](N)C(=O)N[C@@H](CS)C(=O)N[C@@H](CS)C(=O)O",
		},
		"output": "Molecular Weight: 295.39, LOGP: -1.75, HBA (Hydrogen Bond Acceptors): \
		5, HBD (Hydrogen Bond Donors): 6, PSA (Polar Surface Area): 121.52",
		},
		"capped": {
		"input": {
		"self": None,
		"smiles": "N[C@@H](CS)C(=O)O",
		},
		"output": "After capping (adding ace and nme), the smiles is \
		`CNC(=O)[C@H](CS)NC(C)=O`",
		},
		"smiles_similarity": {
		"input": {
		"self": None,
		"smiles1": "N[C@@H](CS)C(=O)O",
		"smiles2": "N[C@@H](CS)C(=O)O",
		},
		"output": "Using ECFP fingerprint and Tanimoto, the result is 1.00",
		}
		}

Admin message