Sialyltransferase¶

This is a more advanced example, where the PDB file is not as straightforward to use out of the box and the ligands are tougher to handle. Beware that this script may take a long time to execute, due to the size of the ligands.

Here we input the ligands as stereospecific SMILES strings. It is important to make sure the stereochemistry is right, since one wrong (or unspecified) stereocentre can result in a very wrong structure. Always check your minimised input structures before you start a simulation, no matter how confident you are in your input files or ProtoCaller! Alternatively, for molecules with many stereocentres it might be easier to input the ligand as a file in a supported input format (e.g. SDF). For more information on how to do that, please refer to the documentation.

NOTE: You will need a Modeller license to run this example, since there are missing residues in the PDB file.

# import os
# add an alternative default version for GROMACS. Otherwise, use bash default
# os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4")
import logging
logging.basicConfig(level=logging.INFO)

from ProtoCaller.Utils.fileio import Dir
from ProtoCaller.Ensemble import Ligand, Protein, Ensemble

with Dir("Sialyltransferase", overwrite=True):
    # create a protein from its PDB code and the residue number of the ligand
    # we are going to use for mapping
    protein = Protein("2WNB", ligand_ref="1344")

    # delete any atoms with altLoc B
    for chain in protein.pdb_obj:
        for residue in chain:
            atoms_to_purge = [x for x in residue if x.altLoc == "B"]
            residue.purgeAtoms(atoms_to_purge, "discard")
    protein.pdb_obj.writePDB()

    # create two ligands from SMILES strings
    lig1 = Ligand("O([P@]([O-])(=O)CC[C@@]1(C(=O)N)C[C@@H]([C@H]([C@H]([C@@H]"
                  "([C@@H](CO)O)O)O1)NC(=O)C)O)C[C@H]1O[C@@H](n2c(=O)nc(N)cc2)"
                  "[C@H](O)[C@@H]1O", workdir="Ligands")
    lig2 = Ligand("[P@]([O-])(=O)(OC[C@H]1O[C@@H](n2ccc(nc2=O)N)[C@H](O)[C@@H]"
                  "1O)CC[C@]1(CO)O[C@H]([C@@H]([C@H](C1)O)NC(=O)C)[C@H](O)"
                  "[C@H](O)CO", workdir="Ligands")

    # create the morphs from the ligands
    morphs = [[lig1, lig2], [lig2, lig1]]

    # create a system from the protein and the morphs and set up some default
    # settings regarding system preparation
    system = Ensemble("GROMACS", protein=protein, morphs=morphs,
                      box_length_complex=7, ligand_ff="gaff2",
                      workdir=protein.workdir.path)
    # only keep the reference ligand and keep all crystallographic waters
    system.protein.filter(ligands=None, waters="all")
    # add missing residues with Modeller and add missing atoms and protonate
    # with PDB2PQR. We also replace the nonstandard selenomethionine residues
    # with methionine.
    system.protein.prepare(add_missing_residues="modeller",
                           add_missing_atoms="pdb2pqr",
                           replace_nonstandard_residues=True)
    # prepare the complex and solvated leg starting structures and save them as
    # GROMACS input. Parametrisation here will be very slow. Be patient
    system.prepareComplexes()

Alternatively, we can use a custom PDB file and ligand files to avoid some troubles with the preparation. In this case we don’t need Modeller and we get better alignment. Note that in this case the original files may be overwritten by ProtoCaller during the workflow.

# import os
# add an alternative default version for GROMACS. Otherwise, use bash default
# os.environ["GROMACSHOME"] = os.path.expanduser("~/gromacs-2018.4")
import logging
logging.basicConfig(level=logging.INFO)

from ProtoCaller.Utils.fileio import Dir
from ProtoCaller.Ensemble import Ligand, Protein, Ensemble

with Dir("Sialyltransferase", overwrite=False):
    # create a protein and ligands using custom files
    lig_ref = Ligand("lig_ref.sdf", protonated=True, workdir="Ligands",
                     name="lig_ref")
    lig1 = Ligand("lig1.sdf", protonated=True, workdir="Ligands", name="lig1")
    lig2 = Ligand("lig2.sdf", protonated=True, workdir="Ligands", name="lig2")
    protein = Protein("2WNB", pdb_file="protein.pdb", ligand_ref=lig_ref)

    # create the morphs from the ligands
    morphs = [[lig1, lig2], [lig2, lig1]]

    # create a system from the protein and the morphs and set up some default
    # settings regarding system preparation
    system = Ensemble("GROMACS", protein=protein, morphs=morphs,
                      box_length_complex=7, ligand_ff="gaff2",
                      workdir=protein.workdir.path)
    # only keep the reference ligand
    system.protein.filter(ligands=None)
    # we still need PDB2PQR to give proper naming to residues
    system.protein.prepare(protonate_proteins="pdb2pqr")
    # prepare the complex and solvated leg starting structures and save them as
    # GROMACS input. Parametrisation here will be very slow. Be patient
    system.prepareComplexes()