Spaces:

colbyford
/

esm3

Runtime error

App Files Files Community

colbyford committed on Jun 26

Commit

6a5443d

•

1 Parent(s): 66b44f0

Add initial app code

Browse files

Files changed (3) hide show

README.md +5 -5
app.py +281 -0
requirements.txt +5 -0

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Esm3
-emoji: ⚡
-colorFrom: red
-colorTo: red
 sdk: gradio
 sdk_version: 4.37.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: ESM3
+emoji: 🧬
+colorFrom: gray
+colorTo: green
 sdk: gradio
 sdk_version: 4.37.1
 app_file: app.py
 pinned: false
 ---
+# ESM3 HF Spaces Application

app.py ADDED Viewed

	@@ -0,0 +1,281 @@

+import gradio as gr
+import numpy as np
+import torch
+import py3Dmol
+from huggingface_hub import login
+from esm.utils.structure.protein_chain import ProteinChain
+from esm.models.esm3 import ESM3
+from esm.sdk.api import (
+    ESMProtein,
+    GenerationConfig,
+)
+## Shared visual theme for the whole app: Monochrome with a gray primary hue
+theme = gr.themes.Monochrome(primary_hue="gray")
+## Models already loaded in this process, keyed by model name, so the weights
+## are read once instead of on every request
+_MODEL_CACHE = {}
+## Function to get model from Hugging Face using token
+def get_model(model_name, token):
+    """Log in to the Hugging Face Hub and return the requested ESM3 model on CPU.
+    Args:
+        model_name: Name passed to ``ESM3.from_pretrained`` (e.g. "esm3_sm_open_v1").
+        token: Hugging Face access token passed to ``login``.
+    Returns:
+        The loaded model. Repeated calls with the same ``model_name`` return the
+        same cached instance rather than reloading the weights.
+    """
+    # Log in on every call (as before) so a bad token still fails up front,
+    # even when the model itself is served from the cache.
+    login(token=token)
+    if model_name not in _MODEL_CACHE:
+        # The device is pinned to CPU; the CUDA branch was disabled by the author.
+        _MODEL_CACHE[model_name] = ESM3.from_pretrained(model_name, device=torch.device("cpu"))
+    return _MODEL_CACHE[model_name]
+## Function to render 3D structure using py3Dmol
+def render_pdb(pdb_string, motif_start=None, motif_end=None):
+    """Build an 800x800 py3Dmol cartoon view of a PDB string.
+    Without a motif range the cartoon uses the "spectrum" colouring. When both
+    ``motif_start`` and ``motif_end`` are given, the cartoon is drawn light grey
+    and the motif residues are overlaid in cyan.
+    Returns:
+        The py3Dmol view object.
+    """
+    viewer = py3Dmol.view(width=800, height=800)
+    viewer.addModel(pdb_string, "pdb")
+    viewer.setStyle({"cartoon": {"color": "spectrum"}})
+    motif_given = not (motif_start is None or motif_end is None)
+    if motif_given:
+        # Indices are shifted by one before being used as "resi" selectors
+        # (presumably 0-based indices to 1-based residue numbers; confirm).
+        highlighted_residues = (np.arange(motif_start, motif_end) + 1).tolist()
+        viewer.setStyle({"cartoon": {"color": "lightgrey"}})
+        viewer.addStyle({"resi": highlighted_residues}, {"cartoon": {"color": "cyan"}})
+    viewer.zoomTo()
+    return viewer
+## Function to get PDB data
+def get_pdb(pdb_id, chain_id):
+    """Return the ``ProteinChain`` built by ``ProteinChain.from_rcsb`` for one chain of a PDB entry."""
+    return ProteinChain.from_rcsb(pdb_id, chain_id)
+# def select_motif(pdb, motif_start, motif_end):
+#     motif_inds = np.arange(motif_start, motif_end)
+#     motif_sequence = pdb[motif_inds].sequence
+#     motif_atom37_positions = pdb[motif_inds].atom37_positions
+#     return [motif_sequence, motif_atom37_positions]
+# def setup_prompt(prompt_length, motif_sequence, motif_atom37_positions, insert_size):
+#     prompt_length = 200
+#     sequence_prompt = ["_"]*prompt_length
+#     sequence_prompt[insert_size:insert_size+len(motif_sequence)] = list(motif_sequence)
+#     sequence_prompt = "".join(sequence_prompt)
+#     structure_prompt = torch.full((prompt_length, 37, 3), np.nan)
+#     structure_prompt[insert_size:insert_size+len(motif_atom37_positions)] = torch.tensor(motif_atom37_positions)
+#     protein_prompt = ESMProtein(sequence=sequence_prompt, coordinates=structure_prompt)
+#     return [sequence_prompt, structure_prompt, protein_prompt]
+# def generate_scaffold_sequence(model_name, token, sequence_prompt, protein_prompt):
+#     sequence_generation_config = GenerationConfig(track="sequence",
+#                                                   num_steps=sequence_prompt.count("_") // 2,
+#                                                   temperature=0.5)
+#     model = get_model(model_name, token)
+#     sequence_generation = model.generate(protein_prompt, sequence_generation_config)
+#     return sequence_generation
+def scaffold(model_name, token, pdb_id, chain_id, motif_start, motif_end, prompt_length, insert_size):
+    """Generate a sequence that scaffolds a motif cut from an RCSB chain.
+    The motif (residue indices ``motif_start`` up to, not including, ``motif_end``)
+    is placed at offset ``insert_size`` inside an otherwise masked prompt of
+    ``prompt_length`` positions; the model then fills in the masked positions.
+    Args:
+        model_name: Model name forwarded to ``get_model``.
+        token: Hugging Face token forwarded to ``get_model``.
+        pdb_id: PDB code of the source structure.
+        chain_id: Chain identifier within that structure.
+        motif_start: First motif index (inclusive).
+        motif_end: Motif end index (exclusive).
+        prompt_length: Total number of positions in the prompt.
+        insert_size: Offset in the prompt at which the motif is inserted.
+    Returns:
+        ``[full chain sequence, motif sequence, sequence prompt, generated sequence]``.
+    Raises:
+        ValueError: If the motif does not fit inside the prompt at ``insert_size``.
+    """
+    # Values coming from numeric UI fields may be floats; list repetition,
+    # slicing and tensor shapes below all require true integers.
+    motif_start, motif_end = int(motif_start), int(motif_end)
+    prompt_length, insert_size = int(prompt_length), int(insert_size)
+    pdb = get_pdb(pdb_id, chain_id)
+    motif = pdb[np.arange(motif_start, motif_end)]
+    motif_sequence = motif.sequence
+    motif_atom37_positions = motif.atom37_positions
+    motif_stop = insert_size + len(motif_sequence)
+    # Without this check an oversized motif silently lengthens the sequence
+    # prompt and then fails later with an opaque tensor shape error.
+    if insert_size < 0 or motif_stop > prompt_length:
+        raise ValueError(
+            f"Motif of length {len(motif_sequence)} inserted at {insert_size} "
+            f"does not fit in a prompt of length {prompt_length}."
+        )
+    ## Create sequence prompt: "_" marks positions for the model to fill in
+    sequence_prompt = ["_"] * prompt_length
+    sequence_prompt[insert_size:motif_stop] = list(motif_sequence)
+    sequence_prompt = "".join(sequence_prompt)
+    ## Create structure prompt: NaN everywhere except the motif coordinates
+    structure_prompt = torch.full((prompt_length, 37, 3), np.nan)
+    structure_prompt[insert_size:insert_size + len(motif_atom37_positions)] = torch.tensor(motif_atom37_positions)
+    ## Create protein prompt
+    protein_prompt = ESMProtein(sequence=sequence_prompt, coordinates=structure_prompt)
+    sequence_generation_config = GenerationConfig(
+        track="sequence",
+        num_steps=sequence_prompt.count("_") // 2,
+        temperature=0.5,
+    )
+    ## Generate sequence
+    model = get_model(model_name, token)
+    sequence_generation = model.generate(protein_prompt, sequence_generation_config)
+    return [
+        pdb.sequence,
+        motif_sequence,
+        sequence_prompt,
+        sequence_generation.sequence,
+    ]
+def ss_edit(model_name, token, pdb_id, chain_id, region_start, region_end, shortened_region_length, shortening_ss8):
+    """Redesign a helix-coil-helix region of a chain with a shorter secondary-structure prompt.
+    Positions ``region_start`` up to (not including) ``region_end`` are replaced by
+    ``shortened_region_length`` masked residues whose SS8 prompt is helix, three
+    coil residues, helix. The flanking sequence and SS8 string are kept as given.
+    Args:
+        model_name: Model name forwarded to ``get_model``.
+        token: Hugging Face token forwarded to ``get_model``.
+        pdb_id: PDB code of the source structure.
+        chain_id: Chain identifier within that structure.
+        region_start: First index of the edited region (inclusive).
+        region_end: End index of the edited region (exclusive).
+        shortened_region_length: Number of residues in the replacement region.
+        shortening_ss8: SS8 string for the original chain.
+    Returns:
+        ``[original sequence, original SS8, original SS8 of the edit region,
+        sequence prompt, edited SS8 prompt, proposed SS8 of the edit region,
+        generated sequence]``, all strings.
+    Raises:
+        ValueError: If the region is empty or the shortened length is below 3.
+    """
+    # Values coming from numeric UI fields may be floats; string repetition
+    # and slicing require integers.
+    region_start, region_end = int(region_start), int(region_end)
+    shortened_region_length = int(shortened_region_length)
+    if region_end <= region_start:
+        raise ValueError("Edit Region End must be greater than Edit Region Start.")
+    if shortened_region_length < 3:
+        raise ValueError("Shortened Region Length must be at least 3 (the coil alone is 3 residues).")
+    pdb = get_pdb(pdb_id, chain_id)
+    ## Construct a sequence prompt that masks the (shortened) helix-coil-helix region, but leaves the flanking regions unmasked
+    sequence_prompt = pdb.sequence[:region_start] + "_" * shortened_region_length + pdb.sequence[region_end:]
+    ## Construct a secondary structure prompt that retains the secondary structure of the flanking regions, and shortens the lengths of helices in the helix-coil-helix region
+    # The second helix absorbs the remainder so the SS8 region is exactly
+    # `shortened_region_length` long, even when that length is even.
+    first_helix = (shortened_region_length - 3) // 2
+    second_helix = shortened_region_length - 3 - first_helix
+    shortened_ss8 = "H" * first_helix + "C" * 3 + "H" * second_helix
+    ss8_prompt = shortening_ss8[:region_start] + shortened_ss8 + shortening_ss8[region_end:]
+    ## Save original sequence and secondary structure
+    original_sequence = pdb.sequence
+    original_ss8 = shortening_ss8
+    # Left-pad with spaces so the region lines up under the full strings.
+    padding = " " * region_start
+    original_ss8_region = padding + shortening_ss8[region_start:region_end]
+    proposed_ss8_region = padding + ss8_prompt[region_start:region_start + shortened_region_length]
+    ## Create protein prompt
+    protein_prompt = ESMProtein(sequence=sequence_prompt, secondary_structure=ss8_prompt)
+    ## Generate sequence
+    model = get_model(model_name, token)
+    generation_config = GenerationConfig(
+        track="sequence",
+        num_steps=protein_prompt.sequence.count("_") // 2,
+        temperature=0.5,
+    )
+    sequence_generation = model.generate(protein_prompt, generation_config)
+    return [
+        original_sequence,
+        original_ss8,
+        original_ss8_region,
+        sequence_prompt,
+        ss8_prompt,
+        proposed_ss8_region,
+        # Return the sequence string (as `scaffold` does), not the generation object.
+        sequence_generation.sequence,
+    ]
+def sasa_edit(model_name, token, pdb_id, chain_id, span_start, span_end, n_samples):
+    """Sample redesigns of a chain in which one span keeps its structure and is given a SASA of 40.0.
+    The prompt masks the whole sequence, supplies coordinates only for
+    ``span_start:span_end``, and sets a SASA value of 40.0 on that span. Each
+    sample is a generated sequence followed by a structure prediction for it.
+    Args:
+        model_name: Model name forwarded to ``get_model``.
+        token: Hugging Face token forwarded to ``get_model``.
+        pdb_id: PDB code of the source structure.
+        chain_id: Chain identifier within that structure.
+        span_start: First index of the conditioned span (inclusive).
+        span_end: End index of the conditioned span (exclusive).
+        n_samples: Number of sequence/structure samples to draw.
+    Returns:
+        ``[protein prompt, generated sequences, generated proteins]``. The two
+        lists are aligned with each other and ordered by descending ptm; both
+        are empty when ``n_samples`` is 0.
+    """
+    # Values coming from numeric UI fields may be floats; slicing and range()
+    # require integers.
+    span_start, span_end, n_samples = int(span_start), int(span_end), int(n_samples)
+    pdb = get_pdb(pdb_id, chain_id)
+    # NaN coordinates everywhere except the conditioned span.
+    structure_prompt = torch.full((len(pdb), 37, 3), torch.nan)
+    structure_prompt[span_start:span_end] = torch.tensor(pdb[span_start:span_end].atom37_positions, dtype=torch.float32)
+    # No SASA value outside the span.
+    sasa_prompt = [None] * len(pdb)
+    sasa_prompt[span_start:span_end] = [40.0] * (span_end - span_start)
+    protein_prompt = ESMProtein(sequence="_" * len(pdb), coordinates=structure_prompt, sasa=sasa_prompt)
+    model = get_model(model_name, token)
+    # Floor division can reach 0 on short chains; request at least one step
+    # (assumes zero steps is never intended; confirm against the ESM3 API).
+    sequence_steps = max(1, len(protein_prompt) // 8)
+    structure_steps = max(1, len(protein_prompt) // 32)
+    samples = []
+    for _ in range(n_samples):
+        ## Generate sequence
+        sequence_generation = model.generate(protein_prompt, GenerationConfig(track="sequence", num_steps=sequence_steps, temperature=0.7))
+        ## Fold Protein
+        structure_prediction = model.generate(ESMProtein(sequence=sequence_generation.sequence), GenerationConfig(track="structure", num_steps=structure_steps))
+        # Keep each sequence with its fold so sorting cannot separate them.
+        samples.append((sequence_generation.sequence, structure_prediction))
+    ## Sort generations by ptm
+    samples.sort(key=lambda sample: sample[1].ptm.item(), reverse=True)
+    generated_sequences = [sequence for sequence, _ in samples]
+    generated_proteins = [protein for _, protein in samples]
+    return [
+        protein_prompt,
+        generated_sequences,
+        generated_proteins,
+    ]
+## Interface for main Scaffolding Example
+scaffold_app = gr.Interface(
+    fn=scaffold,
+    inputs=[
+        gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # No default value: an access token must never be committed to source.
+        # Each user supplies their own token at run time.
+        gr.Textbox(label="Hugging Face Token", type="password", placeholder="hf_..."),
+        gr.Textbox(value="1ITU", label="PDB Code"),
+        gr.Textbox(value="A", label="Chain"),
+        # precision=0 rounds to the nearest integer and delivers an int to `scaffold`.
+        gr.Number(value=123, label="Motif Start", precision=0),
+        gr.Number(value=146, label="Motif End", precision=0),
+        gr.Number(value=200, label="Prompt Length", precision=0),
+        gr.Number(value=72, label="Insert Size", precision=0),
+    ],
+    # One textbox per element of the list returned by `scaffold`, in order.
+    outputs=[
+        gr.Textbox(label="Sequence"),
+        # gr.Plot(label="3D Structure")
+        gr.Textbox(label="Motif Sequence"),
+        # gr.Textbox(label="Motif Positions")
+        gr.Textbox(label="Sequence Prompt"),
+        # gr.Textbox(label="Structure Prompt"),
+        # gr.Textbox(label="Protein Prompt"),
+        gr.Textbox(label="Generated Sequence"),
+    ],
+)
+## Interface for "Secondary Structure Editing Example: Helix Shortening"
+ss_app = gr.Interface(
+    fn=ss_edit,
+    inputs=[
+        gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # No default value: an access token must never be committed to source.
+        # Each user supplies their own token at run time.
+        gr.Textbox(label="Hugging Face Token", type="password", placeholder="hf_..."),
+        gr.Textbox(value="7XBQ", label="PDB ID"),
+        gr.Textbox(value="A", label="Chain ID"),
+        # precision=0 rounds to the nearest integer and delivers an int to `ss_edit`.
+        gr.Number(value=38, label="Edit Region Start", precision=0),
+        gr.Number(value=111, label="Edit Region End", precision=0),
+        gr.Number(value=45, label="Shortened Region Length", precision=0),
+        gr.Textbox(value="CCCSHHHHHHHHHHHTTCHHHHHHHHHHHHHTCSSCCCCHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHTTCHHHHHHHHHHHHHHHHHHHHHHHHHHHHIIIIIGGGCCSHHHHHHHHHHHHHHHHHHHHHCCHHHHHHHHHHHHHHHHHHHHHHHHHSCTTCHHHHHHHHHHHHHIIIIICCHHHHHHHHHHHHHHHHTTCTTCCSSHHHHHHHHHHHHHHHHHHHC", label="SS8 Shortening"),
+    ],
+    # One textbox per element of the list returned by `ss_edit`, in order.
+    outputs=[
+        gr.Textbox(label="Original Sequence"),
+        gr.Textbox(label="Original SS8"),
+        gr.Textbox(label="Original SS8 Edit Region"),
+        gr.Textbox(label="Sequence Prompt"),
+        gr.Textbox(label="Edited SS8 Prompt"),
+        gr.Textbox(label="Proposed SS8 of Edit Region"),
+        # gr.Textbox(label="Protein Prompt"),
+        gr.Textbox(label="Generated Sequence"),
+    ],
+)
+## Interface for "SASA Editing Example: Exposing a buried helix"
+sasa_app = gr.Interface(
+    fn=sasa_edit,
+    inputs=[
+        gr.Dropdown(label="Model Name", choices=["esm3_sm_open_v1"], value="esm3_sm_open_v1", allow_custom_value=True),
+        # No default value: an access token must never be committed to source.
+        # Each user supplies their own token at run time.
+        gr.Textbox(label="Hugging Face Token", type="password", placeholder="hf_..."),
+        gr.Textbox(value="1LBS", label="PDB ID"),
+        gr.Textbox(value="A", label="Chain ID"),
+        # precision=0 rounds to the nearest integer and delivers an int to `sasa_edit`.
+        gr.Number(value=105, label="Span Start", precision=0),
+        gr.Number(value=116, label="Span End", precision=0),
+        gr.Number(value=4, label="Number of Samples", precision=0),
+    ],
+    # One textbox per element of the list returned by `sasa_edit`, in order.
+    outputs=[
+        gr.Textbox(label="Protein Prompt"),
+        gr.Textbox(label="Generated Sequences"),
+        gr.Textbox(label="Generated Proteins"),
+    ],
+)
+## Main Interface
+with gr.Blocks(theme=theme) as esm_app:
+    # Header row: title and attribution links
+    with gr.Row():
+        gr.Markdown(
+            """
+            # ESM3: A frontier language model for biology.
+            - Created By: [EvolutionaryScale](https://www.evolutionaryscale.ai/blog/esm3-release)
+            - Spaces App By: [Tuple, The Cloud Genomics Company](https://tuple.xyz) [[Colby T. Ford](https://colbyford.com)]
+            """
+        )
+    # Body row: one tab per example interface, in the same order as the tab names
+    with gr.Row():
+        gr.TabbedInterface(
+            interface_list=[scaffold_app, ss_app, sasa_app],
+            tab_names=[
+                "Scaffolding Example",
+                "Secondary Structure Editing Example",
+                "SASA Editing Example",
+            ],
+        )
+## Launch the app only when this file is run as a script
+if __name__ == "__main__":
+    esm_app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+esm
+numpy
+torch>=2.3.0
+py3Dmol
+huggingface_hub