DR-BERT / app.py
nambiar4's picture
Update app.py
cc2c186
raw
history blame contribute delete
No virus
1.71 kB
import gradio as gr
from transformers import AutoModel, AutoTokenizer, AutoModelForTokenClassification
import torch
import numpy as np
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Tokenizer and token-classification model are both read from the same local
# checkpoint directory, once, at import time.
checkpoint_dir = "./checkpoint-final/"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
# eval() returns the module itself, so it can be chained onto the load.
model = AutoModelForTokenClassification.from_pretrained(checkpoint_dir).eval()

# Sample input shown under the text box in the UI (one row per example,
# one column per input component).
examples = [
    [
        "GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD",
    ],
]
# Longest input (in characters) that is scored; anything beyond is cut off.
MAX_RESIDUES = 1022


def get_out(sent):
    """Score every position of an amino-acid sequence with the loaded model.

    Args:
        sent: The amino-acid sequence as a plain string. Only the first
            ``MAX_RESIDUES`` characters are scored; longer inputs are
            truncated and a notice is prepended to the text output.

    Returns:
        A ``(figure, text)`` tuple: a matplotlib figure plotting the score
        against sequence position, and the same scores as a comma-separated
        string (preceded by the truncation notice when one applies).
    """
    prefix = ""
    if len(sent) > MAX_RESIDUES:
        sent = sent[:MAX_RESIDUES]
        prefix = (
            f"Your protein was longer than {MAX_RESIDUES} AAs. We are working on "
            "including longer sequences but in the meantime, here are the scores "
            f"for the first {MAX_RESIDUES} AAs: \n "
        )
    print(sent)

    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
    with torch.no_grad():
        logits = model(**encoded)["logits"]
        # Index the single batch entry explicitly (a bare squeeze would also
        # drop any other size-1 axis) and name the softmax axis: leaving `dim`
        # implicit is deprecated and emits a warning.
        probs = F.softmax(logits[0], dim=-1)

    # Drop the first and last token positions (presumably the special tokens
    # the tokenizer adds -- TODO confirm) and keep class 1, which the app
    # description reports as the probability of disorder.
    scores = probs[1:-1, 1].numpy()

    # Draw through the Axes object instead of pyplot's global "current figure"
    # so overlapping requests cannot draw onto each other's plot.
    fig, ax = plt.subplots()
    ax.plot(scores)
    ax.tick_params(axis="both", labelsize=15)
    ax.set_xlabel('Sequence position', fontsize=15)
    ax.set_ylabel('DR-BERT score', fontsize=15)
    # Unregister the figure from pyplot so a long-running server does not keep
    # every figure it has ever produced; the returned object stays renderable.
    plt.close(fig)

    text = ','.join(str(x) for x in scores)
    return (fig, prefix + text)
# Single free-text input holding the sequence to score.
sequence_box = gr.components.Textbox(
    label="Input Amino Acid Sequence",
    placeholder=" Amino acid sequence here ...",
)

# Wire get_out to the UI: one text input, a plot plus a text output.
demo = gr.Interface(
    fn=get_out,
    inputs=[sequence_box],
    outputs=["plot", "text"],
    examples=examples,
    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
    description="The app uses DR-BERT to predict disordered regions in proteins. Outputs generated are the probability that a residue is disordered.",
)
demo.launch()