Spaces:

nambiar4
/

DR-BERT

Running

App Files Files Community

DR-BERT / app.py

nambiar4

Update app.py

cc2c186 over 1 year ago

raw

history blame contribute delete

No virus

1.71 kB

	import gradio as gr
	from transformers import AutoModel, AutoTokenizer, AutoModelForTokenClassification
	import torch
	import numpy as np
	import torch.nn.functional as F
	import matplotlib.pyplot as plt


	# Load the fine-tuned checkpoint once at import time so every request
	# handled by the app reuses the same tokenizer and model objects.
	tokenizer = AutoTokenizer.from_pretrained("./checkpoint-final/")
	# eval() returns the module itself, so the call can be chained onto the load.
	model = AutoModelForTokenClassification.from_pretrained("./checkpoint-final/").eval()

	# Example input offered in the UI: one row per example, one column per
	# input component (here a single amino-acid sequence string).
	examples = [
	    [
	        "GSHMSDNEDNFDGDDFDDVEEDEGLDDLENAEEEGQENVEILPSGERPQANQKRITTPYMTKYERARVLGTRALQIAMCAPVMVELEGETDPLLIAMKELKARKIPIIIRRYLPDGSYEDWGVDELIITD"
	    ],
	]

	def get_out(sent):
	    """Score an amino-acid sequence and return a plot plus the raw scores.

	    Args:
	        sent: The sequence as a string. Anything beyond the first 1022
	            characters is dropped and a notice is prepended to the text
	            output.

	    Returns:
	        A ``(figure, text)`` tuple: a matplotlib figure plotting the score
	        at each sequence position, and the same scores as a comma-separated
	        string (preceded by the truncation notice when it applies).
	    """
	    # Imported here so the function does not depend on pyplot's global
	    # figure registry (see the plotting section below).
	    from matplotlib.figure import Figure

	    prefix = ""
	    # NOTE(review): 1022 presumably reflects the model's maximum input
	    # length minus two special tokens -- confirm against the checkpoint.
	    if len(sent) > 1022:
	        sent = sent[:1022]
	        prefix = "Your protein was longer than 1022 AAs. We are working on including longer sequences but in the meantime, here are the scores for the first 1022 AAs: \n "
	    print(sent)

	    encoded = tokenizer.encode_plus(sent, return_tensors="pt")
	    with torch.no_grad():
	        logits = model(**encoded)["logits"]
	        # Drop only the batch axis and normalise over the label axis
	        # explicitly; softmax without `dim` relies on deprecated implicit
	        # dimension inference and emits a warning.
	        probs = F.softmax(logits.squeeze(0), dim=-1)
	    # Rows 1..-2 skip the first and last token positions (presumably the
	    # special tokens added by the tokenizer -- confirm); column 1 is the
	    # class whose probability the app reports as the score.
	    scores = probs[1:-1, 1].numpy()

	    # Build the figure with the object-oriented API. A figure created via
	    # plt.figure() stays registered in pyplot until it is closed, so one
	    # figure would be leaked per request in a long-running app.
	    fig = Figure()
	    ax = fig.subplots()
	    ax.plot(scores)
	    ax.tick_params(axis="both", labelsize=15)
	    ax.set_xlabel('Sequence position', fontsize=15)
	    ax.set_ylabel('DR-BERT score', fontsize=15)

	    return (fig, prefix + ','.join(str(x) for x in scores))


	# Build the web UI around get_out (one text input; plot and text outputs)
	# and start serving it.
	gr.Interface(
	    fn=get_out,
	    inputs=[
	        gr.components.Textbox(
	            label="Input Amino Acid Sequence",
	            placeholder=" Amino acid sequence here ...",
	        ),
	    ],
	    outputs=["plot", "text"],
	    examples=examples,
	    title="DR-BERT: A Protein Language Model to Predict Disordered Regions",
	    description="The app uses DR-BERT to predict disordered regions in proteins. Outputs generated are the probability that a residue is disordered.",
	).launch()