import gradio as gr
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer
title = "Protien Token Classification 🧬."
description = "Finds the position of Helix and Beta strand in the Protein Sequence."
article = 'Created from finetuning ESM2_150M'
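# Load the fine-tuned token-classification model from the local ./Model folder
# and the matching base ESM-2 tokenizer from the Hugging Face Hub.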
model = AutoModelForTokenClassification.from_pretrained('./Model')
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')
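# Example protein sequences shown in the Gradio examples panel.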
example_list = ['MENFTALFGAQADPPPPPTALGFGPGKPPPPPPPPAGGGPGTAPPPTAATAPPGADKSGAGCGPFYLMRELPGSTELTGSTNLITHYNLEQAYNKFCGKKVKEKLSNFLPDLPGMIDLPGSHDNSSLRSLIEKPPILSSSFNPITGTMLAGFRLHTGPLPEQCRLMHIQPPKKKNKHKHKQSRTQDPVPPETPSDSDHKKKKKKKEEDPDRKRKKKEKKKKKNRHSPDHPGMGSSQASSSSSLR',
'MAFSDLTSRTVHLYDNWIKDADPRVEDWLLMSSPLPQTILLGFYVYFVTSLGPKLMENRKPFELKKAMITYNFFIVLFSVYMCYEFVMSGWGIGYSFRCDIVDYSRSPTALRMARTCWLYYFSKFIELLDTIFFVLRKKNSQVTFLHVFHHTIMPWTWWFGVKFAAGGLGTFHALLNTAVHVVMYSYYGLSALGPAYQKYLWWKKYLTSLQLVQFVIVAIHISQFFFMEDCKYQFPVFACIIMSYSFMFLLLFLHFWYRAYTKGQRLPKTVKNGTCKNKDN',
'MYPSNKKKKVWREEKERLLKMTLEERRKEYLRDYIPLNSILSWKEEMKGKGQNDEENTQETSQVKKSLTEKVSLYRGDITLLEVDAIVNAANASLLGGGGVDGCIHRAAGPCLLAECRNLNGCDTGHAKITCGYDLPAKYVIHTVGPIARGHINGSHKEDLANCYKSSLKLVKENNIRSVAFPCISTGIYGFPNEPAAVIALNTIKEWLAKNHHEVDRIIFCVFLEVDFKIYKKKMNEFFSVDDNNEEEEDVEMKEDSDENGPEEKQSVEEMEEQSQDADGVNTVTVPGPASEEAVEDCKDEDFAKDENITKGGEVTDHSVRDQDHPDGQENDSTKNEIKIETESQSSYMETEELSSNQEDAVIVEQPEVIPLTEDQEEKEGEKAPGEDTPRMPGKSEGSSDLENTPGPDAGAQDEAKEQRNGTK',
'MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE',
'MEAGPPGSARPAEPGPCLSGQRGADHTASASLQSVAGTEPGRHPQAVAAVLPAGGCGERMGVPTPKQFCPILERPLISYTLQALERVCWIKDIVVAVTGENMEVMKSIIQKYQHKRISLVEAGVTRHRSIFNGLKALAEDQINSKLSKPEVVIIHDAVRPFVEEGVLLKVVTAAKEHGAAGAIRPLVSTVVSPSADGCLDYSLERARHRASEMPQAFLFDVIYEAYQQCSDYDLEFGTECLQLALKYCCTKAKLVEGSPDLWKVTYKRDLYAAESIIKERISQEICVVMDTEEDNKHVGHLLEEVLKSELNHVKVTSEALGHAGRHLQQIILDQCYNFVCVNVTTSDFQETQKLLSMLEESSLCILYPVVVVSVHFLDFKLVPPSQKMENLMQIREFAKEVKERNILLYGLLISYPQDDQKLQESLRQGAIIIASLIKERNSGLIGQLLIA']
def count_helix(helix):
    # Collapse a sorted list of residue positions into (start, end) ranges of
    # consecutive positions, e.g. [3, 4, 5, 9] -> [(3, 5)].
    final = []
    temp = []
    for x in range(1, len(helix)):
        if helix[x] == helix[x - 1] + 1:
            temp.append(helix[x - 1])
            temp.append(helix[x])
        elif len(temp) != 0:
            final.append((temp[0], temp[-1]))
            temp = []
    if len(temp) != 0:  # close a run that extends to the end of the list
        final.append((temp[0], temp[-1]))
    return final
def count_strand(strand):
    # Same range-collapsing logic as count_helix, applied to beta-strand positions.
    final = []
    temp = []
    for x in range(1, len(strand)):
        if strand[x] == strand[x - 1] + 1:
            temp.append(strand[x - 1])
            temp.append(strand[x])
        elif len(temp) != 0:
            final.append((temp[0], temp[-1]))
            temp = []
    if len(temp) != 0:  # close a run that extends to the end of the list
        final.append((temp[0], temp[-1]))
    return final
def print_output1(helix):
    # Format the helix ranges as a comma-separated string, or a fallback message.
    helix_op = count_helix(helix)
    if len(helix_op) != 0:
        return str(helix_op)[1:-1]
    return 'No Helix found.'
def print_output2(strand):
    # Format the beta-strand ranges as a comma-separated string, or a fallback message.
    strand_op = count_strand(strand)
    if len(strand_op) != 0:
        return str(strand_op)[1:-1]
    return 'No Beta strand found.'
def predict(ProteinSequence):
    # Tokenize the sequence and run the token-classification model without gradients.
    inputs = tokenizer(ProteinSequence, return_tensors='pt')
    with torch.inference_mode():
        outputs = model(**inputs)
    # Per-token predicted class: class 1 is treated as helix, any other
    # non-zero class as beta strand.
    output = outputs.logits.argmax(dim=-1)[0].numpy()
    helix = []
    strand = []
    for i in range(len(output)):
        if output[i] != 0:
            if output[i] == 1:
                helix.append(i + 1)
            else:
                strand.append(i + 1)
    return print_output1(helix), print_output2(strand)
iface = gr.Interface(fn=predict,
                     inputs='text',
                     outputs=[gr.Text(label='Helix'),
                              gr.Text(label='Beta Strand')],
                     title=title,
                     description=description,
                     article=article,
                     examples=example_list)
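# Launch the Gradio app.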
iface.launch()