|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForTokenClassification, AutoTokenizer |
|
|
|
# UI copy for the Gradio interface (fixed "Protien" -> "Protein" typo in the title).
title = "Protein Token Classification 🧬."

description = "Finds the position of Helix and Beta strand in the Protein Sequence."

article = 'Created from finetuning ESM2_150M'
|
|
|
# Fine-tuned token-classification model loaded from the local ./Model directory.
model = AutoModelForTokenClassification.from_pretrained('./Model')

# Tokenizer from the base ESM2 150M checkpoint (presumably the checkpoint the
# local model was fine-tuned from -- confirm they match).
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')
|
|
|
# Example protein sequences (one-letter amino-acid codes) shown as clickable
# examples in the Gradio UI.
example_list = ['MENFTALFGAQADPPPPPTALGFGPGKPPPPPPPPAGGGPGTAPPPTAATAPPGADKSGAGCGPFYLMRELPGSTELTGSTNLITHYNLEQAYNKFCGKKVKEKLSNFLPDLPGMIDLPGSHDNSSLRSLIEKPPILSSSFNPITGTMLAGFRLHTGPLPEQCRLMHIQPPKKKNKHKHKQSRTQDPVPPETPSDSDHKKKKKKKEEDPDRKRKKKEKKKKKNRHSPDHPGMGSSQASSSSSLR',

'MAFSDLTSRTVHLYDNWIKDADPRVEDWLLMSSPLPQTILLGFYVYFVTSLGPKLMENRKPFELKKAMITYNFFIVLFSVYMCYEFVMSGWGIGYSFRCDIVDYSRSPTALRMARTCWLYYFSKFIELLDTIFFVLRKKNSQVTFLHVFHHTIMPWTWWFGVKFAAGGLGTFHALLNTAVHVVMYSYYGLSALGPAYQKYLWWKKYLTSLQLVQFVIVAIHISQFFFMEDCKYQFPVFACIIMSYSFMFLLLFLHFWYRAYTKGQRLPKTVKNGTCKNKDN',

'MYPSNKKKKVWREEKERLLKMTLEERRKEYLRDYIPLNSILSWKEEMKGKGQNDEENTQETSQVKKSLTEKVSLYRGDITLLEVDAIVNAANASLLGGGGVDGCIHRAAGPCLLAECRNLNGCDTGHAKITCGYDLPAKYVIHTVGPIARGHINGSHKEDLANCYKSSLKLVKENNIRSVAFPCISTGIYGFPNEPAAVIALNTIKEWLAKNHHEVDRIIFCVFLEVDFKIYKKKMNEFFSVDDNNEEEEDVEMKEDSDENGPEEKQSVEEMEEQSQDADGVNTVTVPGPASEEAVEDCKDEDFAKDENITKGGEVTDHSVRDQDHPDGQENDSTKNEIKIETESQSSYMETEELSSNQEDAVIVEQPEVIPLTEDQEEKEGEKAPGEDTPRMPGKSEGSSDLENTPGPDAGAQDEAKEQRNGTK',

'MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE',

'MEAGPPGSARPAEPGPCLSGQRGADHTASASLQSVAGTEPGRHPQAVAAVLPAGGCGERMGVPTPKQFCPILERPLISYTLQALERVCWIKDIVVAVTGENMEVMKSIIQKYQHKRISLVEAGVTRHRSIFNGLKALAEDQINSKLSKPEVVIIHDAVRPFVEEGVLLKVVTAAKEHGAAGAIRPLVSTVVSPSADGCLDYSLERARHRASEMPQAFLFDVIYEAYQQCSDYDLEFGTECLQLALKYCCTKAKLVEGSPDLWKVTYKRDLYAAESIIKERISQEICVVMDTEEDNKHVGHLLEEVLKSELNHVKVTSEALGHAGRHLQQIILDQCYNFVCVNVTTSDFQETQKLLSMLEESSLCILYPVVVVSVHFLDFKLVPPSQKMENLMQIREFAKEVKERNILLYGLLISYPQDDQKLQESLRQGAIIIASLIKERNSGLIGQLLIA']
|
|
|
def count_helix(helix):
    """Collapse sorted helix positions into (start, end) ranges of consecutive runs.

    Args:
        helix: list of ints, assumed sorted ascending (1-based residue positions).

    Returns:
        List of (start, end) tuples, one per run of >= 2 consecutive positions.
        Isolated single positions are skipped (matches original behavior).
    """
    final = []
    temp = []
    for x in range(1, len(helix)):
        if helix[x] == helix[x - 1] + 1:
            # Extend the current run; seed it with the run's first element.
            if not temp:
                temp.append(helix[x - 1])
            temp.append(helix[x])
        elif temp:
            # Run ended: record its span and reset.
            final.append((temp[0], temp[-1]))
            temp = []
    # BUGFIX: a run reaching the end of the list was previously dropped
    # because it was never flushed after the loop.
    if temp:
        final.append((temp[0], temp[-1]))
    return final
|
|
|
def count_strand(strand):
    """Collapse sorted beta-strand positions into (start, end) ranges of consecutive runs.

    Args:
        strand: list of ints, assumed sorted ascending (1-based residue positions).

    Returns:
        List of (start, end) tuples, one per run of >= 2 consecutive positions.
        Isolated single positions are skipped (matches original behavior).
    """
    final = []
    temp = []
    for x in range(1, len(strand)):
        if strand[x] == strand[x - 1] + 1:
            # Extend the current run; seed it with the run's first element.
            if not temp:
                temp.append(strand[x - 1])
            temp.append(strand[x])
        elif temp:
            # Run ended: record its span and reset.
            final.append((temp[0], temp[-1]))
            temp = []
    # BUGFIX: a run reaching the end of the list was previously dropped
    # because it was never flushed after the loop.
    if temp:
        final.append((temp[0], temp[-1]))
    return final
|
|
|
def print_output1(helix):
    """Format helix position ranges as a human-readable string.

    Args:
        helix: list of 1-based positions classified as helix.

    Returns:
        Comma-separated "(start, end)" ranges (the list repr minus its
        brackets), or 'No Helix found.' when there are no runs.
    """
    helix_op = count_helix(helix)
    if helix_op:  # truthiness instead of len(...) != 0
        # Strip the surrounding '[' and ']' from the list repr.
        return str(helix_op)[1:-1]
    # Removed the redundant str() wrapper around a string literal.
    return 'No Helix found.'
|
|
|
def print_output2(strand):
    """Format beta-strand position ranges as a human-readable string.

    Args:
        strand: list of 1-based positions classified as beta strand.

    Returns:
        Comma-separated "(start, end)" ranges (the list repr minus its
        brackets), or 'No Beta strand found.' when there are no runs.
    """
    strand_op = count_strand(strand)
    if strand_op:  # truthiness instead of len(...) != 0
        # Strip the surrounding '[' and ']' from the list repr.
        return str(strand_op)[1:-1]
    # Removed the redundant str() wrapper around a string literal.
    return 'No Beta strand found.'
|
|
|
def predict(ProtienSequence):
    """Classify each token of a protein sequence as helix / beta strand / other.

    Args:
        ProtienSequence: raw protein sequence string (parameter name kept as-is,
            misspelling included, because Gradio derives the input label from it).

    Returns:
        Tuple of two strings: formatted helix ranges and beta-strand ranges.
    """
    # Renamed from `input` to avoid shadowing the builtin.
    encoded = tokenizer(ProtienSequence, return_tensors='pt')
    with torch.inference_mode():
        outputs = model(**encoded)
    # Per-token predicted class ids; `dim` is torch's documented keyword
    # (the original used the numpy-style `axis` alias).
    preds = outputs.logits.argmax(dim=2)[0].numpy()

    helix = []
    strand = []

    # Class 0 = neither; class 1 = helix; anything else = beta strand.
    for pos, label in enumerate(preds, start=1):
        if label == 1:
            helix.append(pos)
        elif label != 0:
            strand.append(pos)

    # NOTE(review): `preds` covers the tokenizer's special tokens too (ESM
    # tokenizers typically prepend CLS), so the 1-based numbering may be
    # shifted by one relative to the raw sequence -- confirm against the
    # tokenizer output before trusting absolute positions.
    return print_output1(helix), print_output2(strand)
|
|
|
# Wire up the Gradio UI: one free-text input, two labeled text outputs
# (helix ranges and beta-strand ranges), with clickable example sequences.
iface = gr.Interface(fn=predict,

                     inputs='text',

                     outputs=[gr.Text(label='Helix'),

                              gr.Text(label='Beta Strand')],

                     title=title,

                     description=description,

                     article=article,

                     examples=example_list)

# Start the web server (blocking call).
iface.launch()