File size: 4,093 Bytes
1b203be
582a4e0
1b203be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

title = "Protien Token Classification 🧬."
description = "Finds the position of Helix and Beta strand in the Protein Sequence."
article = 'Created from finetuning ESM2_150M'

model = AutoModelForTokenClassification.from_pretrained('./Model')
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')

example_list = ['MENFTALFGAQADPPPPPTALGFGPGKPPPPPPPPAGGGPGTAPPPTAATAPPGADKSGAGCGPFYLMRELPGSTELTGSTNLITHYNLEQAYNKFCGKKVKEKLSNFLPDLPGMIDLPGSHDNSSLRSLIEKPPILSSSFNPITGTMLAGFRLHTGPLPEQCRLMHIQPPKKKNKHKHKQSRTQDPVPPETPSDSDHKKKKKKKEEDPDRKRKKKEKKKKKNRHSPDHPGMGSSQASSSSSLR',
 'MAFSDLTSRTVHLYDNWIKDADPRVEDWLLMSSPLPQTILLGFYVYFVTSLGPKLMENRKPFELKKAMITYNFFIVLFSVYMCYEFVMSGWGIGYSFRCDIVDYSRSPTALRMARTCWLYYFSKFIELLDTIFFVLRKKNSQVTFLHVFHHTIMPWTWWFGVKFAAGGLGTFHALLNTAVHVVMYSYYGLSALGPAYQKYLWWKKYLTSLQLVQFVIVAIHISQFFFMEDCKYQFPVFACIIMSYSFMFLLLFLHFWYRAYTKGQRLPKTVKNGTCKNKDN',
 'MYPSNKKKKVWREEKERLLKMTLEERRKEYLRDYIPLNSILSWKEEMKGKGQNDEENTQETSQVKKSLTEKVSLYRGDITLLEVDAIVNAANASLLGGGGVDGCIHRAAGPCLLAECRNLNGCDTGHAKITCGYDLPAKYVIHTVGPIARGHINGSHKEDLANCYKSSLKLVKENNIRSVAFPCISTGIYGFPNEPAAVIALNTIKEWLAKNHHEVDRIIFCVFLEVDFKIYKKKMNEFFSVDDNNEEEEDVEMKEDSDENGPEEKQSVEEMEEQSQDADGVNTVTVPGPASEEAVEDCKDEDFAKDENITKGGEVTDHSVRDQDHPDGQENDSTKNEIKIETESQSSYMETEELSSNQEDAVIVEQPEVIPLTEDQEEKEGEKAPGEDTPRMPGKSEGSSDLENTPGPDAGAQDEAKEQRNGTK',
 'MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE',
 'MEAGPPGSARPAEPGPCLSGQRGADHTASASLQSVAGTEPGRHPQAVAAVLPAGGCGERMGVPTPKQFCPILERPLISYTLQALERVCWIKDIVVAVTGENMEVMKSIIQKYQHKRISLVEAGVTRHRSIFNGLKALAEDQINSKLSKPEVVIIHDAVRPFVEEGVLLKVVTAAKEHGAAGAIRPLVSTVVSPSADGCLDYSLERARHRASEMPQAFLFDVIYEAYQQCSDYDLEFGTECLQLALKYCCTKAKLVEGSPDLWKVTYKRDLYAAESIIKERISQEICVVMDTEEDNKHVGHLLEEVLKSELNHVKVTSEALGHAGRHLQQIILDQCYNFVCVNVTTSDFQETQKLLSMLEESSLCILYPVVVVSVHFLDFKLVPPSQKMENLMQIREFAKEVKERNILLYGLLISYPQDDQKLQESLRQGAIIIASLIKERNSGLIGQLLIA']

def count_helix(helix):
    final = []
    temp = []
    for x in range(1, len(helix)):
        if helix[x] == helix[x-1] + 1:
            temp.append(helix[x-1])
            temp.append(helix[x])
        elif len(temp) != 0: 
            final.append((temp[0], temp[-1]))
            temp = []
    return final

def count_strand(strand):
    final = []
    temp = []
    for x in range(1, len(strand)):
        if strand[x] == strand[x-1] + 1:
            temp.append(strand[x-1])
            temp.append(strand[x])
        elif len(temp) != 0: 
            final.append((temp[0], temp[-1]))
            temp = []
    return final

def print_output1(helix):
    helix_op = count_helix(helix)

    if len(helix_op) != 0:
        str1 = str(helix_op)[1:-1]
        return str1
    
    else:
        return str('No Helix found.')
    
def print_output2(strand):
    strand_op = count_strand(strand)

    if len(strand_op) != 0:
        str1 = str(strand_op)[1:-1]
        return str1 

    else:
        return str('No Beta strand found.')

def predict(ProtienSequence):
    input = tokenizer(ProtienSequence, return_tensors='pt')
    with torch.inference_mode():
        outputs = model(**input)
    output = outputs.logits.argmax(axis=2)[0].numpy()

    helix = []
    strand = []

    for i in range(len(output)):
        if output[i] != 0:
            if output[i] == 1:
                helix.append(i+1)
            else:
                strand.append(i+1)
    
    return print_output1(helix), print_output2(strand)

iface = gr.Interface(fn=predict,
                    inputs='text',
                    outputs=[gr.Text(label='Helix'),
                             gr.Text(label='Beta Strand')],
                             title=title,
                             description=description,
                             article=article,
                             examples=example_list)
iface.launch()