Qiwei97 committed on
Commit
6d2b6a2
1 Parent(s): 025e6e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# The original lines here were IPython magics (`%cd question_generator/` and
# `%load questiongenerator.py`). Those only work inside a notebook; in a plain
# .py file they are a SyntaxError. Putting the folder on sys.path is the script
# equivalent that lets `from questiongenerator import ...` resolve.
# NOTE(review): unlike `%cd`, this does not change the working directory.
# Confirm questiongenerator does not open files relative to the cwd.
import os
import sys

_QG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "question_generator")
if _QG_DIR not in sys.path:
    sys.path.insert(0, _QG_DIR)
3
+ import streamlit as st
4
+ from transformers import pipeline
5
+ import pandas as pd
6
+ import numpy as np
7
+ from questiongenerator import QuestionGenerator
8
+ from questiongenerator import print_qa
9
+ from annotated_text import annotated_text
10
+ import requests
11
+
12
+
13
+ #-----------------------------------------
14
+
15
@st.cache(allow_output_mutation=True)
def analyze_text(text, url="https://bern.korea.ac.kr/plain", timeout=30):
    """POST *text* to a remote annotation endpoint and return the decoded JSON.

    Args:
        text: Text to annotate; sent as the ``sample_text`` form field.
        url: Endpoint that receives the POST request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            Streamlit page if the service is unreachable.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(url, data={'sample_text': text}, timeout=timeout)
    # Surface 4xx/5xx replies explicitly instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()
18
+
19
@st.cache(allow_output_mutation=True)
def load_models():
    """Construct the three models used by the app.

    Returns:
        tuple: ``(summarizer, QnA, qg)`` -- a "summarization" pipeline, a
        "question-answering" pipeline and a ``QuestionGenerator`` instance,
        in that order.
    """
    # (task, checkpoint) pairs for the two transformers pipelines, in the
    # order they are returned.
    pipeline_specs = (
        ("summarization", 'patrickvonplaten/led-large-16384-pubmed'),
        ("question-answering", 'franklu/pubmed_bert_squadv2'),
    )
    summarization_model, qa_model = (
        pipeline(task, model=checkpoint) for task, checkpoint in pipeline_specs
    )

    question_generator = QuestionGenerator()

    return summarization_model, qa_model, question_generator
32
+
33
# Obtain the models once at import time; the page code below uses these
# three module-level names directly.
summarizer, QnA, qg = load_models()

#-----------------------------------------

# Sidebar: a welcome heading plus the page selector. `nav` holds the chosen
# page name and decides which section of the script renders.
sidebar = st.sidebar
sidebar.header('Welcome to the Pubmed Analyzer!')
nav = sidebar.selectbox('Navigation', ['Summarization', 'Analyze Text'])
40
+
41
+ # st.markdown(
42
+ # """
43
+ # <style>
44
+ # section[data-testid="stSidebar"] div[class="css-1lcbmhc e1fqkh3o0"] {
45
+ # background-image: linear-gradient(#8993ab,#8993ab);
46
+ # color: white
47
+ # }
48
+ # </style>
49
+ # """, unsafe_allow_html=True
50
+ # )
51
+
52
+ #-----------------------------------------
53
+
54
+ # Summarization
55
def p_title(title):
    """Render *title* as a styled, left-aligned ``<h3>`` section heading.

    The title is interpolated into raw HTML and rendered with
    ``unsafe_allow_html=True``.
    """
    heading_html = f'<h3 style="text-align: left; color:#F63366; font-size:28px;">{title}</h3>'
    st.markdown(heading_html, unsafe_allow_html=True)
57
+
58
def _tidy_summary(raw_summary):
    """Normalise spacing and sentence-initial capitals in a model summary.

    The text is split on '.', each piece is stripped, and only its first
    character is upper-cased. The rest of the sentence keeps its original
    case, so identifiers such as gene names are not lower-cased. A closing
    full stop is emitted only if the raw text ended with one, so a summary
    that stops mid-sentence does not lose its last character.

    Note: splitting on '.' also splits decimals and abbreviations.
    """
    sentences = [part.strip() for part in raw_summary.split('.')]
    sentences = [s[0].upper() + s[1:] for s in sentences if s]
    if not sentences:
        return ''
    tidy = '. '.join(sentences)
    if raw_summary.rstrip().endswith('.'):
        tidy += '.'
    return tidy


def _summary_stats(summary, source_text):
    """Return the caption 'N words (P% of original content)'."""
    summary_words = len(summary.split())
    source_words = len(source_text.split())
    # Guard against whitespace-only input, which has zero words.
    ratio = summary_words / source_words if source_words else 0
    return f'{summary_words} words ({ratio:.0%} of original content)'


if nav == 'Summarization':

    st.markdown("<h3 style='text-align: center; color:grey;'>Pubmed Analyzer &#129302;</h3>", unsafe_allow_html=True)
    st.text('')
    p_title('Summarization')
    st.text('')

    example = "Autophagy maintains tumour growth through circulating arginine. Autophagy captures intracellular components and delivers them to lysosomes, where they are degraded and recycled to sustain metabolism and to enable survival during starvation1-5. Acute, whole-body deletion of the essential autophagy gene Atg7 in adult mice causes a systemic metabolic defect that manifests as starvation intolerance and gradual loss of white adipose tissue, liver glycogen and muscle mass1. Cancer cells also benefit from autophagy. Deletion of essential autophagy genes impairs the metabolism, proliferation, survival and malignancy of spontaneous tumours in models of autochthonous cancer6,7. Acute, systemic deletion of Atg7 or acute, systemic expression of a dominant-negative ATG4b in mice induces greater regression of KRAS-driven cancers than does tumour-specific autophagy deletion, which suggests that host autophagy promotes tumour growth1,8. Here we show that host-specific deletion of Atg7 impairs the growth of multiple allografted tumours, although not all tumour lines were sensitive to host autophagy status. Loss of autophagy in the host was associated with a reduction in circulating arginine, and the sensitive tumour cell lines were arginine auxotrophs owing to the lack of expression of the enzyme argininosuccinate synthase 1. Serum proteomic analysis identified the arginine-degrading enzyme arginase I (ARG1) in the circulation of Atg7-deficient hosts, and in vivo arginine metabolic tracing demonstrated that serum arginine was degraded to ornithine. ARG1 is predominantly expressed in the liver and can be released from hepatocytes into the circulation. Liver-specific deletion of Atg7 produced circulating ARG1, and reduced both serum arginine and tumour growth."

    text = st.text_area("Use the example below or input your own text in English (between 1,000 and 10,000 characters)",
                        value=example, max_chars=10000, height=330)

    # Remember the current input for the 'Analyze Text' page, and clear any
    # previously generated question suggestions so they are rebuilt there.
    st.session_state.text = text
    st.session_state.qn_list = ""

    if st.button('Summarize'):
        if len(text) < 1000:
            st.error('Please enter a text in English of minimum 1,000 characters')
        else:
            with st.spinner('Processing...'):
                summary = summarizer(text,
                                     min_length=50,
                                     max_length=200,
                                     num_beams=3,
                                     no_repeat_ngram_size=2,
                                     early_stopping=True,
                                     clean_up_tokenization_spaces=True)[0]['summary_text']
                # Clean output
                summary = _tidy_summary(summary)

            st.markdown('___')
            st.caption(_summary_stats(summary, text))
            st.success(summary)
            # Stored so the 'Analyze Text' page can annotate and query it.
            st.session_state.summary = summary
95
+
96
+ #-----------------------------------------
97
+
98
+ # QnA + NER
99
# Highlight colour per entity label; any label not listed uses the fallback.
_ENTITY_COLOURS = {'disease': "#8ef", 'drug': "#faa", 'gene': "#fea"}
_OTHER_ENTITY_COLOUR = "#afa"


def _annotated_segments(summary, denotations):
    """Split *summary* into the arguments expected by ``annotated_text``.

    Args:
        summary: The text that was sent for entity recognition.
        denotations: Iterable of dicts, each with an ``'obj'`` label and a
            ``'span'`` dict holding ``'begin'``/``'end'`` character offsets
            into *summary*.

    Returns:
        A list mixing plain strings (text between entities) and
        ``(phrase, label, colour)`` tuples (entities), in document order.
        Text after the last entity is included rather than dropped.
    """
    segments = []
    cursor = 0  # end offset of the text emitted so far
    for entity in sorted(denotations, key=lambda d: d['span']['begin']):
        begin, end = entity['span']['begin'], entity['span']['end']
        if begin > cursor:
            segments.append(summary[cursor:begin] + " ")
        label = entity['obj']
        colour = _ENTITY_COLOURS.get(label, _OTHER_ENTITY_COLOUR)
        segments.append((summary[begin:end] + " ", label, colour))
        # max() keeps the cursor monotonic if the service returns overlapping spans.
        cursor = max(cursor, end)
    if cursor < len(summary):
        segments.append(summary[cursor:])
    return segments


def _sentence_case(text):
    """Upper-case only the first character, leaving the rest untouched."""
    return text[:1].upper() + text[1:]


if nav == 'Analyze Text':

    st.markdown("<h3 style='text-align: center; color:grey;'>Pubmed Analyzer &#129302;</h3>", unsafe_allow_html=True)
    st.text('')
    p_title('Named Entities')

    if 'summary' not in st.session_state:
        # Checked explicitly so that unrelated failures (network, parsing)
        # are no longer misreported as a missing summary.
        st.error('Please summarize your text first.')
    else:
        summary = st.session_state.summary

        # NER
        try:
            denotations = analyze_text(summary)['denotations']
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            st.error(f'Named entity recognition failed: {err}')
        else:
            annotated_text(*_annotated_segments(summary, denotations))

        # QnA
        st.markdown('___')
        p_title('Question Answering')
        question = st.text_input('Type your question here')
        if st.button('Submit'):
            with st.spinner('Processing...'):
                if question:
                    answer = QnA(question=question, context=st.session_state.text)['answer']
                    st.success(answer)
                else:
                    st.error("Please ask a question.")

        # QG
        # qn_list is reset to "" by the Summarization page; regenerate only then.
        if st.session_state.qn_list == "":
            with st.spinner('Generating Suggestions...'):
                sentences = [s.strip() + '.'
                             for s in st.session_state.text.split('.')
                             if s.strip()]
                suggestions = []
                if sentences:
                    # Never ask for more samples than there are sentences;
                    # choice(..., replace=False) raises otherwise.
                    sample_size = min(5, len(sentences))
                    for sentence in np.random.choice(sentences, sample_size, replace=False):
                        generated = qg.generate(str(sentence), num_questions=1,
                                                answer_style='all', use_evaluator=False)
                        if generated:
                            suggestions.append(generated[0]['question'])
                st.session_state.qn_list = suggestions

        st.text('')
        st.markdown("<h6 style='color:grey;'>Suggestions</h6>", unsafe_allow_html=True)
        for suggestion in st.session_state.qn_list:
            st.caption(_sentence_case(suggestion))