Spaces:
Sleeping
Sravan1214
committed on
Commit • 82ea83d
1 Parent(s): 91d58be
checking the code
app.py
CHANGED
@@ -10,11 +10,8 @@ from transformers import (DebertaTokenizerFast,
 import tensorflow as tf
 import spacy
 import streamlit as st
-from scraper import scrape_text
 
 
-os.environ['TF_USE_LEGACY_KERAS'] = "1"
-
 class NERLabelEncoder:
     '''
     Label Encoder to encode and decode the entity labels
@@ -75,7 +72,6 @@ def load_ner_models():
 
 ner_model, ner_label_encoder, ner_tokenizer, nlp = load_ner_models()
 
-
 ############ NER MODEL & VARS INITIALIZATION END ####################
 
 ############ NER LOGIC START ####################
@@ -151,9 +147,10 @@ def ner_inference_long_text(txt):
     entities = []
     doc = nlp(txt)
     for sent in doc.sents:
-        entities.
+        entities.extend(ner_inference(sent.text))
     return entities
 
+
 def get_ner_text(article_txt, ner_result):
     res_txt = ''
     start = 0
@@ -177,7 +174,6 @@ def get_ner_text(article_txt, ner_result):
 
 ############ NER LOGIC END ####################
 
-
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
 SUMM_CHECKPOINT = "facebook/bart-base"
 SUMM_INPUT_N_TOKENS = 400
@@ -213,23 +209,13 @@ def summ_inference_tokenize(input_: list, n_tokens: int):
     tokenized_data = summ_tokenizer(text=input_, max_length=SUMM_TARGET_N_TOKENS, truncation=True, padding="max_length", return_tensors="tf")
     return summ_tokenizer, tokenized_data
 
-def clean_summary(summary: str):
-    summary = summary.strip()
-    if summary[-1] != '.':
-        sents = summary.split(". ")
-        summary = ". ".join(sents[:-1])
-        summary += "."
-    summary = re.sub(r'^-', "", summary)
-    summary = summary.strip()
-    if len(summary) <= 5:
-        summary = ""
-    return summary
-
 def summ_inference(txt: str):
     txt = summ_preprocess(txt)
-
+    test_data = [txt]
+    inference_tokenizer, tokenized_data = summ_inference_tokenize(input_=test_data, n_tokens=SUMM_INPUT_N_TOKENS)
     pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
-    result =
+    result = inference_tokenizer.decode(pred[0])
+    result = re.sub("<.*?>", "", result).strip()
     return result
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION END ####################
 
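For context, the NER hunk replaces the truncated per-sentence line with entities.extend(ner_inference(sent.text)). A minimal, self-contained sketch of that sentence-level pattern follows; it assumes a generic spaCy pipeline (en_core_web_sm) and uses a placeholder ner_inference, whereas app.py's real ner_inference wraps the DeBERTa token-classification model and label encoder:

import spacy

nlp = spacy.load("en_core_web_sm")  # stand-in; app.py loads its own spaCy pipeline

def ner_inference(sent_text: str) -> list:
    # Placeholder for app.py's DeBERTa-based NER call; spaCy's own entities
    # are reused here only so the sketch runs end to end.
    return [(ent.text, ent.label_) for ent in nlp(sent_text).ents]

def ner_inference_long_text(txt: str) -> list:
    # The commit's change: segment long text into sentences and flatten the
    # per-sentence results with extend() so a single entity list comes back.
    entities = []
    doc = nlp(txt)
    for sent in doc.sents:
        entities.extend(ner_inference(sent.text))
    return entities

print(ner_inference_long_text("Apple was founded by Steve Jobs. He lived in California."))

Splitting on spaCy sentence boundaries keeps each model input short, which avoids truncating long articles at the tokenizer's maximum length.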
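The rewritten summ_inference now follows a tokenize, generate, decode, strip-special-tokens flow. The sketch below reconstructs that flow in a self-contained form under stated assumptions: AutoTokenizer and TFAutoModelForSeq2SeqLM stand in for whatever classes app.py actually instantiates, SUMM_TARGET_N_TOKENS is given a placeholder value, and summ_preprocess is omitted; the checkpoint name, input token limit, and the decode/regex steps are taken from the diff:

import re
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

SUMM_CHECKPOINT = "facebook/bart-base"
SUMM_INPUT_N_TOKENS = 400
SUMM_TARGET_N_TOKENS = 100  # assumed value; the real constant is defined elsewhere in app.py

summ_tokenizer = AutoTokenizer.from_pretrained(SUMM_CHECKPOINT)
# If the checkpoint ships only PyTorch weights, add from_pt=True here.
summ_model = TFAutoModelForSeq2SeqLM.from_pretrained(SUMM_CHECKPOINT)

def summ_inference(txt: str) -> str:
    # Tokenize a single article, generate a summary, decode it, and strip
    # special tokens such as <s> and </s> with the same regex as the commit.
    tokenized_data = summ_tokenizer(text=[txt], max_length=SUMM_INPUT_N_TOKENS,
                                    truncation=True, padding="max_length",
                                    return_tensors="tf")
    pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
    result = summ_tokenizer.decode(pred[0])
    return re.sub("<.*?>", "", result).strip()

print(summ_inference("Your long article text goes here ..."))

Note that the unchanged summ_inference_tokenize context line still passes max_length=SUMM_TARGET_N_TOKENS rather than the n_tokens argument it receives, so inputs are truncated to the target length; the sketch uses SUMM_INPUT_N_TOKENS instead.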