Spaces:
Runtime error
Runtime error
ULMER Louis (T0240644)
committed on
Commit
•
51636fd
1
Parent(s):
8746267
updating paraphraser
Browse files- .gitignore +1 -0
- README.md +3 -3
- app.py +10 -4
- backend/data_augmenter.py +13 -4
- footer.py +76 -0
- requirements.txt +2 -1
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.pyc
|
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
title: Paraphraser.ai
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.10.0
|
8 |
app_file: app.py
|
|
|
1 |
---
|
2 |
title: Paraphraser.ai
|
3 |
+
emoji: ✍️
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: green
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.10.0
|
8 |
app_file: app.py
|
app.py
CHANGED
@@ -1,19 +1,22 @@
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
from backend.data_augmenter import BackTranslatorAugmenter
|
4 |
-
|
5 |
os.environ['NO_PROXY'] = '127.0.0.1'
|
6 |
-
st.set_page_config(layout="wide", page_title="Paraphraser.AI", page_icon="
|
7 |
-
st.title('Paraphraser.AI
|
8 |
st.header("An intelligent sentence paraphraser")
|
|
|
9 |
|
10 |
model_selection = st.sidebar.selectbox(
|
11 |
'Select a paraphraser:',
|
12 |
-
['Vladimir 🧑🏼','Maria 👩🏽'],
|
13 |
)
|
14 |
|
15 |
input_text = st.text_area('Please type the text to paraphrase')
|
16 |
|
|
|
|
|
17 |
class DummyAugmenter:
|
18 |
def __init__(self, in_lang="en", out_lang="ru") -> None:
|
19 |
pass
|
@@ -25,8 +28,11 @@ if model_selection == 'Vladimir 🧑🏼':
|
|
25 |
model = BackTranslatorAugmenter(in_lang="en", out_lang="ru")
|
26 |
if model_selection == 'Maria 👩🏽':
|
27 |
model = BackTranslatorAugmenter(in_lang="en", out_lang="es")
|
|
|
|
|
28 |
|
29 |
if input_text:
|
30 |
st.header(f"Paraphrased text :")
|
31 |
st.write("".join(model.back_translate(input_text)))
|
32 |
|
|
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
from backend.data_augmenter import BackTranslatorAugmenter
|
4 |
+
from footer import footer
|
5 |
os.environ['NO_PROXY'] = '127.0.0.1'
|
6 |
+
st.set_page_config(layout="wide", page_title="Paraphraser.AI", page_icon="🤖✍️")
|
7 |
+
st.title('Paraphraser.AI 🤖✍️')
|
8 |
st.header("An intelligent sentence paraphraser")
|
9 |
+
st.markdown('''This is a demo of a system that can rewrite some given paragraphs with slight differences.''')
|
10 |
|
11 |
model_selection = st.sidebar.selectbox(
|
12 |
'Select a paraphraser:',
|
13 |
+
['Vladimir 🧑🏼','Maria 👩🏽','Jacques 👨'],
|
14 |
)
|
15 |
|
16 |
input_text = st.text_area('Please type the text to paraphrase')
|
17 |
|
18 |
+
|
19 |
+
|
20 |
class DummyAugmenter:
|
21 |
def __init__(self, in_lang="en", out_lang="ru") -> None:
|
22 |
pass
|
|
|
28 |
model = BackTranslatorAugmenter(in_lang="en", out_lang="ru")
|
29 |
if model_selection == 'Maria 👩🏽':
|
30 |
model = BackTranslatorAugmenter(in_lang="en", out_lang="es")
|
31 |
+
if model_selection == 'Jacques 👨':
|
32 |
+
model = BackTranslatorAugmenter(in_lang="en", out_lang="fr")
|
33 |
|
34 |
if input_text:
|
35 |
st.header(f"Paraphrased text :")
|
36 |
st.write("".join(model.back_translate(input_text)))
|
37 |
|
38 |
+
footer()
|
backend/data_augmenter.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import argparse
|
2 |
import time
|
3 |
from tqdm import tqdm
|
@@ -7,6 +8,8 @@ import os
|
|
7 |
import json
|
8 |
import torch
|
9 |
from dotenv import load_dotenv
|
|
|
|
|
10 |
|
11 |
load_dotenv()
|
12 |
from nltk.tokenize import sent_tokenize
|
@@ -63,9 +66,14 @@ class BackTranslatorAugmenter:
|
|
63 |
if verbose:
|
64 |
tic = time.time()
|
65 |
encoded_text = self.in_tokenizer(
|
66 |
-
text, return_tensors="pt", padding=True, truncation=True
|
67 |
).to(self.device)
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
in_preds = [
|
71 |
self.in_tokenizer.decode(
|
@@ -76,9 +84,10 @@ class BackTranslatorAugmenter:
|
|
76 |
if verbose:
|
77 |
print("in_pred : ", in_preds)
|
78 |
encoded_text = self.out_tokenizer(
|
79 |
-
in_preds, return_tensors="pt", padding=True, truncation=True
|
80 |
).to(self.device)
|
81 |
-
out_generated_ids = self.out_model.generate(
|
|
|
82 |
out_preds = [
|
83 |
self.out_tokenizer.decode(
|
84 |
gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True
|
|
|
1 |
+
#%%
|
2 |
import argparse
|
3 |
import time
|
4 |
from tqdm import tqdm
|
|
|
8 |
import json
|
9 |
import torch
|
10 |
from dotenv import load_dotenv
|
11 |
+
#%%
|
12 |
+
|
13 |
|
14 |
load_dotenv()
|
15 |
from nltk.tokenize import sent_tokenize
|
|
|
66 |
if verbose:
|
67 |
tic = time.time()
|
68 |
encoded_text = self.in_tokenizer(
|
69 |
+
text, return_tensors="pt", padding=True, truncation=True, return_overflowing_tokens=True
|
70 |
).to(self.device)
|
71 |
+
if encoded_text['num_truncated_tokens'][0] > 0:
|
72 |
+
print('Text is too long ')
|
73 |
+
return self.back_translate_long(text,verbose=verbose)
|
74 |
+
|
75 |
+
in_generated_ids = self.in_model.generate(inputs=encoded_text['input_ids'],
|
76 |
+
attention_mask=encoded_text["attention_mask"])
|
77 |
|
78 |
in_preds = [
|
79 |
self.in_tokenizer.decode(
|
|
|
84 |
if verbose:
|
85 |
print("in_pred : ", in_preds)
|
86 |
encoded_text = self.out_tokenizer(
|
87 |
+
in_preds, return_tensors="pt", padding=True, truncation=True,return_overflowing_tokens=True
|
88 |
).to(self.device)
|
89 |
+
out_generated_ids = self.out_model.generate(inputs=encoded_text['input_ids'],
|
90 |
+
attention_mask=encoded_text["attention_mask"])
|
91 |
out_preds = [
|
92 |
self.out_tokenizer.decode(
|
93 |
gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True
|
footer.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from htbuilder import HtmlElement, div, ul, li, br, hr, a, p, img, styles, classes, fonts
|
3 |
+
from htbuilder.units import percent, px
|
4 |
+
from htbuilder.funcs import rgba, rgb
|
5 |
+
|
6 |
+
|
7 |
+
def image(src_as_string, **style):
    """Return an htbuilder ``img`` element for ``src_as_string``.

    Args:
        src_as_string: Value passed as the element's ``src`` attribute.
        **style: Keyword arguments forwarded to ``styles`` to build the
            element's inline ``style`` attribute.
    """
    inline_style = styles(**style)
    return img(src=src_as_string, style=inline_style)
|
9 |
+
|
10 |
+
|
11 |
+
def link(link, text, **style):
    """Return an htbuilder ``a`` element wrapping ``text``.

    Args:
        link: Value passed as the anchor's ``href``.
        text: Child content placed inside the anchor (a string or another
            element, as the callers in this file do).
        **style: Keyword arguments forwarded to ``styles`` to build the
            anchor's inline ``style`` attribute.
    """
    # target="_blank" is always set on the anchor.
    anchor = a(_href=link, _target="_blank", style=styles(**style))
    return anchor(text)
|
13 |
+
|
14 |
+
|
15 |
+
def layout(*args):
    """Render a footer pinned to the bottom of the Streamlit page.

    Emits two pieces of raw HTML through ``st.markdown``: a ``<style>`` block
    with rules for ``#MainMenu``, ``footer`` and ``.stApp``, followed by a
    fixed-position ``<div>`` containing a horizontal rule and a paragraph
    filled with ``args``.

    Args:
        *args: Footer content in display order. Strings and ``HtmlElement``
            instances are appended to the footer paragraph; values of any
            other type are silently skipped.
    """
    # The ID selector must be written without a space after '#'.
    # "# MainMenu" is not a valid CSS selector, so that rule was being
    # discarded and the element it targets was never hidden.
    style = """
    <style>
      #MainMenu {visibility: hidden;}
      footer {visibility: hidden;}
      .stApp { bottom: 105px; }
    </style>
    """

    style_div = styles(
        position="fixed",
        left=0,
        bottom=0,
        margin=px(0, 0, 0, 0),
        width=percent(100),
        color="black",
        text_align="center",
        height="auto",
        opacity=1,
    )

    # NOTE(review): presumably px() appends "px" to every argument, which
    # would turn "auto" into "autopx" here -- confirm against htbuilder.units
    # before relying on this margin.
    style_hr = styles(
        display="block",
        margin=px(8, 8, "auto", "auto"),
        border_style="inset",
        border_width=px(2),
    )

    body = p()
    foot = div(style=style_div)(
        hr(style=style_hr),
        body,
    )

    st.markdown(style, unsafe_allow_html=True)

    # Children are added after ``foot`` is built; ``foot`` holds a reference
    # to ``body``, so they are included when ``foot`` is rendered below.
    for arg in args:
        if isinstance(arg, (str, HtmlElement)):
            body(arg)

    st.markdown(str(foot), unsafe_allow_html=True)
|
64 |
+
|
65 |
+
|
66 |
+
def footer():
    """Render the app's credit footer via ``layout``.

    The footer consists of the text "Made in " followed by a 25px avatar
    image, a line break, the author credit, another line break, and a 25px
    LinkedIn icon that links to the author's profile.
    """
    icon_size = {"width": px(25), "height": px(25)}

    avatar = image(
        'https://avatars3.githubusercontent.com/u/45109972?s=400&v=4',
        **icon_size,
    )
    linkedin_icon = image(
        'https://logospng.org/download/linkedin/logo-linkedin-icon-4096.png',
        **icon_size,
    )
    profile_link = link("https://www.linkedin.com/in/louisulmer/", linkedin_icon)

    layout(
        "Made in ",
        avatar,
        br(),
        "with ❤️ by Louis Ulmer ",
        br(),
        profile_link,
    )
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ transformers[sentencepiece]
|
|
5 |
pandas
|
6 |
scikit-learn
|
7 |
nltk
|
8 |
-
python-dotenv
|
|
|
|
5 |
pandas
|
6 |
scikit-learn
|
7 |
nltk
|
8 |
+
python-dotenv
|
9 |
+
htbuilder
|