Spaces:
Running
Running
Upload 12 files
Browse files- app/__init__.py +0 -0
- app/__pycache__/__init__.cpython-310.pyc +0 -0
- app/webui/README.md +79 -5
- app/webui/__pycache__/__init__.cpython-310.pyc +0 -0
- app/webui/__pycache__/app.cpython-310.pyc +0 -0
- app/webui/__pycache__/patch.cpython-310.pyc +0 -0
- app/webui/__pycache__/process.cpython-310.pyc +0 -0
- app/webui/app.py +9 -10
- app/webui/patch.py +19 -16
- app/webui/process.py +12 -20
- app/webui/requirements.txt +12 -0
app/__init__.py
ADDED
File without changes
|
app/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (136 Bytes). View file
|
|
app/webui/README.md
CHANGED
@@ -1,7 +1,81 @@
|
|
1 |
-
# Translation-Agent-Webui
|
2 |
|
3 |
-
##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
-
git clone https://github.com/andrewyng/translation-agent.git
|
6 |
-
cd translation-agent\app\webui
|
7 |
-
pip install -
|
|
|
|
|
1 |
|
2 |
+
## Translation Agent WebUI
|
3 |
+
|
4 |
+
This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
|
5 |
+
|
6 |
+
**Features:**
|
7 |
+
|
8 |
+
- **Tokenized Text:** Displays translated text with tokenization, highlighting differences between original and translated words.
|
9 |
+
- **Document Upload:** Supports uploading various document formats (PDF, TXT, DOC, etc.) for translation.
|
10 |
+
- **Multiple API Support:** Integrates with popular language models like:
|
11 |
+
- Groq
|
12 |
+
- OpenAI
|
13 |
+
- Cohere
|
14 |
+
- Ollama
|
15 |
+
- Together AI
|
16 |
+
- Hugging Face Inference API
|
17 |
+
...
|
18 |
+
Llama Index is supported, and the integration is easily extendable.
|
19 |
+
|
20 |
+
|
21 |
+
**Getting Started**
|
22 |
+
|
23 |
+
1. **Install Dependencies:**
|
24 |
+
**Linux(Using Python Venv)**
|
25 |
+
```bash
|
26 |
+
git clone https://github.com/andrewyng/translation-agent.git
|
27 |
+
cd translation-agent
|
28 |
+
python -m venv web_ui
|
29 |
+
source web_ui/bin/activate
|
30 |
+
pip install -r app/webui/requirements.txt
|
31 |
+
|
32 |
+
```
|
33 |
+
**Windows**
|
34 |
+
```bash
|
35 |
+
git clone https://github.com/andrewyng/translation-agent.git
|
36 |
+
cd translation-agent
|
37 |
+
python -m venv web_ui
|
38 |
+
.\web_ui\Scripts\activate
|
39 |
+
pip install -r app/webui/requirements.txt
|
40 |
+
|
41 |
+
```
|
42 |
+
|
43 |
+
2. **Set API Keys:**
|
44 |
+
- Rename `.env.sample` to `.env`, you can add your API keys for each service:
|
45 |
+
|
46 |
+
```
|
47 |
+
OPENAI_API_KEY="sk-xxxxx" # Keep this field
|
48 |
+
GROQ_API_KEY="xxxxx"
|
49 |
+
COHERE_API_KEY="xxxxx"
|
50 |
+
TOGETHER_API_KEY="xxxxx"
|
51 |
+
HF_TOKEN="xxxxx"
|
52 |
+
```
|
53 |
+
- Alternatively, you can also set the API key directly in the web UI.
|
54 |
+
|
55 |
+
3. **Run the Web UI:**
|
56 |
+
```bash
|
57 |
+
python -m app.webui.app
|
58 |
+
```
|
59 |
+
|
60 |
+
4. **Access the Web UI:**
|
61 |
+
Open your web browser and navigate to `http://127.0.0.1:7860/`.
|
62 |
+
|
63 |
+
**Usage:**
|
64 |
+
|
65 |
+
1. Select your desired translation API from the Endpoint dropdown menu.
|
66 |
+
2. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox.
|
67 |
+
3. Input the source text or upload your document file.
|
68 |
+
4. Submit and get translation, the UI will display the translated text with tokenization and highlight differences.
|
69 |
+
|
70 |
+
**Customization:**
|
71 |
+
|
72 |
+
- **Add New LLMs:** Modify the `patch.py` file to integrate additional LLMs.
|
73 |
+
|
74 |
+
**Contributing:**
|
75 |
+
|
76 |
+
Contributions are welcome! Feel free to open issues or submit pull requests.
|
77 |
+
|
78 |
+
**License:**
|
79 |
+
|
80 |
+
This project is licensed under the MIT License.
|
81 |
|
|
|
|
|
|
app/webui/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (142 Bytes). View file
|
|
app/webui/__pycache__/app.cpython-310.pyc
ADDED
Binary file (4.22 kB). View file
|
|
app/webui/__pycache__/patch.cpython-310.pyc
ADDED
Binary file (3.37 kB). View file
|
|
app/webui/__pycache__/process.cpython-310.pyc
ADDED
Binary file (2.33 kB). View file
|
|
app/webui/app.py
CHANGED
@@ -7,7 +7,7 @@ sys.path.insert(0, project_root)
|
|
7 |
|
8 |
import re
|
9 |
import gradio as gr
|
10 |
-
from app.webui.process import model_load,
|
11 |
from llama_index.core import SimpleDirectoryReader
|
12 |
|
13 |
def huanik(
|
@@ -63,8 +63,8 @@ def update_model(endpoint):
|
|
63 |
return gr.update(value=endpoint_model_map[endpoint])
|
64 |
|
65 |
def read_doc(file):
|
66 |
-
docs = SimpleDirectoryReader(input_files=file).load_data()
|
67 |
-
return docs
|
68 |
|
69 |
TITLE = """
|
70 |
<h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
|
@@ -82,7 +82,7 @@ CSS = """
|
|
82 |
}
|
83 |
"""
|
84 |
|
85 |
-
with gr.Blocks(theme="soft", css=CSS) as demo:
|
86 |
gr.Markdown(TITLE)
|
87 |
with gr.Row():
|
88 |
with gr.Column(scale=1):
|
@@ -94,7 +94,7 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
94 |
model = gr.Textbox(label="Model", value="gpt-4o", )
|
95 |
api_key = gr.Textbox(label="API_KEY", type="password", )
|
96 |
source_lang = gr.Textbox(
|
97 |
-
label="Source Lang
|
98 |
value="English",
|
99 |
)
|
100 |
target_lang = gr.Textbox(
|
@@ -130,14 +130,14 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
130 |
value="How we live is so different from how we ought to live that he who studies "+\
|
131 |
"what ought to be done rather than what is done will learn the way to his downfall "+\
|
132 |
"rather than to his preservation.",
|
133 |
-
lines=
|
134 |
)
|
135 |
with gr.Tab("Final"):
|
136 |
-
output_final = gr.Textbox(label="FInal Translation", lines=
|
137 |
with gr.Tab("Initial"):
|
138 |
-
output_init = gr.Textbox(label="Init Translation", lines=
|
139 |
with gr.Tab("Reflection"):
|
140 |
-
output_reflect = gr.Textbox(label="Reflection", lines=
|
141 |
with gr.Tab("Diff"):
|
142 |
output_diff = gr.HighlightedText(visible = False)
|
143 |
with gr.Row():
|
@@ -146,7 +146,6 @@ with gr.Blocks(theme="soft", css=CSS) as demo:
|
|
146 |
clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
|
147 |
|
148 |
endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
|
149 |
-
source_text.change(lang_detector, source_text, source_lang)
|
150 |
submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
|
151 |
upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
|
152 |
|
|
|
7 |
|
8 |
import re
|
9 |
import gradio as gr
|
10 |
+
from app.webui.process import model_load, diff_texts, translator
|
11 |
from llama_index.core import SimpleDirectoryReader
|
12 |
|
13 |
def huanik(
|
|
|
63 |
return gr.update(value=endpoint_model_map[endpoint])
|
64 |
|
65 |
def read_doc(file):
|
66 |
+
docs = SimpleDirectoryReader(input_files=[file]).load_data()
|
67 |
+
return docs[0].text
|
68 |
|
69 |
TITLE = """
|
70 |
<h1><a href="https://github.com/andrewyng/translation-agent">Translation-Agent</a> webUI</h1>
|
|
|
82 |
}
|
83 |
"""
|
84 |
|
85 |
+
with gr.Blocks(theme="soft", css=CSS, fill_height=True) as demo:
|
86 |
gr.Markdown(TITLE)
|
87 |
with gr.Row():
|
88 |
with gr.Column(scale=1):
|
|
|
94 |
model = gr.Textbox(label="Model", value="gpt-4o", )
|
95 |
api_key = gr.Textbox(label="API_KEY", type="password", )
|
96 |
source_lang = gr.Textbox(
|
97 |
+
label="Source Lang",
|
98 |
value="English",
|
99 |
)
|
100 |
target_lang = gr.Textbox(
|
|
|
130 |
value="How we live is so different from how we ought to live that he who studies "+\
|
131 |
"what ought to be done rather than what is done will learn the way to his downfall "+\
|
132 |
"rather than to his preservation.",
|
133 |
+
lines=10,
|
134 |
)
|
135 |
with gr.Tab("Final"):
|
136 |
+
output_final = gr.Textbox(label="FInal Translation", lines=10, show_copy_button=True)
|
137 |
with gr.Tab("Initial"):
|
138 |
+
output_init = gr.Textbox(label="Init Translation", lines=10, show_copy_button=True)
|
139 |
with gr.Tab("Reflection"):
|
140 |
+
output_reflect = gr.Textbox(label="Reflection", lines=10, show_copy_button=True)
|
141 |
with gr.Tab("Diff"):
|
142 |
output_diff = gr.HighlightedText(visible = False)
|
143 |
with gr.Row():
|
|
|
146 |
clear = gr.ClearButton([source_text, output_init, output_reflect, output_final])
|
147 |
|
148 |
endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
|
|
|
149 |
submit.click(fn=huanik, inputs=[endpoint, model, api_key, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output], outputs=[output_init, output_reflect, output_final, output_diff])
|
150 |
upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
|
151 |
|
app/webui/patch.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
# a monkey patch to use llama-index completion
|
2 |
import os
|
3 |
-
from typing import Union
|
4 |
-
|
5 |
-
from src.translation_agent.utils import *
|
6 |
-
|
7 |
|
8 |
from llama_index.llms.groq import Groq
|
9 |
from llama_index.llms.cohere import Cohere
|
@@ -28,12 +26,12 @@ def model_load(
|
|
28 |
if endpoint == "Groq":
|
29 |
llm = Groq(
|
30 |
model=model,
|
31 |
-
api_key=api_key,
|
32 |
)
|
33 |
elif endpoint == "Cohere":
|
34 |
llm = Cohere(
|
35 |
model=model,
|
36 |
-
api_key=api_key,
|
37 |
)
|
38 |
elif endpoint == "OpenAI":
|
39 |
llm = OpenAI(
|
@@ -43,16 +41,16 @@ def model_load(
|
|
43 |
elif endpoint == "TogetherAI":
|
44 |
llm = TogetherLLM(
|
45 |
model=model,
|
46 |
-
api_key=api_key,
|
47 |
)
|
48 |
-
elif endpoint == "
|
49 |
llm = Ollama(
|
50 |
model=model,
|
51 |
request_timeout=120.0)
|
52 |
elif endpoint == "Huggingface":
|
53 |
llm = HuggingFaceInferenceAPI(
|
54 |
model_name=model,
|
55 |
-
token=api_key,
|
56 |
task="text-generation",
|
57 |
)
|
58 |
Settings.llm = llm
|
@@ -63,10 +61,7 @@ def model_load(
|
|
63 |
Settings.num_output = num_output
|
64 |
|
65 |
|
66 |
-
|
67 |
-
def completion_wrapper(func: Callable) -> Callable:
|
68 |
-
@wraps(func)
|
69 |
-
def wrapper(
|
70 |
prompt: str,
|
71 |
system_message: str = "You are a helpful assistant.",
|
72 |
temperature: float = 0.3,
|
@@ -126,7 +121,15 @@ def completion_wrapper(func: Callable) -> Callable:
|
|
126 |
)
|
127 |
return response.message.content
|
128 |
|
129 |
-
|
130 |
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# a monkey patch to use llama-index completion
|
2 |
import os
|
3 |
+
from typing import Union
|
4 |
+
import src.translation_agent.utils as utils
|
|
|
|
|
5 |
|
6 |
from llama_index.llms.groq import Groq
|
7 |
from llama_index.llms.cohere import Cohere
|
|
|
26 |
if endpoint == "Groq":
|
27 |
llm = Groq(
|
28 |
model=model,
|
29 |
+
api_key=api_key if api_key else os.getenv("GROQ_API_KEY"),
|
30 |
)
|
31 |
elif endpoint == "Cohere":
|
32 |
llm = Cohere(
|
33 |
model=model,
|
34 |
+
api_key=api_key if api_key else os.getenv("COHERE_API_KEY"),
|
35 |
)
|
36 |
elif endpoint == "OpenAI":
|
37 |
llm = OpenAI(
|
|
|
41 |
elif endpoint == "TogetherAI":
|
42 |
llm = TogetherLLM(
|
43 |
model=model,
|
44 |
+
api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"),
|
45 |
)
|
46 |
+
elif endpoint == "Ollama":
|
47 |
llm = Ollama(
|
48 |
model=model,
|
49 |
request_timeout=120.0)
|
50 |
elif endpoint == "Huggingface":
|
51 |
llm = HuggingFaceInferenceAPI(
|
52 |
model_name=model,
|
53 |
+
token=api_key if api_key else os.getenv("HF_TOKEN"),
|
54 |
task="text-generation",
|
55 |
)
|
56 |
Settings.llm = llm
|
|
|
61 |
Settings.num_output = num_output
|
62 |
|
63 |
|
64 |
+
def get_completion(
|
|
|
|
|
|
|
65 |
prompt: str,
|
66 |
system_message: str = "You are a helpful assistant.",
|
67 |
temperature: float = 0.3,
|
|
|
121 |
)
|
122 |
return response.message.content
|
123 |
|
124 |
+
utils.get_completion = get_completion
|
125 |
|
126 |
+
one_chunk_initial_translation = utils.one_chunk_initial_translation
|
127 |
+
one_chunk_reflect_on_translation = utils.one_chunk_reflect_on_translation
|
128 |
+
one_chunk_improve_translation = utils.one_chunk_improve_translation
|
129 |
+
one_chunk_translate_text = utils.one_chunk_translate_text
|
130 |
+
num_tokens_in_string = utils.num_tokens_in_string
|
131 |
+
multichunk_initial_translation = utils.multichunk_initial_translation
|
132 |
+
multichunk_reflect_on_translation = utils.multichunk_reflect_on_translation
|
133 |
+
multichunk_improve_translation = utils.multichunk_improve_translation
|
134 |
+
multichunk_translation = utils.multichunk_translation
|
135 |
+
calculate_chunk_size =utils.calculate_chunk_size
|
app/webui/process.py
CHANGED
@@ -1,34 +1,26 @@
|
|
1 |
-
|
2 |
-
|
3 |
from difflib import Differ
|
4 |
from icecream import ic
|
5 |
-
from app.webui.patch import
|
|
|
|
|
6 |
from llama_index.core.node_parser import SentenceSplitter
|
7 |
|
8 |
-
def lang_detector(text):
|
9 |
-
min_chars = 5
|
10 |
-
if len(text) < min_chars:
|
11 |
-
return "Input text too short"
|
12 |
-
try:
|
13 |
-
detector = Detector(text).language
|
14 |
-
lang_info = str(detector)
|
15 |
-
code = re.search(r"name: (\w+)", lang_info).group(1)
|
16 |
-
return code
|
17 |
-
except Exception as e:
|
18 |
-
return f"ERROR:{str(e)}"
|
19 |
|
20 |
-
|
21 |
-
# Use polyglot to tokenize the text
|
22 |
-
polyglot_text = Text(text)
|
23 |
-
words = polyglot_text.words
|
24 |
|
|
|
|
|
|
|
25 |
# Check if the text contains spaces
|
26 |
if ' ' in text:
|
27 |
# Create a list of words and spaces
|
28 |
tokens = []
|
29 |
for word in words:
|
30 |
tokens.append(word)
|
31 |
-
|
|
|
32 |
return tokens[:-1] # Remove the last space
|
33 |
else:
|
34 |
return words
|
@@ -62,7 +54,7 @@ def translator(
|
|
62 |
target_lang,
|
63 |
source_text,
|
64 |
country,
|
65 |
-
max_tokens=
|
66 |
):
|
67 |
"""Translate the source_text from source_lang to target_lang."""
|
68 |
num_tokens_in_text = num_tokens_in_string(source_text)
|
|
|
1 |
+
import re
|
2 |
+
import nltk
|
3 |
from difflib import Differ
|
4 |
from icecream import ic
|
5 |
+
from app.webui.patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
|
6 |
+
from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
|
7 |
+
|
8 |
from llama_index.core.node_parser import SentenceSplitter
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
nltk.download('punkt', quiet=True)
|
|
|
|
|
|
|
12 |
|
13 |
+
def tokenize(text):
|
14 |
+
# Use nltk to tokenize the text
|
15 |
+
words = nltk.word_tokenize(text)
|
16 |
# Check if the text contains spaces
|
17 |
if ' ' in text:
|
18 |
# Create a list of words and spaces
|
19 |
tokens = []
|
20 |
for word in words:
|
21 |
tokens.append(word)
|
22 |
+
if not word.startswith("'") and not word.endswith("'"): # Avoid adding space after punctuation
|
23 |
+
tokens.append(' ') # Add space after each word
|
24 |
return tokens[:-1] # Remove the last space
|
25 |
else:
|
26 |
return words
|
|
|
54 |
target_lang,
|
55 |
source_text,
|
56 |
country,
|
57 |
+
max_tokens=1000,
|
58 |
):
|
59 |
"""Translate the source_text from source_lang to target_lang."""
|
60 |
num_tokens_in_text = num_tokens_in_string(source_text)
|
app/webui/requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
llama-index
|
2 |
+
llama-index-llms-groq
|
3 |
+
llama-index-llms-openai
|
4 |
+
llama-index-llms-cohere
|
5 |
+
llama-index-llms-together
|
6 |
+
llama-index-llms-ollama
|
7 |
+
llama-index-llms-huggingface-api
|
8 |
+
tiktoken
|
9 |
+
icecream
|
10 |
+
nltk
|
11 |
+
langchain-text-splitters
|
12 |
+
gradio
|