Spaces:

CVPR
/

ml-talking-face

Running

App Files Files Community

형규 송 committed on Jun 17, 2022

Commit

6bd388c

•

1 Parent(s): e0a78a8

add Google Perspective API

Browse files

(`d9de3ce` in https://bitbucket.org/maum-system/cvpr22-demo-gradio)

Files changed (10) hide show

.gitignore +2 -1
app.py +39 -50
docs/{description.txt → description.md} +4 -0
requirements.txt +2 -1
toxicity_estimator/__init__.py +1 -0
toxicity_estimator/module.py +51 -0
translator/__init__.py +1 -1
translator/module.py +48 -0
translator/v3.py +1 -1
vacant.mp4 +0 -0

.gitignore CHANGED Viewed

@@ -10,4 +10,5 @@ output_file/*
 *.png
 !background_image/*
 *.mkv
-gradio_queue.db*

 *.png
 !background_image/*
 *.mkv
+gradio_queue.db*
+!vacant.mp4

app.py CHANGED Viewed

@@ -7,55 +7,19 @@ TRANSLATION_APIKEY_URL = os.environ['TRANSLATION_APIKEY_URL']
 GOOGLE_APPLICATION_CREDENTIALS = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
 subprocess.call(f"wget --no-check-certificate -O {GOOGLE_APPLICATION_CREDENTIALS} {TRANSLATION_APIKEY_URL}", shell=True)
 import gradio as gr
 from client_rest import RestAPIApplication
 from pathlib import Path
 import argparse
 import threading
-from translator import GoogleAuthTranslation
 import yaml
 TITLE = Path("docs/title.txt").read_text()
-DESCRIPTION = Path("docs/description.txt").read_text()
-class Translator:
-    def __init__(self, yaml_path='lang.yaml'):
-        self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
-        with open(yaml_path) as f:
-            self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
-    def _get_text_with_lang(self, text, lang):
-        lang_detected = self.google_translation.detect(text)
-        print(lang_detected, lang)
-        if lang is None:
-            lang = lang_detected
-        if lang != lang_detected:
-            target_text = self.google_translation.translate(text, lang=lang)
-        else:
-            target_text = text
-        return target_text, lang
-    def _convert_lang_from_index(self, lang):
-        lang_finder = [name for name in self.supporting_languages
-                        if self.supporting_languages[name]['language'] == lang]
-        if len(lang_finder) == 1:
-            lang = lang_finder[0]
-        else:
-            raise AssertionError(f"Given language index can't be understood! | lang: {lang}")
-        return lang
-    def get_translation(self, text, lang, use_translation=True):
-        lang_ = self._convert_lang_from_index(lang)
-        if use_translation:
-            target_text, _ = self._get_text_with_lang(text, lang_)
-        else:
-            target_text = text
-        return target_text, lang
 class GradioApplication:
@@ -72,6 +36,7 @@ class GradioApplication:
                                 "background_image/river.mp4",
                                 "background_image/sky.mp4"]
         self.translator = Translator()
         self.rest_application = RestAPIApplication(rest_ip, rest_port)
         self.output_dir = Path("output_file")
@@ -118,24 +83,49 @@ class GradioApplication:
             is_video_background = False
         return background_data, is_video_background
     def infer(self, text, lang, duration_rate, action, background_index):
         self._counter_file_seed()
         print(f"File Seed: {self._file_seed}")
-        target_text, lang_dest = self.translator.get_translation(text, lang)
-        lang_rpc_code = self.get_lang_code(lang_dest)
         background_data, is_video_background = self.get_background_data(background_index)
         video_data = self.rest_application.get_video(target_text, lang_rpc_code, duration_rate, action.lower(),
                                                      background_data, is_video_background)
-        print(len(video_data))
         video_filename = self.output_dir / f"{self._file_seed:02d}.mkv"
         with open(video_filename, "wb") as video_file:
             video_file.write(video_data)
-        return f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
     def run(self, server_port=7860, share=False):
         try:
@@ -176,11 +166,10 @@ def prepare_input():
 def prepare_output():
-    translation_result_otuput = gr.Textbox(type="str",
-                                                   label="Translation Result")
     video_output = gr.Video(format='mp4')
-    return [translation_result_otuput, video_output]
 def parse_args():

 GOOGLE_APPLICATION_CREDENTIALS = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
 subprocess.call(f"wget --no-check-certificate -O {GOOGLE_APPLICATION_CREDENTIALS} {TRANSLATION_APIKEY_URL}", shell=True)
+TOXICITY_THRESHOLD = float(os.getenv('TOXICITY_THRESHOLD', 0.7))
 import gradio as gr
+from toxicity_estimator import PerspectiveAPI
+from translator import Translator
 from client_rest import RestAPIApplication
 from pathlib import Path
 import argparse
 import threading
 import yaml
 TITLE = Path("docs/title.txt").read_text()
+DESCRIPTION = Path("docs/description.md").read_text()
 class GradioApplication:
                                 "background_image/river.mp4",
                                 "background_image/sky.mp4"]
+        self.perspective_api = PerspectiveAPI()
         self.translator = Translator()
         self.rest_application = RestAPIApplication(rest_ip, rest_port)
         self.output_dir = Path("output_file")
             is_video_background = False
         return background_data, is_video_background
+    @staticmethod
+    def return_format(toxicity_prob, target_text, lang_dest, video_filename):
+        return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
     def infer(self, text, lang, duration_rate, action, background_index):
         self._counter_file_seed()
         print(f"File Seed: {self._file_seed}")
+        toxicity_prob = 0.0
+        target_text = "(Sorry, it seems that the input text is too toxic.)"
+        lang_dest = ""
+        video_filename = "vacant.mp4"
+        # Toxicity estimation
+        try:
+            toxicity_prob = self.perspective_api.get_score(text)
+        except Exception as e:  # when Perspective API doesn't work
+            pass
+        if toxicity_prob > TOXICITY_THRESHOLD:
+            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
+        # Google Translate API
+        try:
+            target_text, lang_dest = self.translator.get_translation(text, lang)
+            lang_rpc_code = self.get_lang_code(lang_dest)
+        except Exception as e:
+            target_text = f"Error from language translation: ({e})"
+            lang_dest = ""
+            return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
+        # Video Inference
         background_data, is_video_background = self.get_background_data(background_index)
         video_data = self.rest_application.get_video(target_text, lang_rpc_code, duration_rate, action.lower(),
                                                      background_data, is_video_background)
+        print(f"Video data size: {len(video_data)}")
         video_filename = self.output_dir / f"{self._file_seed:02d}.mkv"
         with open(video_filename, "wb") as video_file:
             video_file.write(video_data)
+        return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: \n{target_text}", str(video_filename)
     def run(self, server_port=7860, share=False):
         try:
 def prepare_output():
+    toxicity_output = gr.Label(num_top_classes=1, label="Toxicity (from Perspective API)")
+    translation_result_otuput = gr.Textbox(type="str", label="Translation Result")
     video_output = gr.Video(format='mp4')
+    return [toxicity_output, translation_result_otuput, video_output]
 def parse_args():

docs/{description.txt → description.md} RENAMED Viewed

@@ -3,4 +3,8 @@ You can provide the input text in one of the four languages: Chinese (Mandarin),
 You may also select the target language, the language of the output speech.
 If the input text language and the target language are different, the input text will be translated to the target language using Google Translate API.
 (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.

 You may also select the target language, the language of the output speech.
 If the input text language and the target language are different, the input text will be translated to the target language using Google Translate API.
+### Updates
+(2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
 (2022.06.05.) Due to the latency from HuggingFace Spaces and video rendering, it takes 15 ~ 30 seconds to get a video result.

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ jinja2
 googletrans==4.0.0-rc1
 PyYAML
 opencv-python
-google-cloud-translate

 googletrans==4.0.0-rc1
 PyYAML
 opencv-python
+google-cloud-translate
+google-api-python-client

toxicity_estimator/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .module import PerspectiveAPI

toxicity_estimator/module.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from googleapiclient import discovery
+import argparse
+import json
+import os
+API_KEY = os.environ['PERSPECTIVE_API_KEY']
class PerspectiveAPI:
    """Small client for the Perspective API ``comments.analyze`` call.

    The client is built once in ``__init__`` and reused for every request.
    """

    def __init__(self):
        """Build the ``commentanalyzer`` discovery client with the module-level ``API_KEY``."""
        self.client = discovery.build(
            "commentanalyzer",
            "v1alpha1",
            developerKey=API_KEY,
            discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
            static_discovery=False,
        )

    @staticmethod
    def _get_request(text, label='TOXICITY'):
        """Return the request body asking for a single attribute of ``text``.

        Args:
            text: The comment text to analyze.
            label: Name of the attribute to request (defaults to ``'TOXICITY'``).
        """
        return {
            'comment': {'text': text},
            'requestedAttributes': {label: {}},
        }

    def _infer(self, text, label='TOXICITY'):
        """Send ``text`` to the API and return the raw response.

        The requested attribute must be the same one ``get_score`` later looks
        up, otherwise the response has no entry for it.
        """
        request = self._get_request(text, label)
        return self.client.comments().analyze(body=request).execute()

    def infer(self, text):
        """Return the raw API response for ``text`` with the default attribute."""
        return self._infer(text)

    def get_score(self, text, label='TOXICITY'):
        """Return the score value of attribute ``label`` for ``text``.

        Args:
            text: The comment text to analyze.
            label: Attribute to request and read back (defaults to ``'TOXICITY'``).

        Returns:
            The ``value`` of the first entry in the attribute's ``spanScores``.

        Raises:
            KeyError / IndexError: If the response lacks the expected fields.
        """
        response = self._infer(text, label)
        # NOTE(review): reads the first span score, as before; confirm whether
        # the response's summary score would be the more appropriate field.
        return response['attributeScores'][label]['spanScores'][0]['score']['value']
def parse_args():
    """Parse the command line of the Perspective API test script.

    Returns:
        The parsed namespace; ``input_text`` holds the required ``-i`` value.
    """
    cli = argparse.ArgumentParser(description='Perspective API Test.')
    cli.add_argument('-i', '--input-text', type=str, required=True)
    return cli.parse_args()
if __name__ == '__main__':
    # Manual check: score the text given with -i/--input-text and print it.
    cli_args = parse_args()
    print(PerspectiveAPI().get_score(cli_args.input_text))

translator/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- from .v3 import ~~GoogleAuthTranslation~~


1	+ from .module import Translator

translator/module.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from .v3 import GoogleAuthTranslation
+from pathlib import Path
+import yaml
class Translator:
    """Translate input text into a language selected from a YAML language table.

    ``supporting_languages`` is the parsed YAML content; each top-level key is a
    language name whose entry has a ``'language'`` field used as the selector.
    """

    def __init__(self, yaml_path='./lang.yaml'):
        """Create the Google translation client and load the language table.

        Args:
            yaml_path: Path of the YAML file describing the supported languages.
        """
        self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
        # Read as UTF-8 explicitly so parsing does not depend on the locale.
        with open(yaml_path, encoding="utf-8") as f:
            self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)

    def _get_text_with_lang(self, text, lang):
        """Return ``(text in lang, lang)``, translating only when needed.

        When ``lang`` is ``None`` the detected language is used, so the text is
        returned untouched.
        """
        lang_detected = self.google_translation.detect(text)
        print(lang_detected, lang)

        if lang is None:
            lang = lang_detected

        if lang != lang_detected:
            target_text = self.google_translation.translate(text, lang=lang)
        else:
            target_text = text

        return target_text, lang

    def _convert_lang_from_index(self, lang):
        """Map a ``'language'`` selector value to its language name.

        Raises:
            RuntimeError: If the language table cannot be scanned (for example
                an entry without a ``'language'`` field); the original error is
                chained as the cause.
            AssertionError: If the selector matches zero or several languages.
        """
        try:
            lang_finder = [name for name in self.supporting_languages
                           if self.supporting_languages[name]['language'] == lang]
        except Exception as e:
            # Keep the original exception attached for debugging.
            raise RuntimeError(e) from e

        if len(lang_finder) != 1:
            raise AssertionError("Given language index can't be understood! "
                                 f"Only one of ['Korean', 'English', 'Japanese', 'Chinese'] can be supported. | lang: {lang}")

        return lang_finder[0]

    def get_translation(self, text, lang, use_translation=True):
        """Return ``(target_text, lang)`` for the requested language selector.

        Args:
            text: Input text.
            lang: Selector value matched against each entry's ``'language'``.
            use_translation: When false, ``text`` is returned unchanged (the
                selector is still validated).

        Returns:
            The possibly translated text and the selector exactly as passed in.
        """
        lang_ = self._convert_lang_from_index(lang)

        if use_translation:
            target_text, _ = self._get_text_with_lang(text, lang_)
        else:
            target_text = text

        return target_text, lang

translator/v3.py CHANGED Viewed

@@ -36,7 +36,7 @@ class GoogleAuthTranslation:
             if self.supporting_languages[key]['google_dest'] == dest:
                 return key
-        raise RuntimeError(f"Detected langauge {dest} is not supported for TTS.")
     def translate(self, query, lang):

             if self.supporting_languages[key]['google_dest'] == dest:
                 return key
+        raise RuntimeError(f"Detected langauge is not supported in our multilingual TTS. |\n Code: {dest} | See https://cloud.google.com/translate/docs/languages")
     def translate(self, query, lang):

vacant.mp4 ADDED Viewed

File without changes