Rishabh03 and deepkyu committed
Commit 7d42762 · 0 Parent(s)

Duplicate from CVPR/ml-talking-face

Co-authored-by: Hyoung-Kyu Song <[email protected]>

.dockerignore ADDED
@@ -0,0 +1,2 @@
+ output_file/*
+ !output_file/.gitkeep
.gitattributes ADDED
@@ -0,0 +1,31 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
+ output_file/* filter=lfs diff=lfs merge=lfs -text
+ background_image/* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,14 @@
+ .DS_Store
+ flagged/
+ __pycache__/
+ .vscode/
+ output_file/*
+
+ !output_file/.gitkeep
+
+ *.mp4
+ *.png
+ !background_image/*
+ *.mkv
+ gradio_queue.db*
+ !vacant.mp4
README.md ADDED
@@ -0,0 +1,48 @@
+ ---
+ title: Talking Face Generation with Multilingual TTS
+ emoji: 👄
+ colorFrom: blue
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.0.6
+ app_file: app.py
+ pinned: false
+ license: cc-by-nc-sa-4.0
+ duplicated_from: CVPR/ml-talking-face
+ ---
+
+ # Configuration
+
+ `title`: _string_
+ Display title for the Space
+
+ `emoji`: _string_
+ Space emoji (emoji-only character allowed)
+
+ `colorFrom`: _string_
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+ `colorTo`: _string_
+ Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+ `sdk`: _string_
+ Can be either `gradio`, `streamlit`, or `static`
+
+ `sdk_version`: _string_
+ Only applicable for `streamlit` SDK.
+ See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+
+ `app_file`: _string_
+ Path to your main application file (which contains either `gradio` or `streamlit` Python code, or `static` html code).
+ Path is relative to the root of the repository.
+
+ `models`: _List[string]_
+ HF model IDs (like "gpt2" or "deepset/roberta-base-squad2") used in the Space.
+ Will be parsed automatically from your code if not specified here.
+
+ `datasets`: _List[string]_
+ HF dataset IDs (like "common_voice" or "oscar-corpus/OSCAR-2109") used in the Space.
+ Will be parsed automatically from your code if not specified here.
+
+ `pinned`: _boolean_
+ Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,202 @@
+ # https://huggingface.co/deepkyu/ml-talking-face
+ import os
+ import subprocess
+
+ REST_IP = os.environ['REST_IP']
+ SERVICE_PORT = int(os.environ['SERVICE_PORT'])
+ TRANSLATION_APIKEY_URL = os.environ['TRANSLATION_APIKEY_URL']
+ GOOGLE_APPLICATION_CREDENTIALS = os.environ['GOOGLE_APPLICATION_CREDENTIALS']
+ subprocess.call(f"wget --no-check-certificate -O {GOOGLE_APPLICATION_CREDENTIALS} {TRANSLATION_APIKEY_URL}", shell=True)
+
+ TOXICITY_THRESHOLD = float(os.getenv('TOXICITY_THRESHOLD', 0.7))
+
+ import gradio as gr
+ from toxicity_estimator import PerspectiveAPI
+ from translator import Translator
+ from client_rest import RestAPIApplication
+ from pathlib import Path
+ import argparse
+ import threading
+ import yaml
+
+ TITLE = Path("docs/title.txt").read_text()
+ DESCRIPTION = Path("docs/description.md").read_text()
+
+
+ class GradioApplication:
+     def __init__(self, rest_ip, rest_port, max_seed):
+         self.lang_list = {
+             'ko': 'ko_KR',
+             'en': 'en_US',
+             'ja': 'ja_JP',
+             'zh': 'zh_CN',
+             'zh-CN': 'zh_CN'
+         }
+         self.background_list = [None,
+                                 "background_image/cvpr.png",
+                                 "background_image/black.png",
+                                 "background_image/river.mp4",
+                                 "background_image/sky.mp4"]
+
+         self.perspective_api = PerspectiveAPI()
+         self.translator = Translator()
+         self.rest_application = RestAPIApplication(rest_ip, rest_port)
+         self.output_dir = Path("output_file")
+
+         inputs = prepare_input()
+         outputs = prepare_output()
+
+         self.iface = gr.Interface(fn=self.infer,
+                                   title=TITLE,
+                                   description=DESCRIPTION,
+                                   inputs=inputs,
+                                   outputs=outputs,
+                                   allow_flagging='never',
+                                   article=Path("docs/article.md").read_text())
+
+         self.max_seed = max_seed
+         self._file_seed = 0
+         self.lock = threading.Lock()
+
+     def _get_file_seed(self):
+         return f"{self._file_seed % self.max_seed:02d}"
+
+     def _reset_file_seed(self):
+         self._file_seed = 0
+
+     def _counter_file_seed(self):
+         with self.lock:
+             self._file_seed += 1
+
+     def get_lang_code(self, lang):
+         return self.lang_list[lang]
+
+     def get_background_data(self, background_index):
+         # get background filename and its extension
+         data_path = self.background_list[background_index]
+
+         if data_path is not None:
+             with open(data_path, 'rb') as rf:
+                 background_data = rf.read()
+             is_video_background = str(data_path).endswith(".mp4")
+         else:
+             background_data = None
+             is_video_background = False
+
+         return background_data, is_video_background
+
+     @staticmethod
+     def return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=""):
+         return {'Toxicity': toxicity_prob}, f"Language: {lang_dest}\nText: {target_text}\n-\nDetails: {detail}", str(video_filename)
+
+     def infer(self, text, lang, duration_rate, action, background_index):
+         self._counter_file_seed()
+         print(f"File Seed: {self._file_seed}")
+         toxicity_prob = 0.0
+         target_text = ""
+         lang_dest = ""
+         video_filename = "vacant.mp4"
+
+         # Toxicity estimation
+         try:
+             toxicity_prob = self.perspective_api.get_score(text)
+         except Exception as e:  # when Perspective API doesn't work
+             pass
+
+         if toxicity_prob > TOXICITY_THRESHOLD:
+             detail = "Sorry, it seems that the input text is too toxic."
+             return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=f"Error: {detail}")
+
+         # Google Translate API
+         try:
+             target_text, lang_dest = self.translator.get_translation(text, lang)
+         except Exception as e:
+             target_text = ""
+             lang_dest = ""
+             detail = f"Error from language translation: ({e})"
+             return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=f"Error: {detail}")
+
+         try:
+             self.translator.length_check(lang_dest, target_text)  # assertion check
+         except AssertionError as e:
+             return self.return_format(toxicity_prob, target_text, lang_dest, video_filename, detail=f"Error: {str(e)}")
+
+         lang_rpc_code = self.get_lang_code(lang_dest)
+
+         # Video Inference
+         background_data, is_video_background = self.get_background_data(background_index)
+
+         video_data = self.rest_application.get_video(target_text, lang_rpc_code, duration_rate, action.lower(),
+                                                      background_data, is_video_background)
+         print(f"Video data size: {len(video_data)}")
+
+         video_filename = self.output_dir / f"{self._file_seed:02d}.mkv"
+         with open(video_filename, "wb") as video_file:
+             video_file.write(video_data)
+
+         return self.return_format(toxicity_prob, target_text, lang_dest, video_filename)
+
+     def run(self, server_port=7860, share=False):
+         try:
+             self.iface.launch(height=900,
+                               share=share, server_port=server_port,
+                               enable_queue=True)
+
+         except KeyboardInterrupt:
+             gr.close_all()
+
+
+ def prepare_input():
+     text_input = gr.Textbox(lines=2,
+                             placeholder="Type your text in English, Chinese, Korean, or Japanese.",
+                             value="Hello, this is a demonstration of talking face generation "
+                                   "with multilingual text-to-speech.",
+                             label="Text")
+     lang_input = gr.Radio(['Korean', 'English', 'Japanese', 'Chinese'],
+                           type='value',
+                           value=None,
+                           label="Language")
+     duration_rate_input = gr.Slider(minimum=0.8,
+                                     maximum=1.2,
+                                     step=0.01,
+                                     value=1.0,
+                                     label="Duration (The bigger the value, the slower the speech)")
+     action_input = gr.Radio(['Default', 'Hand', 'BothHand', 'HandDown', 'Sorry'],
+                             type='value',
+                             value='Default',
+                             label="Select an action ...")
+     background_input = gr.Radio(['None', 'CVPR', 'Black', 'River', 'Sky'],
+                                 type='index',
+                                 value='None',
+                                 label="Select a background image/video ...")
+
+     return [text_input, lang_input, duration_rate_input,
+             action_input, background_input]
+
+
+ def prepare_output():
+     toxicity_output = gr.Label(num_top_classes=1, label="Toxicity (from Perspective API)")
+     translation_result_output = gr.Textbox(type="str", label="Translation Result")
+     video_output = gr.Video(format='mp4')
+     return [toxicity_output, translation_result_output, video_output]
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='GRADIO DEMO for talking face generation submitted to CVPR2022')
+     parser.add_argument('-p', '--port', dest='gradio_port', type=int, default=7860, help="Port for gradio")
+     parser.add_argument('--rest_ip', type=str, default=REST_IP, help="IP for REST API")
+     parser.add_argument('--rest_port', type=int, default=SERVICE_PORT, help="Port for REST API")
+     parser.add_argument('--max_seed', type=int, default=20, help="Max seed for saving video")
+     parser.add_argument('--share', action='store_true', help='get publicly sharable link')
+     args = parser.parse_args()
+     return args
+
+
+ if __name__ == '__main__':
+     args = parse_args()
+
+     gradio_application = GradioApplication(args.rest_ip, args.rest_port, args.max_seed)
+     gradio_application.run(server_port=args.gradio_port, share=args.share)
background_image/black.png ADDED

Git LFS Details

  • SHA256: 436e4c11d009e01bfc62ec02d52f877b2ea7a717cd9c56dfd972938eac0591af
  • Pointer size: 130 Bytes
  • Size of remote file: 12.5 kB
background_image/cvpr.png ADDED
background_image/river.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e8fedd95028adbabf17eb7fcc67bc37d22dc996cb45878a8a52cc95dcfb21cf3
+ size 4523353
background_image/sky.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f12109f770ba152228370f0386d2fdc9892ff79875d3c3296efde200822a3bbc
+ size 2222081
client_rest.py ADDED
@@ -0,0 +1,74 @@
+ import requests
+ import json
+ import base64
+ import argparse
+
+ VIDEO_WIDTH = 1080
+ VIDEO_HEIGHT = 1920
+ SPEAKER_ID = 0
+
+ class RestAPIApplication:
+     def __init__(self, ip, port):
+
+         if port < 0:
+             self.post_request_addr = f"http://{ip}/register/"
+             self.post_headers = {"Content-Type": "application/json"}
+             self.generate_addr = (lambda id_: f'http://{ip}/generate/{id_}')
+         else:
+             self.post_request_addr = f"http://{ip}:{port}/register/"
+             self.post_headers = {"Content-Type": "application/json"}
+             self.generate_addr = (lambda id_: f'http://{ip}:{port}/generate/{id_}')
+
+     @staticmethod
+     def _get_json_request(text, lang, duration_rate, action, background_data=None, is_video_background=False):
+         request_form = dict()
+
+         request_form['text'] = text
+         request_form['speaker'] = SPEAKER_ID
+         request_form['width'] = VIDEO_WIDTH
+         request_form['height'] = VIDEO_HEIGHT
+
+         request_form['action'] = action
+
+         if background_data is not None:
+             background_base64 = base64.b64encode(background_data).decode("UTF-8")
+         else:
+             background_base64 = ""
+
+         request_form['background'] = background_base64
+         request_form['durationRate'] = duration_rate
+         request_form['isVideoBackground'] = is_video_background
+         request_form['lang'] = lang
+
+         request_as_json = json.dumps(request_form)
+         return request_as_json
+
+     @staticmethod
+     def _get_video_id(results):
+         return json.loads(bytes.decode(results.content))['id']
+
+     def get_video(self, text, lang, duration_rate, action, background_data=None, is_video_background=False):
+         request_json = self._get_json_request(text, lang, duration_rate, action, background_data, is_video_background)
+
+         # POST request with jsonified request
+         results = requests.post(self.post_request_addr, headers=self.post_headers, data=request_json)
+
+         # GET video with the given id
+         video_id = self._get_video_id(results)
+         video_results = requests.get(self.generate_addr(video_id))
+
+         return video_results.content
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='REST API interface for talking face generation submitted to CVPR2022')
+     parser.add_argument('-i', '--ip', dest='rest_ip', type=str, default="127.0.0.1", help="IP for REST API")
+     parser.add_argument('-p', '--port', dest='rest_port', type=int, default=8080, help="Port for REST API")
+     args = parser.parse_args()
+     return args
+
+
+ if __name__ == '__main__':
+     args = parse_args()
+     rest_api_application = RestAPIApplication(args.rest_ip, args.rest_port)
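For reference, below is a sketch of the JSON body that `_get_json_request` assembles and `get_video` POSTs to the `/register/` endpoint. The keys and constants mirror the code above; the values are illustrative placeholders, not the ones used by the deployed service.

```python
# Illustrative request body for RestAPIApplication.get_video (placeholder values).
# The server replies with {"id": ...}; the rendered video is then fetched
# with a GET on /generate/{id}.
example_request = {
    "text": "Hello. Now I'm speaking in English.",  # translated text to synthesize
    "speaker": 0,                # SPEAKER_ID
    "width": 1080,               # VIDEO_WIDTH
    "height": 1920,              # VIDEO_HEIGHT
    "action": "default",         # action name, lower-cased by app.py before the call
    "background": "",            # base64-encoded image/video bytes; empty when no background
    "durationRate": 1.0,         # speech-speed factor from the Gradio slider
    "isVideoBackground": False,
    "lang": "en_US",             # locale code resolved via lang.yaml
}
```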
docs/article.md ADDED
@@ -0,0 +1,23 @@
+
+ ## Why learn a new language, when your model can learn it for you?
+
+ <div style="max-width: 720px;max-height: 405px;margin: auto;">
+ <div style="float: none;clear: both;position: relative;padding-bottom: 56.25%;height: 0;width: 100%">
+ <iframe width="720" height="405" src="https://www.youtube.com/embed/toqdD1F_ZsU" title="YouTube video player" style="position: absolute;top: 0;left: 0;width: 100%;height: 100%;" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen>
+ </iframe>
+ </div>
+ </div>
+
+ ### Abstract
+
+ Recent studies in talking face generation have focused on building a train-once-use-everywhere model, i.e., a model that will generalize from any source speech to any target identity. A number of works have already claimed this functionality and have added that their models will also generalize to any language. However, we show, using languages from different language families, that these models do not translate well when the training language and the testing language are sufficiently different. We reduce the scope of the problem to building a language-robust talking face generation system on seen identities, i.e., the target identity is the same as the training identity. In this work, we introduce a talking face generation system that will generalize to different languages. We evaluate the efficacy of our system using a multilingual text-to-speech system. We also discuss the usage of the joint text-to-speech system and the talking face generation system as a neural dubber system.
+
+ [CVPR Open Access](https://openaccess.thecvf.com/content/CVPR2022/html/Song_Talking_Face_Generation_With_Multilingual_TTS_CVPR_2022_paper.html) [arXiv](https://arxiv.org/abs/2205.06421)
+
+ ### News
+
+ (2022.08.18.) We got the CVPR Hugging Face prize! Thank you all, and special thanks to AK ([@akhaliq](https://huggingface.co/akhaliq)).
+
+ <center>
+ <img alt="we-got-huggingface-prize" src="https://github.com/deepkyu/ml-talking-face/blob/main/docs/we-got-huggingface-prize.jpeg?raw=true" width="50%" />
+ </center>
docs/description.md ADDED
@@ -0,0 +1,18 @@
+ This system generates a talking face video based on the input text.
+ You can provide the input text in one of the four languages: Chinese (Mandarin), English, Japanese, and Korean.
+ You may also select the target language, the language of the output speech.
+ If the input text language and the target language are different, the input text will be translated to the target language using the Google Translate API.
+
+ ### Updates
+
+ (2022.09.29.) **NOTE!** The core part of the demonstration has been running on MINDsLab's AWS instance, and I found that it can't connect to the instance now. I want to fix this issue, but I'm sorry to say that I left the company last week. I've contacted the company, but it will take some time to restore the session. If you're in a hurry, please send an e-mail directly to MINDsLab ([email protected]).
+ Whatever the reason, I'm sorry again. I hope you understand.
+
+ (2022.06.17.) Thank you for visiting our demo!😊 This demo attracted a lot more attention than we anticipated. This, unfortunately, means that the computational burden is heavier than this demo was designed for. So, to maximize everyone's experience, we capped the length of the translated texts at:
+
+ - 200 characters for English
+ - 100 characters for Chinese, Japanese, and Korean.
+
+ (2022.06.17.) We were originally planning to support any input text. However, when checking the logs recently, we found that there were a lot of inappropriate input texts. So, we decided to filter the inputs based on toxicity using [Perspective API @Google](https://developers.perspectiveapi.com/s/). Now, if you enter a possibly toxic text, the video generation will fail. We hope you understand.
+
+ (2022.06.05.) Due to the latency from Hugging Face Spaces and video rendering, it takes 15~30 seconds to get a video result.
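As a side note, the caps above are enforced in this repository by `Translator.length_check` (see `translator/module.py` later in this commit). Below is a minimal sketch of that rule under the default limits; the `within_limit` helper is hypothetical, and the real code raises an `AssertionError` instead of returning a boolean.

```python
import os

# Defaults; both limits can be overridden via environment variables.
MAX_ENG_TEXT_LENGTH = int(os.getenv('MAX_ENG_TEXT_LENGTH', 200))  # English
MAX_CJK_TEXT_LENGTH = int(os.getenv('MAX_CJK_TEXT_LENGTH', 100))  # Chinese / Japanese / Korean

def within_limit(lang: str, text: str) -> bool:
    # Hypothetical helper illustrating the rule only.
    limit = MAX_ENG_TEXT_LENGTH if lang == 'en' else MAX_CJK_TEXT_LENGTH
    return len(text) <= limit
```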
docs/title.txt ADDED
@@ -0,0 +1 @@
+ Talking Face Generation with Multilingual TTS (CVPR 2022 Demo Track)
docs/we-got-huggingface-prize.jpeg ADDED
lang.yaml ADDED
@@ -0,0 +1,20 @@
+ ko:
+   index: 1
+   language: "Korean"
+   locale: "ko_KR"
+   google_dest: "ko"
+ en:
+   index: 2
+   language: "English"
+   locale: "en_US"
+   google_dest: "en"
+ ja:
+   index: 3
+   language: "Japanese"
+   locale: "ja_JP"
+   google_dest: "ja"
+ zh:
+   index: 4
+   language: "Chinese"
+   locale: "zh_CN"
+   google_dest: "zh-CN"
output_file/.gitkeep ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio
+ jinja2
+ googletrans==4.0.0-rc1
+ PyYAML
+ opencv-python
+ google-cloud-translate
+ google-api-python-client
sample_text.yaml ADDED
@@ -0,0 +1,8 @@
+ ko:
+   - "안녕하세요? 한국어로 말하고 있습니다."
+ en:
+   - "Hello. Now I'm speaking in English."
+ zh:
+   - "你好? 我在说普通话。"
+ ja:
+   - "こんにちは。 今、日本語で話しています。"
toxicity_estimator/__init__.py ADDED
@@ -0,0 +1 @@
+ from .module import PerspectiveAPI
toxicity_estimator/module.py ADDED
@@ -0,0 +1,51 @@
+ from googleapiclient import discovery
+ import argparse
+ import json
+ import os
+
+ API_KEY = os.environ['PERSPECTIVE_API_KEY']
+
+ class PerspectiveAPI:
+     def __init__(self):
+         self.client = discovery.build(
+             "commentanalyzer",
+             "v1alpha1",
+             developerKey=API_KEY,
+             discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
+             static_discovery=False,
+         )
+
+     @staticmethod
+     def _get_request(text):
+         return {
+             'comment': {'text': text},
+             'requestedAttributes': {'TOXICITY': {}}
+         }
+
+     def _infer(self, text):
+         request = self._get_request(text)
+         response = self.client.comments().analyze(body=request).execute()
+         return response
+
+     def infer(self, text):
+         return self._infer(text)
+
+     def get_score(self, text, label='TOXICITY'):
+         response = self._infer(text)
+         return response['attributeScores'][label]['spanScores'][0]['score']['value']
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Perspective API Test.')
+     parser.add_argument('-i', '--input-text', type=str, required=True)
+     args = parser.parse_args()
+     return args
+
+
+ if __name__ == '__main__':
+     args = parse_args()
+
+     perspective_api = PerspectiveAPI()
+     score = perspective_api.get_score(args.input_text)
+
+     print(score)
translator/__init__.py ADDED
@@ -0,0 +1 @@
+ from .module import Translator
translator/module.py ADDED
@@ -0,0 +1,59 @@
+ from .v3 import GoogleAuthTranslation
+ from pathlib import Path
+ import yaml
+ import os
+
+ MAX_ENG_TEXT_LENGTH = int(os.getenv('MAX_ENG_TEXT_LENGTH', 200))
+ MAX_CJK_TEXT_LENGTH = int(os.getenv('MAX_CJK_TEXT_LENGTH', 100))
+
+ class Translator:
+     def __init__(self, yaml_path='./lang.yaml'):
+         self.google_translation = GoogleAuthTranslation(project_id="cvpr-2022-demonstration")
+         with open(yaml_path) as f:
+             self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
+
+     @staticmethod
+     def length_check(lang, text):
+         if lang in ['en']:
+             if len(text) > MAX_ENG_TEXT_LENGTH:
+                 raise AssertionError(f"Input text is too long. For English, the text length should be less than {MAX_ENG_TEXT_LENGTH}. | Length: {len(text)}")
+         elif lang in ['ko', 'ja', 'zh-CN', 'zh']:
+             if len(text) > MAX_CJK_TEXT_LENGTH:
+                 raise AssertionError(f"Input text is too long. For CJK, the text length should be less than {MAX_CJK_TEXT_LENGTH}. | Length: {len(text)}")
+         else:
+             raise AssertionError(f"Not in ['ko', 'ja', 'zh-CN', 'zh', 'en'] ! | Language: {lang}")
+
+         return
+
+     def _get_text_with_lang(self, text, lang):
+         lang_detected = self.google_translation.detect(text)
+         print(f"Detected as: {lang_detected} | Destination: {lang}")
+
+         if lang is None:
+             lang = lang_detected
+
+         if lang != lang_detected:
+             target_text = self.google_translation.translate(text, lang=lang)
+         else:
+             target_text = text
+
+         return target_text, lang
+
+     def _convert_lang_from_index(self, lang):
+         try:
+             lang = [name for name in self.supporting_languages
+                     if self.supporting_languages[name]['language'] == lang][0]
+         except Exception as e:
+             raise RuntimeError(e)
+
+         return lang
+
+     def get_translation(self, text, lang, use_translation=True):
+         lang_ = self._convert_lang_from_index(lang)
+
+         if use_translation:
+             target_text, _ = self._get_text_with_lang(text, lang_)
+         else:
+             target_text = text
+
+         return target_text, lang_
translator/v3.py ADDED
@@ -0,0 +1,58 @@
+ from google.cloud import translate
+ import yaml
+
+
+ class GoogleAuthTranslation:
+     def __init__(self, project_id, yaml_path='lang.yaml'):
+         self.translator = translate.TranslationServiceClient()
+         self.location = "global"
+         self.parent = f"projects/{project_id}/locations/{self.location}"
+
+         with open(yaml_path) as f:
+             self.supporting_languages = yaml.load(f, Loader=yaml.FullLoader)
+
+     def _detect(self, query):
+         response = self.translator.detect_language(
+             request={
+                 "parent": self.parent,
+                 "content": query,
+                 "mime_type": "text/plain",  # mime types: text/plain, text/html
+             }
+         )
+
+         for language in response.languages:
+             # First language is the most confident one
+             return language.language_code
+
+     def _get_dest_from_lang(self, lang):
+         try:
+             return self.supporting_languages[lang]['google_dest']
+
+         except KeyError as e:
+             raise e
+
+     def _get_lang_from_dest(self, dest):
+         for key in self.supporting_languages:
+             if self.supporting_languages[key]['google_dest'] == dest:
+                 return key
+
+         raise RuntimeError(f"Detected language is not supported in our multilingual TTS. |\n Code: {dest} | See https://cloud.google.com/translate/docs/languages")
+
+     def translate(self, query, lang):
+
+         dest = self._get_dest_from_lang(lang)
+
+         response = self.translator.translate_text(
+             request={
+                 "parent": self.parent,
+                 "contents": [query],
+                 "mime_type": "text/plain",  # mime types: text/plain, text/html
+                 "target_language_code": dest,
+             }
+         )
+
+         return " ".join([translation.translated_text for translation in response.translations])
+
+     def detect(self, query):
+         dest = self._detect(query)
+         return self._get_lang_from_dest(dest)
vacant.mp4 ADDED
File without changes