Spaces:
Sleeping
Sleeping
wony617
committed on
Commit
•
2f541f4
1
Parent(s):
c9615d3
feat: add file path list in the dropdown
Browse files- app.py +36 -9
- content.py +33 -0
- prompt.py +128 -0
app.py
CHANGED
@@ -8,6 +8,10 @@ import os
|
|
8 |
import openai
|
9 |
import gradio as gr
|
10 |
|
|
|
|
|
|
|
|
|
11 |
def get_content(filepath: str) -> str:
|
12 |
url = string.Template(
|
13 |
"https://raw.githubusercontent.com/huggingface/"
|
@@ -163,22 +167,45 @@ with demo:
|
|
163 |
"๋์ฐฉ์ด", "lรญngua alvo"
|
164 |
])
|
165 |
)
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
with gr.Tabs():
|
175 |
with gr.TabItem("Web UI"):
|
176 |
prompt_button = gr.Button("Show Full Prompt", variant="primary")
|
177 |
# TODO: add with_prompt_checkbox so people can freely use other services such as DeepL or Papago.
|
178 |
gr.Markdown("1. Copy with the button right-hand side and paste into [chat.openai.com](https://chat.openai.com).")
|
179 |
prompt_output = gr.Textbox(label="Full Prompt", lines=3, show_copy_button=True)
|
|
|
|
|
180 |
# TODO: add check for segments, indicating whether user should add or remove new lines from their input. (gr.Row)
|
181 |
-
gr.Markdown("
|
182 |
ui_translated_input = gr.Textbox(label="Cleaned ChatGPT initial translation")
|
183 |
fill_button = gr.Button("Fill in scaffold", variant="primary")
|
184 |
with gr.TabItem("API (Not Implemented)"):
|
|
|
8 |
import openai
|
9 |
import gradio as gr
|
10 |
|
11 |
+
from content import get_content_list, retrieve_content_list
|
12 |
+
from prompt import get_prompt_with_glossary
|
13 |
+
|
14 |
+
|
15 |
def get_content(filepath: str) -> str:
|
16 |
url = string.Template(
|
17 |
"https://raw.githubusercontent.com/huggingface/"
|
|
|
167 |
"๋์ฐฉ์ด", "lรญngua alvo"
|
168 |
])
|
169 |
)
|
170 |
+
with gr.Column():
    # Fetch the docs table of contents once while the UI is being built and
    # group its file paths by category.
    content = get_content_list()
    content_list = retrieve_content_list(content)

    def update_dropdown_b(choice_a):
        """Rebuild the file-path dropdown for the selected category.

        ``choice_a`` is the current value of the category dropdown. That
        dropdown accepts custom values, so a category that is not a key of
        ``content_list`` yields an empty choice list instead of raising
        ``KeyError``.
        """
        return gr.Dropdown(
            choices=content_list.get(choice_a, []),
            label="File path of document",
            interactive=True,
            allow_custom_value=True,
        )

    category_list = list(content_list.keys())
    category_input = gr.Dropdown(
        choices=category_list,
        value=category_list[0],
        label="Category of document",
        interactive=True,
        allow_custom_value=True,
    )

    # Look the paths up by the initial category *value*. Passing the Dropdown
    # component itself as the key never matches a category, which left the
    # file-path dropdown empty until the category was changed.
    filtpath_list = content_list.get(category_list[0], [])
    filepath_input = gr.Dropdown(
        choices=filtpath_list,
        label="File path of document",
        interactive=True,
    )

    # Keep the file-path choices in sync with the selected category.
    category_input.change(fn=update_dropdown_b, inputs=category_input, outputs=filepath_input)
|
198 |
+
|
199 |
with gr.Tabs():
|
200 |
with gr.TabItem("Web UI"):
|
201 |
prompt_button = gr.Button("Show Full Prompt", variant="primary")
|
202 |
# TODO: add with_prompt_checkbox so people can freely use other services such as DeepL or Papago.
|
203 |
gr.Markdown("1. Copy with the button right-hand side and paste into [chat.openai.com](https://chat.openai.com).")
|
204 |
prompt_output = gr.Textbox(label="Full Prompt", lines=3, show_copy_button=True)
|
205 |
+
gr.Markdown("2. After getting the initial translation, revise your translation using following prompt.")
|
206 |
+
prompt_with_glossary_output = gr.Textbox(label="Prompt with glossary", lines=3, show_copy_button=True, value=get_prompt_with_glossary())
|
207 |
# TODO: add check for segments, indicating whether user should add or remove new lines from their input. (gr.Row)
|
208 |
+
gr.Markdown("3. After getting the complete translation, remove randomly inserted newlines on your favorite text editor and paste the result below.")
|
209 |
ui_translated_input = gr.Textbox(label="Cleaned ChatGPT initial translation")
|
210 |
fill_button = gr.Button("Fill in scaffold", variant="primary")
|
211 |
with gr.TabItem("API (Not Implemented)"):
|
content.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import string
|
3 |
+
import re
|
4 |
+
|
5 |
+
|
6 |
+
def get_content_list() -> str:
    """Download the English docs table of contents of huggingface/transformers.

    Returns:
        The raw text of ``docs/source/en/_toctree.yml`` on the ``main`` branch.

    Raises:
        ValueError: If the server answers with a status code other than 200.
            The exception carries the message and the requested URL.
        requests.exceptions.RequestException: If the request itself fails,
            including when the timeout below expires.
    """
    url = (
        "https://raw.githubusercontent.com/huggingface/"
        "transformers/main/docs/source/en/_toctree.yml"
    )
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise ValueError("Failed to retrieve content list from the URL.", url)
    return response.text
|
17 |
+
|
18 |
+
def retrieve_content_list(content: str) -> "dict[str | None, list[str]]":
    """Group the ``local:`` entries found in *content* by top-level directory.

    Every ``local: <path>`` occurrence is turned into ``<path>.md``. Paths
    containing a ``/`` are filed under their first path component; paths
    without one are filed under the ``None`` key.

    Args:
        content: Text to scan (e.g. the body of a ``_toctree.yml`` file).

    Returns:
        A dict mapping category (``None`` or the first path component) to
        the list of ``.md`` file paths, in order of appearance. The ``None``
        key is always present and always first, even when it has no entries.
    """
    # Seed with None so it stays the first key regardless of the input order.
    categories = {None: []}

    for local_path in re.findall(r'local:\s*(\S+)', content):
        filepath = local_path + ".md"
        prefix, separator, _ = filepath.partition('/')
        # An empty separator means there was no '/', i.e. a top-level file.
        category = prefix if separator else None
        categories.setdefault(category, []).append(filepath)
    return categories
|
33 |
+
|
prompt.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import string
|
2 |
+
|
3 |
+
|
4 |
+
PROMPT_WITH_GLOSSARY = """
|
5 |
+
You have a glossary of terms with their Korean translations. When translating a sentence, you need to check if any of the words in the sentence are in the glossary, and if so, translate them according to the provided Korean terms. Here is the glossary:
|
6 |
+
|
7 |
+
- revision: ๊ฐ์
|
8 |
+
- method: ๋ฉ์๋
|
9 |
+
- secrets: ๋น๋ฐ๊ฐ
|
10 |
+
- search helper: ๊ฒ์ ํฌํผ
|
11 |
+
- logging level: ๋ก๊ทธ ๋ ๋ฒจ
|
12 |
+
- workflow: ์ํฌํ๋ก์ฐ
|
13 |
+
- corner case: ์ฝ๋ ์ผ์ด์ค
|
14 |
+
- tokenization: ํ ํฐํ
|
15 |
+
- architecture: ์ํคํ
์ฒ
|
16 |
+
- attention mask: ์ดํ
์
๋ง์คํฌ
|
17 |
+
- backbone: ๋ฐฑ๋ณธ
|
18 |
+
- argmax: argmax
|
19 |
+
- beam search: ๋น ์์น
|
20 |
+
- clustering: ๊ตฐ์งํ
|
21 |
+
- configuration: ๊ตฌ์ฑ
|
22 |
+
- context: ๋ฌธ๋งฅ
|
23 |
+
- cross entropy: ๊ต์ฐจ ์ํธ๋กํผ
|
24 |
+
- cross-attention: ํฌ๋ก์ค ์ดํ
์
|
25 |
+
- dictionary: ๋์
๋๋ฆฌ
|
26 |
+
- entry: ์ํธ๋ฆฌ
|
27 |
+
- few shot: ํจ์ท
|
28 |
+
- flatten: flatten
|
29 |
+
- ground truth: ์ ๋ต
|
30 |
+
- head: ํค๋
|
31 |
+
- helper function: ํฌํผ ํจ์
|
32 |
+
- image captioning: ์ด๋ฏธ์ง ์บก์
๋
|
33 |
+
- image patch: ์ด๋ฏธ์ง ํจ์น
|
34 |
+
- inference: ์ถ๋ก
|
35 |
+
- instance: ์ธ์คํด์ค
|
36 |
+
- Instantiate: ์ธ์คํด์คํ
|
37 |
+
- knowledge distillation: ์ง์ ์ฆ๋ฅ
|
38 |
+
- labels: ๋ ์ด๋ธ
|
39 |
+
- large language models (LLM): ๋๊ท๋ชจ ์ธ์ด ๋ชจ๋ธ
|
40 |
+
- layer: ๋ ์ด์ด
|
41 |
+
- learning rate scheduler: Learning Rate Scheduler
|
42 |
+
- localization: ๋ก์ปฌ๋ฆฌ์ ์ด์
|
43 |
+
- log mel-filter bank: ๋ก๊ทธ ๋ฉ ํํฐ ๋ฑ
ํฌ
|
44 |
+
- look-up table: ๋ฃฉ์
ํ
์ด๋ธ
|
45 |
+
- loss function: ์์ค ํจ์
|
46 |
+
- machine learning: ๋จธ์ ๋ฌ๋
|
47 |
+
- mapping: ๋งคํ
|
48 |
+
- masked language modeling (MLM): ๋ง์คํฌ๋ ์ธ์ด ๋ชจ๋ธ
|
49 |
+
- malware: ์
์ฑ์ฝ๋
|
50 |
+
- metric: ์งํ
|
51 |
+
- mixed precision: ํผํฉ ์ ๋ฐ๋
|
52 |
+
- modality: ๋ชจ๋ฌ๋ฆฌํฐ
|
53 |
+
- monolingual model: ๋จ์ผ ์ธ์ด ๋ชจ๋ธ
|
54 |
+
- multi gpu: ๋ค์ค GPU
|
55 |
+
- multilingual model: ๋ค๊ตญ์ด ๋ชจ๋ธ
|
56 |
+
- parsing: ํ์ฑ
|
57 |
+
- perplexity (PPL): ํํ๋ ์ํฐ(Perplexity)
|
58 |
+
- pipeline: ํ์ดํ๋ผ์ธ
|
59 |
+
- pixel values: ํฝ์
๊ฐ
|
60 |
+
- pooling: ํ๋ง
|
61 |
+
- position IDs: ์์น ID
|
62 |
+
- preprocessing: ์ ์ฒ๋ฆฌ
|
63 |
+
- prompt: ํ๋กฌํํธ
|
64 |
+
- pythonic: ํ์ด์จ๋
|
65 |
+
- query: ์ฟผ๋ฆฌ
|
66 |
+
- question answering: ์ง์ ์๋ต
|
67 |
+
- raw audio waveform: ์์ ์ค๋์ค ํํ
|
68 |
+
- recurrent neural network (RNN): ์ํ ์ ๊ฒฝ๋ง
|
69 |
+
- accelerator: ๊ฐ์๊ธฐ
|
70 |
+
- Accelerate: Accelerate
|
71 |
+
- architecture: ์ํคํ
์ฒ
|
72 |
+
- arguments: ์ธ์
|
73 |
+
- attention mask: ์ดํ
์
๋ง์คํฌ
|
74 |
+
- augmentation: ์ฆ๊ฐ
|
75 |
+
- autoencoding models: ์คํ ์ธ์ฝ๋ฉ ๋ชจ๋ธ
|
76 |
+
- autoregressive models: ์๊ธฐํ๊ท ๋ชจ๋ธ
|
77 |
+
- backward: ์ญ๋ฐฉํฅ
|
78 |
+
- bounding box: ๋ฐ์ด๋ฉ ๋ฐ์ค
|
79 |
+
- causal language modeling: ์ธ๊ณผ์ ์ธ์ด ๋ชจ๋ธ๋ง(causal language modeling)
|
80 |
+
- channel: ์ฑ๋
|
81 |
+
- checkpoint: ์ฒดํฌํฌ์ธํธ(checkpoint)
|
82 |
+
- chunk: ๋ฌถ์
|
83 |
+
- computer vision: ์ปดํจํฐ ๋น์
|
84 |
+
- convolution: ํฉ์ฑ๊ณฑ
|
85 |
+
- crop: ์๋ฅด๊ธฐ
|
86 |
+
- custom: ์ฌ์ฉ์ ์ ์
|
87 |
+
- customize: ๋ง์ถค ์ค์ ํ๋ค
|
88 |
+
- data collator: ๋ฐ์ดํฐ ์ฝ๋ ์ดํฐ
|
89 |
+
- dataset: ๋ฐ์ดํฐ ์ธํธ
|
90 |
+
- decoder input IDs: ๋์ฝ๋ ์
๋ ฅ ID
|
91 |
+
- decoder models: ๋์ฝ๋ ๋ชจ๋ธ
|
92 |
+
- deep learning (DL): ๋ฅ๋ฌ๋
|
93 |
+
- directory: ๋๋ ํฐ๋ฆฌ
|
94 |
+
- distributed training: ๋ถ์ฐ ํ์ต
|
95 |
+
- downstream: ๋ค์ด์คํธ๋ฆผ
|
96 |
+
- encoder models: ์ธ์ฝ๋ ๋ชจ๋ธ
|
97 |
+
- entity: ๊ฐ์ฒด
|
98 |
+
- epoch: ์ํญ
|
99 |
+
- evaluation method: ํ๊ฐ ๋ฐฉ๋ฒ
|
100 |
+
- feature extraction: ํน์ฑ ์ถ์ถ
|
101 |
+
- feature matrix: ํน์ฑ ํ๋ ฌ(feature matrix)
|
102 |
+
- fine-tuning: 미세 조정
|
103 |
+
- finetuned models: ๋ฏธ์ธ ์กฐ์ ๋ชจ๋ธ
|
104 |
+
- hidden state: ์๋ ์ํ
|
105 |
+
- hyperparameter: ํ์ดํผํ๋ผ๋ฏธํฐ
|
106 |
+
- learning: ํ์ต
|
107 |
+
- load: ๊ฐ์ ธ์ค๋ค
|
108 |
+
- method: ๋ฉ์๋
|
109 |
+
- optimizer: ์ตํฐ๋ง์ด์
|
110 |
+
- pad (padding): ํจ๋ (ํจ๋ฉ)
|
111 |
+
- parameter: ๋งค๊ฐ๋ณ์
|
112 |
+
- pretrained model: ์ฌ์ ํ๋ จ๋ ๋ชจ๋ธ
|
113 |
+
- separator (* [SEP]๋ฅผ ๋ถ๋ฅด๋ ์ด๋ฆ): ๋ถํ ํ ํฐ
|
114 |
+
- sequence: ์ํ์ค
|
115 |
+
- silent error: ์กฐ์ฉํ ์ค๋ฅ
|
116 |
+
- token: ํ ํฐ
|
117 |
+
- tokenizer: ํ ํฌ๋์ด์
|
118 |
+
- training: ํ๋ จ
|
119 |
+
- workflow: ์ํฌํ๋ก์ฐ
|
120 |
+
|
121 |
+
Please revise the translated sentences accordingly using the terms provided in this glossary.
|
122 |
+
"""
|
123 |
+
|
124 |
+
def get_prompt_with_glossary() -> str:
    """Return the glossary-based revision prompt.

    The module-level ``PROMPT_WITH_GLOSSARY`` text is run through
    ``string.Template.safe_substitute`` with no mapping, so any ``$name``
    placeholders it might contain are left as they are.
    """
    template = string.Template(PROMPT_WITH_GLOSSARY)
    return template.safe_substitute()
|