wony617 committed on
Commit
2f541f4
โ€ข
1 Parent(s): c9615d3

feat: add file path list in the dropdown

Browse files
Files changed (3) hide show
  1. app.py +36 -9
  2. content.py +33 -0
  3. prompt.py +128 -0
app.py CHANGED
@@ -8,6 +8,10 @@ import os
8
  import openai
9
  import gradio as gr
10
 
 
 
 
 
11
  def get_content(filepath: str) -> str:
12
  url = string.Template(
13
  "https://raw.githubusercontent.com/huggingface/"
@@ -163,22 +167,45 @@ with demo:
163
  "๋„์ฐฉ์–ด", "lรญngua alvo"
164
  ])
165
  )
166
- filtpath_list = ["tasks/masked_language_modeling.md", "main_classes/callback.md"]
167
- filepath_input = gr.Dropdown(
168
- choices=filtpath_list,
169
- value="tasks/masked_language_modeling.md",
170
- label="File path of transformers document",
171
- interactive=True,
172
- allow_custom_value=True,
173
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  with gr.Tabs():
175
  with gr.TabItem("Web UI"):
176
  prompt_button = gr.Button("Show Full Prompt", variant="primary")
177
  # TODO: add with_prompt_checkbox so people can freely use other services such as DeepL or Papago.
178
  gr.Markdown("1. Copy with the button right-hand side and paste into [chat.openai.com](https://chat.openai.com).")
179
  prompt_output = gr.Textbox(label="Full Prompt", lines=3, show_copy_button=True)
 
 
180
  # TODO: add check for segments, indicating whether user should add or remove new lines from their input. (gr.Row)
181
- gr.Markdown("2. After getting the complete translation, remove randomly inserted newlines on your favorite text editor and paste the result below.")
182
  ui_translated_input = gr.Textbox(label="Cleaned ChatGPT initial translation")
183
  fill_button = gr.Button("Fill in scaffold", variant="primary")
184
  with gr.TabItem("API (Not Implemented)"):
 
8
  import openai
9
  import gradio as gr
10
 
11
+ from content import get_content_list, retrieve_content_list
12
+ from prompt import get_prompt_with_glossary
13
+
14
+
15
  def get_content(filepath: str) -> str:
16
  url = string.Template(
17
  "https://raw.githubusercontent.com/huggingface/"
 
167
  "๋„์ฐฉ์–ด", "lรญngua alvo"
168
  ])
169
  )
170
+ with gr.Column():
171
+ content = get_content_list()
172
+ content_list = retrieve_content_list(content)
173
+
174
+ def update_dropdown_b(choice_a):
175
+ return gr.Dropdown(
176
+ choices=content_list[choice_a],
177
+ label="File path of document",
178
+ interactive=True,
179
+ allow_custom_value=True,
180
+ )
181
+
182
+ category_list = list(content_list.keys())
183
+ category_input = gr.Dropdown(
184
+ choices=category_list,
185
+ value=category_list[0],
186
+ label="Category of document",
187
+ interactive=True,
188
+ allow_custom_value=True,
189
+ )
190
+
191
+ filtpath_list = content_list.get(category_input)
192
+ filepath_input = gr.Dropdown(
193
+ label="File path of document",
194
+ interactive=True
195
+ )
196
+
197
+ category_input.change(fn=update_dropdown_b, inputs=category_input, outputs=filepath_input)
198
+
199
  with gr.Tabs():
200
  with gr.TabItem("Web UI"):
201
  prompt_button = gr.Button("Show Full Prompt", variant="primary")
202
  # TODO: add with_prompt_checkbox so people can freely use other services such as DeepL or Papago.
203
  gr.Markdown("1. Copy with the button right-hand side and paste into [chat.openai.com](https://chat.openai.com).")
204
  prompt_output = gr.Textbox(label="Full Prompt", lines=3, show_copy_button=True)
205
+ gr.Markdown("2. After getting the initial translation, revise your translation using following prompt.")
206
+ prompt_with_glossary_output = gr.Textbox(label="Prompt with glossary", lines=3, show_copy_button=True, value=get_prompt_with_glossary())
207
  # TODO: add check for segments, indicating whether user should add or remove new lines from their input. (gr.Row)
208
+ gr.Markdown("3. After getting the complete translation, remove randomly inserted newlines on your favorite text editor and paste the result below.")
209
  ui_translated_input = gr.Textbox(label="Cleaned ChatGPT initial translation")
210
  fill_button = gr.Button("Fill in scaffold", variant="primary")
211
  with gr.TabItem("API (Not Implemented)"):
content.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import string
3
+ import re
4
+
5
+
6
+ def get_content_list() -> str:
7
+ url = string.Template(
8
+ "https://raw.githubusercontent.com/huggingface/"
9
+ "transformers/main/docs/source/en/_toctree.yml"
10
+ ).safe_substitute()
11
+ response = requests.get(url)
12
+ if response.status_code == 200:
13
+ content = response.text
14
+ return content
15
+ else:
16
+ raise ValueError("Failed to retrieve content list from the URL.", url)
17
+
18
+ def retrieve_content_list(content: str) -> dict[any, list[str]]:
19
+ file_paths = re.findall(r'local:\s*(\S+)', content)
20
+ categories = {None: []}
21
+
22
+ for filepath in file_paths:
23
+ filepath += ".md"
24
+ if '/' in filepath:
25
+ prefix = filepath.split('/')[0]
26
+ if prefix in categories:
27
+ categories[prefix].append(filepath)
28
+ else:
29
+ categories[prefix] = [filepath]
30
+ else:
31
+ categories[None].append(filepath)
32
+ return categories
33
+
prompt.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+
3
+
4
+ PROMPT_WITH_GLOSSARY = """
5
+ You have a glossary of terms with their Korean translations. When translating a sentence, you need to check if any of the words in the sentence are in the glossary, and if so, translate them according to the provided Korean terms. Here is the glossary:
6
+
7
+ - revision: ๊ฐœ์ •
8
+ - method: ๋ฉ”์†Œ๋“œ
9
+ - secrets: ๋น„๋ฐ€๊ฐ’
10
+ - search helper: ๊ฒ€์ƒ‰ ํ—ฌํผ
11
+ - logging level: ๋กœ๊ทธ ๋ ˆ๋ฒจ
12
+ - workflow: ์›Œํฌํ”Œ๋กœ์šฐ
13
+ - corner case: ์ฝ”๋„ˆ ์ผ€์ด์Šค
14
+ - tokenization: ํ† ํฐํ™”
15
+ - architecture: ์•„ํ‚คํ…์ฒ˜
16
+ - attention mask: ์–ดํ…์…˜ ๋งˆ์Šคํฌ
17
+ - backbone: ๋ฐฑ๋ณธ
18
+ - argmax: argmax
19
+ - beam search: ๋น” ์„œ์น˜
20
+ - clustering: ๊ตฐ์ง‘ํ™”
21
+ - configuration: ๊ตฌ์„ฑ
22
+ - context: ๋ฌธ๋งฅ
23
+ - cross entropy: ๊ต์ฐจ ์—”ํŠธ๋กœํ”ผ
24
+ - cross-attention: ํฌ๋กœ์Šค ์–ดํ…์…˜
25
+ - dictionary: ๋”•์…”๋„ˆ๋ฆฌ
26
+ - entry: ์—”ํŠธ๋ฆฌ
27
+ - few shot: ํ“จ์ƒท
28
+ - flatten: flatten
29
+ - ground truth: ์ •๋‹ต
30
+ - head: ํ—ค๋“œ
31
+ - helper function: ํ—ฌํผ ํ•จ์ˆ˜
32
+ - image captioning: ์ด๋ฏธ์ง€ ์บก์…”๋‹
33
+ - image patch: ์ด๋ฏธ์ง€ ํŒจ์น˜
34
+ - inference: ์ถ”๋ก 
35
+ - instance: ์ธ์Šคํ„ด์Šค
36
+ - Instantiate: ์ธ์Šคํ„ด์Šคํ™”
37
+ - knowledge distillation: ์ง€์‹ ์ฆ๋ฅ˜
38
+ - labels: ๋ ˆ์ด๋ธ”
39
+ - large language models (LLM): ๋Œ€๊ทœ๋ชจ ์–ธ์–ด ๋ชจ๋ธ
40
+ - layer: ๋ ˆ์ด์–ด
41
+ - learning rate scheduler: Learning Rate Scheduler
42
+ - localization: ๋กœ์ปฌ๋ฆฌ์ œ์ด์…˜
43
+ - log mel-filter bank: ๋กœ๊ทธ ๋ฉœ ํ•„ํ„ฐ ๋ฑ…ํฌ
44
+ - look-up table: ๋ฃฉ์—… ํ…Œ์ด๋ธ”
45
+ - loss function: ์†์‹ค ํ•จ์ˆ˜
46
+ - machine learning: ๋จธ์‹  ๋Ÿฌ๋‹
47
+ - mapping: ๋งคํ•‘
48
+ - masked language modeling (MLM): ๋งˆ์Šคํฌ๋“œ ์–ธ์–ด ๋ชจ๋ธ
49
+ - malware: ์•…์„ฑ์ฝ”๋“œ
50
+ - metric: ์ง€ํ‘œ
51
+ - mixed precision: ํ˜ผํ•ฉ ์ •๋ฐ€๋„
52
+ - modality: ๋ชจ๋‹ฌ๋ฆฌํ‹ฐ
53
+ - monolingual model: ๋‹จ์ผ ์–ธ์–ด ๋ชจ๋ธ
54
+ - multi gpu: ๋‹ค์ค‘ GPU
55
+ - multilingual model: ๋‹ค๊ตญ์–ด ๋ชจ๋ธ
56
+ - parsing: ํŒŒ์‹ฑ
57
+ - perplexity (PPL): ํŽ„ํ”Œ๋ ‰์„œํ‹ฐ(Perplexity)
58
+ - pipeline: ํŒŒ์ดํ”„๋ผ์ธ
59
+ - pixel values: ํ”ฝ์…€ ๊ฐ’
60
+ - pooling: ํ’€๋ง
61
+ - position IDs: ์œ„์น˜ ID
62
+ - preprocessing: ์ „์ฒ˜๋ฆฌ
63
+ - prompt: ํ”„๋กฌํ”„ํŠธ
64
+ - pythonic: ํŒŒ์ด์จ๋‹‰
65
+ - query: ์ฟผ๋ฆฌ
66
+ - question answering: ์งˆ์˜ ์‘๋‹ต
67
+ - raw audio waveform: ์›์‹œ ์˜ค๋””์˜ค ํŒŒํ˜•
68
+ - recurrent neural network (RNN): ์ˆœํ™˜ ์‹ ๊ฒฝ๋ง
69
+ - accelerator: ๊ฐ€์†๊ธฐ
70
+ - Accelerate: Accelerate
71
+ - architecture: ์•„ํ‚คํ…์ฒ˜
72
+ - arguments: ์ธ์ˆ˜
73
+ - attention mask: ์–ดํ…์…˜ ๋งˆ์Šคํฌ
74
+ - augmentation: ์ฆ๊ฐ•
75
+ - autoencoding models: ์˜คํ† ์ธ์ฝ”๋”ฉ ๋ชจ๋ธ
76
+ - autoregressive models: ์ž๊ธฐํšŒ๊ท€ ๋ชจ๋ธ
77
+ - backward: ์—ญ๋ฐฉํ–ฅ
78
+ - bounding box: ๋ฐ”์šด๋”ฉ ๋ฐ•์Šค
79
+ - causal language modeling: ์ธ๊ณผ์  ์–ธ์–ด ๋ชจ๋ธ๋ง(causal language modeling)
80
+ - channel: ์ฑ„๋„
81
+ - checkpoint: ์ฒดํฌํฌ์ธํŠธ(checkpoint)
82
+ - chunk: ๋ฌถ์Œ
83
+ - computer vision: ์ปดํ“จํ„ฐ ๋น„์ „
84
+ - convolution: ํ•ฉ์„ฑ๊ณฑ
85
+ - crop: ์ž๋ฅด๊ธฐ
86
+ - custom: ์‚ฌ์šฉ์ž ์ •์˜
87
+ - customize: ๋งž์ถค ์„ค์ •ํ•˜๋‹ค
88
+ - data collator: ๋ฐ์ดํ„ฐ ์ฝœ๋ ˆ์ดํ„ฐ
89
+ - dataset: ๋ฐ์ดํ„ฐ ์„ธํŠธ
90
+ - decoder input IDs: ๋””์ฝ”๋” ์ž…๋ ฅ ID
91
+ - decoder models: ๋””์ฝ”๋” ๋ชจ๋ธ
92
+ - deep learning (DL): ๋”ฅ๋Ÿฌ๋‹
93
+ - directory: ๋””๋ ‰ํ„ฐ๋ฆฌ
94
+ - distributed training: ๋ถ„์‚ฐ ํ•™์Šต
95
+ - downstream: ๋‹ค์šด์ŠคํŠธ๋ฆผ
96
+ - encoder models: ์ธ์ฝ”๋” ๋ชจ๋ธ
97
+ - entity: ๊ฐœ์ฒด
98
+ - epoch: ์—ํญ
99
+ - evaluation method: ํ‰๊ฐ€ ๋ฐฉ๋ฒ•
100
+ - feature extraction: ํŠน์„ฑ ์ถ”์ถœ
101
+ - feature matrix: ํŠน์„ฑ ํ–‰๋ ฌ(feature matrix)
102
+ - fine-tunning: ๋ฏธ์„ธ ์กฐ์ •
103
+ - finetuned models: ๋ฏธ์„ธ ์กฐ์ • ๋ชจ๋ธ
104
+ - hidden state: ์€๋‹‰ ์ƒํƒœ
105
+ - hyperparameter: ํ•˜์ดํผํŒŒ๋ผ๋ฏธํ„ฐ
106
+ - learning: ํ•™์Šต
107
+ - load: ๊ฐ€์ ธ์˜ค๋‹ค
108
+ - method: ๋ฉ”์†Œ๋“œ
109
+ - optimizer: ์˜ตํ‹ฐ๋งˆ์ด์ €
110
+ - pad (padding): ํŒจ๋“œ (ํŒจ๋”ฉ)
111
+ - parameter: ๋งค๊ฐœ๋ณ€์ˆ˜
112
+ - pretrained model: ์‚ฌ์ „ํ›ˆ๋ จ๋œ ๋ชจ๋ธ
113
+ - separator (* [SEP]๋ฅผ ๋ถ€๋ฅด๋Š” ์ด๋ฆ„): ๋ถ„ํ•  ํ† ํฐ
114
+ - sequence: ์‹œํ€€์Šค
115
+ - silent error: ์กฐ์šฉํ•œ ์˜ค๋ฅ˜
116
+ - token: ํ† ํฐ
117
+ - tokenizer: ํ† ํฌ๋‚˜์ด์ €
118
+ - training: ํ›ˆ๋ จ
119
+ - workflow: ์›Œํฌํ”Œ๋กœ์šฐ
120
+
121
+ Please revise the translated sentences accordingly using the terms provided in this glossary.
122
+ """
123
+
124
+ def get_prompt_with_glossary() -> str:
125
+ prompt = string.Template(
126
+ PROMPT_WITH_GLOSSARY
127
+ ).safe_substitute()
128
+ return prompt