Upgrade Gradio to version 4.44.0

#1
Files changed (3)
  1. app.py +5 -36
  2. content.py +0 -33
  3. prompt.py +0 -128
app.py CHANGED
@@ -8,10 +8,6 @@ import os
 import openai
 import gradio as gr
 
-from content import get_content_list, retrieve_content_list
-from prompt import get_prompt_with_glossary
-
-
 def get_content(filepath: str) -> str:
     url = string.Template(
         "https://raw.githubusercontent.com/huggingface/"
@@ -167,45 +163,18 @@ with demo:
             "도착어", "língua alvo"
         ])
     )
-    with gr.Column():
-        content = get_content_list()
-        content_list = retrieve_content_list(content)
-
-        def update_dropdown_b(choice_a):
-            return gr.Dropdown(
-                choices=content_list[choice_a],
-                label="File path of document",
-                interactive=True,
-                allow_custom_value=True,
-            )
-
-        category_list = list(content_list.keys())
-        category_input = gr.Dropdown(
-            choices=category_list,
-            value=category_list[0],
-            label="Category of document",
-            interactive=True,
-            allow_custom_value=True,
-        )
-
-        filtpath_list = content_list.get(category_input)
-        filepath_input = gr.Dropdown(
-            label="File path of document",
-            interactive=True
-        )
-
-        category_input.change(fn=update_dropdown_b, inputs=category_input, outputs=filepath_input)
-
+    filepath_input = gr.Textbox(
+        value="tasks/masked_language_modeling.md",
+        label="File path of transformers document"
+    )
     with gr.Tabs():
         with gr.TabItem("Web UI"):
             prompt_button = gr.Button("Show Full Prompt", variant="primary")
             # TODO: add with_prompt_checkbox so people can freely use other services such as DeepL or Papago.
             gr.Markdown("1. Copy with the button right-hand side and paste into [chat.openai.com](https://chat.openai.com).")
             prompt_output = gr.Textbox(label="Full Prompt", lines=3, show_copy_button=True)
-            gr.Markdown("2. After getting the initial translation, revise your translation using following prompt.")
-            prompt_with_glossary_output = gr.Textbox(label="Prompt with glossary", lines=3, show_copy_button=True, value=get_prompt_with_glossary())
             # TODO: add check for segments, indicating whether user should add or remove new lines from their input. (gr.Row)
-            gr.Markdown("3. After getting the complete translation, remove randomly inserted newlines on your favorite text editor and paste the result below.")
+            gr.Markdown("2. After getting the complete translation, remove randomly inserted newlines on your favorite text editor and paste the result below.")
             ui_translated_input = gr.Textbox(label="Cleaned ChatGPT initial translation")
             fill_button = gr.Button("Fill in scaffold", variant="primary")
         with gr.TabItem("API (Not Implemented)"):
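
As context for this change: the document to translate is now chosen by typing a relative path into the new filepath_input textbox, and get_content turns that path into a raw.githubusercontent.com URL. A minimal standalone sketch of that flow follows; the "transformers/main/docs/source/en/" path segment is inferred from the deleted content.py and the textbox default value, so treat the exact layout as an assumption.

import requests

# Hypothetical helper mirroring app.py's get_content: build the raw GitHub URL
# for a transformers doc from a relative filepath and fetch its Markdown.
def fetch_transformers_doc(filepath: str) -> str:
    url = (
        "https://raw.githubusercontent.com/huggingface/"
        "transformers/main/docs/source/en/"  # assumed layout, see note above
        + filepath
    )
    response = requests.get(url)
    response.raise_for_status()  # surface HTTP errors instead of returning an error page
    return response.text

if __name__ == "__main__":
    # Same default path the new gr.Textbox ships with.
    print(fetch_transformers_doc("tasks/masked_language_modeling.md")[:300])
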
content.py DELETED
@@ -1,33 +0,0 @@
-import requests
-import string
-import re
-
-
-def get_content_list() -> str:
-    url = string.Template(
-        "https://raw.githubusercontent.com/huggingface/"
-        "transformers/main/docs/source/en/_toctree.yml"
-    ).safe_substitute()
-    response = requests.get(url)
-    if response.status_code == 200:
-        content = response.text
-        return content
-    else:
-        raise ValueError("Failed to retrieve content list from the URL.", url)
-
-def retrieve_content_list(content: str) -> dict[any, list[str]]:
-    file_paths = re.findall(r'local:\s*(\S+)', content)
-    categories = {None: []}
-
-    for filepath in file_paths:
-        filepath += ".md"
-        if '/' in filepath:
-            prefix = filepath.split('/')[0]
-            if prefix in categories:
-                categories[prefix].append(filepath)
-            else:
-                categories[prefix] = [filepath]
-        else:
-            categories[None].append(filepath)
-    return categories
-
 
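For reference, the two deleted helpers fetched the transformers _toctree.yml and grouped its "local:" entries by top-level directory, which is what fed the removed category dropdowns. A minimal usage sketch of that removed API, assuming the old content.py is still on the import path and network access is available:

from content import get_content_list, retrieve_content_list  # module deleted by this PR

toctree_text = get_content_list()                   # raw _toctree.yml text
content_list = retrieve_content_list(toctree_text)  # {directory prefix or None: [filepaths]}

# Example: the "tasks" bucket should include "tasks/masked_language_modeling.md",
# the path the new Textbox uses as its default value.
for category, paths in sorted(content_list.items(), key=lambda kv: str(kv[0])):
    print(category, len(paths))
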
prompt.py DELETED
@@ -1,128 +0,0 @@
-import string
-
-
-PROMPT_WITH_GLOSSARY = """
-You have a glossary of terms with their Korean translations. When translating a sentence, you need to check if any of the words in the sentence are in the glossary, and if so, translate them according to the provided Korean terms. Here is the glossary:
-
-- revision: 개정
-- method: 메소드
-- secrets: 비밀값
-- search helper: 검색 헬퍼
-- logging level: 로그 레벨
-- workflow: 워크플로우
-- corner case: 코너 케이스
-- tokenization: 토큰화
-- architecture: 아키텍처
-- attention mask: 어텐션 마스크
-- backbone: 백본
-- argmax: argmax
-- beam search: 빔 서치
-- clustering: 군집화
-- configuration: 구성
-- context: 문맥
-- cross entropy: 교차 엔트로피
-- cross-attention: 크로스 어텐션
-- dictionary: 딕셔너리
-- entry: 엔트리
-- few shot: 퓨샷
-- flatten: flatten
-- ground truth: 정답
-- head: 헤드
-- helper function: 헬퍼 함수
-- image captioning: 이미지 캡셔닝
-- image patch: 이미지 패치
-- inference: 추론
-- instance: 인스턴스
-- Instantiate: 인스턴스화
-- knowledge distillation: 지식 증류
-- labels: 레이블
-- large language models (LLM): 대규모 언어 모델
-- layer: 레이어
-- learning rate scheduler: Learning Rate Scheduler
-- localization: 로컬리제이션
-- log mel-filter bank: 로그 멜 필터 뱅크
-- look-up table: 룩업 테이블
-- loss function: 손실 함수
-- machine learning: 머신 러닝
-- mapping: 매핑
-- masked language modeling (MLM): 마스크드 언어 모델
-- malware: 악성코드
-- metric: 지표
-- mixed precision: 혼합 정밀도
-- modality: 모달리티
-- monolingual model: 단일 언어 모델
-- multi gpu: 다중 GPU
-- multilingual model: 다국어 모델
-- parsing: 파싱
-- perplexity (PPL): 펄플렉서티(Perplexity)
-- pipeline: 파이프라인
-- pixel values: 픽셀 값
-- pooling: 풀링
-- position IDs: 위치 ID
-- preprocessing: 전처리
-- prompt: 프롬프트
-- pythonic: 파이써닉
-- query: 쿼리
-- question answering: 질의 응답
-- raw audio waveform: 원시 오디오 파형
-- recurrent neural network (RNN): 순환 신경망
-- accelerator: 가속기
-- Accelerate: Accelerate
-- architecture: 아키텍처
-- arguments: 인수
-- attention mask: 어텐션 마스크
-- augmentation: 증강
-- autoencoding models: 오토인코딩 모델
-- autoregressive models: 자기회귀 모델
-- backward: 역방향
-- bounding box: 바운딩 박스
-- causal language modeling: 인과적 언어 모델링(causal language modeling)
-- channel: 채널
-- checkpoint: 체크포인트(checkpoint)
-- chunk: 묶음
-- computer vision: 컴퓨터 비전
-- convolution: 합성곱
-- crop: 자르기
-- custom: 사용자 정의
-- customize: 맞춤 설정하다
-- data collator: 데이터 콜레이터
-- dataset: 데이터 세트
-- decoder input IDs: 디코더 입력 ID
-- decoder models: 디코더 모델
-- deep learning (DL): 딥러닝
-- directory: 디렉터리
-- distributed training: 분산 학습
-- downstream: 다운스트림
-- encoder models: 인코더 모델
-- entity: 개체
-- epoch: 에폭
-- evaluation method: 평가 방법
-- feature extraction: 특성 추출
-- feature matrix: 특성 행렬(feature matrix)
-- fine-tunning: 미세 조정
-- finetuned models: 미세 조정 모델
-- hidden state: 은닉 상태
-- hyperparameter: 하이퍼파라미터
-- learning: 학습
-- load: 가져오다
-- method: 메소드
-- optimizer: 옵티마이저
-- pad (padding): 패드 (패딩)
-- parameter: 매개변수
-- pretrained model: 사전훈련된 모델
-- separator (* [SEP]를 부르는 이름): 분할 토큰
-- sequence: 시퀀스
-- silent error: 조용한 오류
-- token: 토큰
-- tokenizer: 토크나이저
-- training: 훈련
-- workflow: 워크플로우
-
-Please revise the translated sentences accordingly using the terms provided in this glossary.
-"""
-
-def get_prompt_with_glossary() -> str:
-    prompt = string.Template(
-        PROMPT_WITH_GLOSSARY
-    ).safe_substitute()
-    return prompt
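
A closing note on the deleted helper: string.Template(...).safe_substitute() on a template with no $ placeholders returns the text unchanged, so get_prompt_with_glossary simply returned PROMPT_WITH_GLOSSARY verbatim. If the glossary revision step ever needs to be restored under Gradio 4.44.0, the removed UI piece can be recreated as in the sketch below, which assumes the deleted prompt.py is kept available locally; the gr.Textbox arguments used (label, lines, show_copy_button, value) are the same ones the surviving prompt_output component already uses.

import gradio as gr

from prompt import get_prompt_with_glossary  # module deleted by this PR

with gr.Blocks() as demo:
    # Mirrors the removed "step 2" of the Web UI tab: show the glossary prompt
    # with a copy button so it can be pasted after the initial translation.
    gr.Markdown("2. After getting the initial translation, revise your translation using the following prompt.")
    prompt_with_glossary_output = gr.Textbox(
        label="Prompt with glossary",
        lines=3,
        show_copy_button=True,
        value=get_prompt_with_glossary(),
    )

demo.launch()
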