Update app.py
Browse files
app.py
CHANGED
@@ -11,10 +11,12 @@ from functools import lru_cache
|
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
import math
|
13 |
import nltk
|
14 |
-
nltk.download('punkt')
|
15 |
from nltk.tokenize import sent_tokenize
|
16 |
from transformers import AutoTokenizer
|
17 |
|
|
|
|
|
|
|
18 |
# μΆλ‘ API ν΄λΌμ΄μΈνΈ μ€μ
|
19 |
hf_client = InferenceClient(
|
20 |
"CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
|
@@ -63,13 +65,13 @@ def preprocess_single_chunk(chunk: str) -> str:
|
|
63 |
prompt=full_prompt,
|
64 |
max_new_tokens=2000, # ν ν° μ μ ν
|
65 |
temperature=0.1, # λ κ²°μ μ μΈ μΆλ ₯
|
66 |
-
top_p=0.5,
|
67 |
-
stream=False
|
68 |
)
|
69 |
-
|
70 |
return response.strip()
|
71 |
except Exception as e:
|
72 |
-
|
|
|
73 |
|
74 |
def load_code(filename: str) -> str:
|
75 |
try:
|
@@ -78,7 +80,8 @@ def load_code(filename: str) -> str:
|
|
78 |
except FileNotFoundError:
|
79 |
return f"{filename} νμΌμ μ°Ύμ μ μμ΅λλ€."
|
80 |
except Exception as e:
|
81 |
-
|
|
|
82 |
|
83 |
def load_parquet(filename: str) -> str:
|
84 |
try:
|
@@ -87,7 +90,8 @@ def load_parquet(filename: str) -> str:
|
|
87 |
except FileNotFoundError:
|
88 |
return f"{filename} νμΌμ μ°Ύμ μ μμ΅λλ€."
|
89 |
except Exception as e:
|
90 |
-
|
|
|
91 |
|
92 |
def respond(
|
93 |
message: str,
|
@@ -136,9 +140,9 @@ def respond(
|
|
136 |
response += msg
|
137 |
yield response
|
138 |
except Exception as e:
|
139 |
-
error_message = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}
|
140 |
print(error_message)
|
141 |
-
yield
|
142 |
|
143 |
def upload_csv(file_path: str) -> Tuple[str, str]:
|
144 |
try:
|
@@ -160,7 +164,8 @@ def upload_csv(file_path: str) -> Tuple[str, str]:
|
|
160 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
161 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_filename
|
162 |
except Exception as e:
|
163 |
-
|
|
|
164 |
|
165 |
def upload_parquet(file_path: str) -> Tuple[str, str, str]:
|
166 |
try:
|
@@ -172,25 +177,26 @@ def upload_parquet(file_path: str) -> Tuple[str, str, str]:
|
|
172 |
parquet_json = df.to_json(orient='records', force_ascii=False)
|
173 |
return "Parquet νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλμμ΅λλ€.", parquet_content, parquet_json
|
174 |
except Exception as e:
|
175 |
-
|
|
|
176 |
|
177 |
def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
178 |
try:
|
179 |
from io import StringIO
|
180 |
import csv
|
181 |
-
|
182 |
# μ
λ ₯ ν
μ€νΈ μ μ
|
183 |
lines = text.strip().split('\n')
|
184 |
cleaned_lines = []
|
185 |
-
|
186 |
for line in lines:
|
187 |
# λΉ μ€ κ±΄λλ°κΈ°
|
188 |
if not line.strip():
|
189 |
continue
|
190 |
-
|
191 |
# μλ°μ΄ν μ κ·ν
|
192 |
line = line.replace('""', '"') # μ€λ³΅ μλ°μ΄ν μ²λ¦¬
|
193 |
-
|
194 |
# CSV νμ±μ μν μμ StringIO κ°μ²΄ μμ±
|
195 |
temp_buffer = StringIO(line)
|
196 |
try:
|
@@ -201,14 +207,14 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
|
201 |
# κ° νλλ₯Ό μ μ ν ν¬λ§·ν
|
202 |
formatted_line = f'{parsed_line[0]},"{parsed_line[1]}","{parsed_line[2]}","{parsed_line[3]}"'
|
203 |
cleaned_lines.append(formatted_line)
|
204 |
-
except:
|
205 |
continue
|
206 |
finally:
|
207 |
temp_buffer.close()
|
208 |
-
|
209 |
# μ μ λ CSV λ°μ΄ν° μμ±
|
210 |
cleaned_csv = '\n'.join(cleaned_lines)
|
211 |
-
|
212 |
# DataFrame μμ±
|
213 |
df = pd.read_csv(
|
214 |
StringIO(cleaned_csv),
|
@@ -217,28 +223,28 @@ def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
|
217 |
escapechar='\\',
|
218 |
names=['id', 'text', 'label', 'metadata']
|
219 |
)
|
220 |
-
|
221 |
# λ°μ΄ν° μ ν μ΅μ ν
|
222 |
df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
|
223 |
-
|
224 |
# Parquet νμΌλ‘ λ³ν
|
225 |
parquet_filename = 'text_to_parquet.parquet'
|
226 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
227 |
-
|
228 |
# Parquet νμΌ λ΄μ© 미리보기
|
229 |
parquet_content = load_parquet(parquet_filename)
|
230 |
-
|
231 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ λ³νλμμ΅λλ€.", parquet_content, parquet_filename
|
232 |
-
|
233 |
except Exception as e:
|
234 |
error_message = f"ν
μ€νΈ λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
235 |
print(f"{error_message}\n{traceback.format_exc()}")
|
236 |
-
return
|
237 |
|
238 |
def preprocess_text_with_llm(input_text: str) -> str:
|
239 |
if not input_text.strip():
|
240 |
-
return "μ
λ ₯ ν
μ€νΈκ°
|
241 |
-
|
242 |
system_prompt = """λΉμ μ λ°μ΄ν° μ μ²λ¦¬ μ λ¬Έκ°μ
λλ€. μ
λ ₯λ ν
μ€νΈλ₯Ό CSV λ°μ΄ν°μ
νμμΌλ‘ λ³ννμΈμ.
|
243 |
|
244 |
κ·μΉ:
|
@@ -270,7 +276,7 @@ def preprocess_text_with_llm(input_text: str) -> str:
|
|
270 |
try:
|
271 |
# ν
μ€νΈλ₯Ό μ²ν¬λ‘ λΆν
|
272 |
chunks = chunk_text(input_text)
|
273 |
-
|
274 |
# λ³λ ¬ μ²λ¦¬λ‘ μ²ν¬λ€μ μ²λ¦¬
|
275 |
with ThreadPoolExecutor(max_workers=3) as executor:
|
276 |
futures = []
|
@@ -286,18 +292,19 @@ def preprocess_text_with_llm(input_text: str) -> str:
|
|
286 |
stream=False
|
287 |
)
|
288 |
futures.append(future)
|
|
|
289 |
processed_chunks = [future.result() for future in futures]
|
290 |
-
|
291 |
# κ²°κ³Ό λ³ν© λ° μ€λ³΅ μ κ±°
|
292 |
all_lines = []
|
293 |
seen_texts = set()
|
294 |
current_id = 1
|
295 |
-
|
296 |
for chunk_result in processed_chunks:
|
297 |
# EOS_TOKEN μ²λ¦¬
|
298 |
if "<EOS_TOKEN>" in chunk_result:
|
299 |
chunk_result = chunk_result.split("<EOS_TOKEN>")[0]
|
300 |
-
|
301 |
lines = chunk_result.strip().split('\n')
|
302 |
for line in lines:
|
303 |
line = line.strip()
|
@@ -310,9 +317,9 @@ def preprocess_text_with_llm(input_text: str) -> str:
|
|
310 |
all_lines.append(new_line)
|
311 |
seen_texts.add(new_line)
|
312 |
current_id += 1
|
313 |
-
|
314 |
processed_text = '\n'.join(all_lines)
|
315 |
-
|
316 |
# CSV νμ κ²μ¦
|
317 |
try:
|
318 |
from io import StringIO
|
@@ -321,11 +328,11 @@ def preprocess_text_with_llm(input_text: str) -> str:
|
|
321 |
return processed_text
|
322 |
except csv.Error:
|
323 |
return "LLMμ΄ μ¬λ°λ₯Έ CSV νμμ μμ±νμ§ λͺ»νμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ."
|
324 |
-
|
325 |
except Exception as e:
|
326 |
error_message = f"μ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
327 |
print(error_message)
|
328 |
-
return
|
329 |
|
330 |
# CSS μ€μ
|
331 |
css = """
|
@@ -407,7 +414,8 @@ with gr.Blocks(css=css) as demo:
|
|
407 |
# μ΄μμ€ν΄νΈμ μλ΅μ νμ€ν 리μ μΆκ°
|
408 |
history.append({"role": "assistant", "content": partial_response})
|
409 |
except Exception as e:
|
410 |
-
|
|
|
411 |
history.append({"role": "assistant", "content": response})
|
412 |
yield history, ""
|
413 |
|
@@ -528,46 +536,48 @@ with gr.Blocks(css=css) as demo:
|
|
528 |
lines=15,
|
529 |
placeholder="μ¬κΈ°μ μ μ²λ¦¬ν ν
μ€νΈλ₯Ό μ
λ ₯νμΈμ..."
|
530 |
)
|
531 |
-
|
532 |
with gr.Row():
|
533 |
preprocess_button = gr.Button("μ μ²λ¦¬ μ€ν", variant="primary")
|
534 |
clear_button = gr.Button("μ΄κΈ°ν")
|
535 |
-
|
536 |
preprocess_status = gr.Textbox(
|
537 |
label="μ μ²λ¦¬ μν",
|
538 |
interactive=False,
|
539 |
value="λκΈ° μ€..."
|
540 |
)
|
541 |
-
|
542 |
processed_text_output = gr.Textbox(
|
543 |
label="μ μ²λ¦¬λ λ°μ΄ν°μ
μΆλ ₯",
|
544 |
lines=15,
|
545 |
interactive=False
|
546 |
)
|
547 |
-
|
548 |
# Parquet λ³ν λ° λ€μ΄λ‘λ μΉμ
|
549 |
convert_to_parquet_button = gr.Button("ParquetμΌλ‘ λ³ν")
|
550 |
download_parquet = gr.File(label="λ³νλ Parquet νμΌ λ€μ΄λ‘λ")
|
551 |
|
552 |
def handle_text_preprocessing(input_text: str):
|
553 |
if not input_text.strip():
|
554 |
-
|
555 |
-
|
|
|
556 |
try:
|
557 |
preprocess_status_msg = "μ μ²λ¦¬λ₯Ό μμν©λλ€..."
|
558 |
yield preprocess_status_msg, ""
|
559 |
-
|
560 |
processed_text = preprocess_text_with_llm(input_text)
|
561 |
-
|
562 |
if processed_text:
|
563 |
preprocess_status_msg = "μ μ²λ¦¬κ° μλ£λμμ΅λλ€."
|
564 |
yield preprocess_status_msg, processed_text
|
565 |
else:
|
566 |
preprocess_status_msg = "μ μ²λ¦¬ κ²°κ³Όκ° μμ΅λλ€."
|
567 |
yield preprocess_status_msg, ""
|
568 |
-
|
569 |
except Exception as e:
|
570 |
-
error_msg =
|
|
|
571 |
yield error_msg, ""
|
572 |
|
573 |
def clear_inputs():
|
@@ -576,14 +586,15 @@ with gr.Blocks(css=css) as demo:
|
|
576 |
def convert_to_parquet_file(processed_text: str):
|
577 |
if not processed_text.strip():
|
578 |
return "λ³νν ν
μ€νΈκ° μμ΅λλ€.", None
|
579 |
-
|
580 |
try:
|
581 |
message, parquet_content, parquet_filename = text_to_parquet(processed_text)
|
582 |
if parquet_filename:
|
583 |
return message, parquet_filename
|
584 |
return message, None
|
585 |
except Exception as e:
|
586 |
-
|
|
|
587 |
|
588 |
# μ΄λ²€νΈ νΈλ€λ¬ μ°κ²°
|
589 |
preprocess_button.click(
|
@@ -619,3 +630,4 @@ with gr.Blocks(css=css) as demo:
|
|
619 |
|
620 |
if __name__ == "__main__":
|
621 |
demo.launch(share=True) # μ½λμμ μ€λ₯λ κ°μ μ΄ νμν μ¬νμ μΆλ‘ νμ¬ λ³΄κ³ νλΌ
|
|
|
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
import math
|
13 |
import nltk
|
|
|
14 |
from nltk.tokenize import sent_tokenize
|
15 |
from transformers import AutoTokenizer
|
16 |
|
17 |
+
# NLTK λ°μ΄ν° λ€μ΄λ‘λ
|
18 |
+
nltk.download('punkt')
|
19 |
+
|
20 |
# μΆλ‘ API ν΄λΌμ΄μΈνΈ μ€μ
|
21 |
hf_client = InferenceClient(
|
22 |
"CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN")
|
|
|
65 |
prompt=full_prompt,
|
66 |
max_new_tokens=2000, # ν ν° μ μ ν
|
67 |
temperature=0.1, # λ κ²°μ μ μΈ μΆλ ₯
|
68 |
+
top_p=0.5, # λ μ§μ€λ μΆλ ₯
|
69 |
+
stream=False # μ€νΈλ¦¬λ° λΉνμ±ν
|
70 |
)
|
|
|
71 |
return response.strip()
|
72 |
except Exception as e:
|
73 |
+
print(f"μ²ν¬ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
74 |
+
return "μ²ν¬ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
75 |
|
76 |
def load_code(filename: str) -> str:
|
77 |
try:
|
|
|
80 |
except FileNotFoundError:
|
81 |
return f"{filename} νμΌμ μ°Ύμ μ μμ΅λλ€."
|
82 |
except Exception as e:
|
83 |
+
print(f"νμΌ μ½κΈ° μ€λ₯: {str(e)}\n{traceback.format_exc()}")
|
84 |
+
return "νμΌμ μ½λ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
85 |
|
86 |
def load_parquet(filename: str) -> str:
|
87 |
try:
|
|
|
90 |
except FileNotFoundError:
|
91 |
return f"{filename} νμΌμ μ°Ύμ μ μμ΅λλ€."
|
92 |
except Exception as e:
|
93 |
+
print(f"Parquet νμΌ λ‘λ μ€λ₯: {str(e)}\n{traceback.format_exc()}")
|
94 |
+
return "νμΌμ μ½λ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
95 |
|
96 |
def respond(
|
97 |
message: str,
|
|
|
140 |
response += msg
|
141 |
yield response
|
142 |
except Exception as e:
|
143 |
+
error_message = f"μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
144 |
print(error_message)
|
145 |
+
yield "μΆλ‘ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
146 |
|
147 |
def upload_csv(file_path: str) -> Tuple[str, str]:
|
148 |
try:
|
|
|
164 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
165 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλκ³ λ³νλμμ΅λλ€.", parquet_filename
|
166 |
except Exception as e:
|
167 |
+
print(f"CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
168 |
+
return "CSV νμΌ μ
λ‘λ λ° λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ.", ""
|
169 |
|
170 |
def upload_parquet(file_path: str) -> Tuple[str, str, str]:
|
171 |
try:
|
|
|
177 |
parquet_json = df.to_json(orient='records', force_ascii=False)
|
178 |
return "Parquet νμΌμ΄ μ±κ³΅μ μΌλ‘ μ
λ‘λλμμ΅λλ€.", parquet_content, parquet_json
|
179 |
except Exception as e:
|
180 |
+
print(f"Parquet νμΌ μ
λ‘λ μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
181 |
+
return "Parquet νμΌ μ
λ‘λ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ.", "", ""
|
182 |
|
183 |
def text_to_parquet(text: str) -> Tuple[str, str, str]:
|
184 |
try:
|
185 |
from io import StringIO
|
186 |
import csv
|
187 |
+
|
188 |
# μ
λ ₯ ν
μ€νΈ μ μ
|
189 |
lines = text.strip().split('\n')
|
190 |
cleaned_lines = []
|
191 |
+
|
192 |
for line in lines:
|
193 |
# λΉ μ€ κ±΄λλ°κΈ°
|
194 |
if not line.strip():
|
195 |
continue
|
196 |
+
|
197 |
# μλ°μ΄ν μ κ·ν
|
198 |
line = line.replace('""', '"') # μ€λ³΅ μλ°μ΄ν μ²λ¦¬
|
199 |
+
|
200 |
# CSV νμ±μ μν μμ StringIO κ°μ²΄ μμ±
|
201 |
temp_buffer = StringIO(line)
|
202 |
try:
|
|
|
207 |
# κ° νλλ₯Ό μ μ ν ν¬λ§·ν
|
208 |
formatted_line = f'{parsed_line[0]},"{parsed_line[1]}","{parsed_line[2]}","{parsed_line[3]}"'
|
209 |
cleaned_lines.append(formatted_line)
|
210 |
+
except Exception as e:
|
211 |
continue
|
212 |
finally:
|
213 |
temp_buffer.close()
|
214 |
+
|
215 |
# μ μ λ CSV λ°μ΄ν° μμ±
|
216 |
cleaned_csv = '\n'.join(cleaned_lines)
|
217 |
+
|
218 |
# DataFrame μμ±
|
219 |
df = pd.read_csv(
|
220 |
StringIO(cleaned_csv),
|
|
|
223 |
escapechar='\\',
|
224 |
names=['id', 'text', 'label', 'metadata']
|
225 |
)
|
226 |
+
|
227 |
# λ°μ΄ν° μ ν μ΅μ ν
|
228 |
df = df.astype({'id': 'int32', 'text': 'string', 'label': 'string', 'metadata': 'string'})
|
229 |
+
|
230 |
# Parquet νμΌλ‘ λ³ν
|
231 |
parquet_filename = 'text_to_parquet.parquet'
|
232 |
df.to_parquet(parquet_filename, engine='pyarrow', compression='snappy')
|
233 |
+
|
234 |
# Parquet νμΌ λ΄μ© 미리보기
|
235 |
parquet_content = load_parquet(parquet_filename)
|
236 |
+
|
237 |
return f"{parquet_filename} νμΌμ΄ μ±κ³΅μ μΌλ‘ λ³νλμμ΅λλ€.", parquet_content, parquet_filename
|
238 |
+
|
239 |
except Exception as e:
|
240 |
error_message = f"ν
μ€νΈ λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
241 |
print(f"{error_message}\n{traceback.format_exc()}")
|
242 |
+
return "ν
μ€νΈ λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ.", "", ""
|
243 |
|
244 |
def preprocess_text_with_llm(input_text: str) -> str:
|
245 |
if not input_text.strip():
|
246 |
+
return "μ
λ ₯ ν
μ€νΈκ° μμ΅λλ€."
|
247 |
+
|
248 |
system_prompt = """λΉμ μ λ°μ΄ν° μ μ²λ¦¬ μ λ¬Έκ°μ
λλ€. μ
λ ₯λ ν
μ€νΈλ₯Ό CSV λ°μ΄ν°μ
νμμΌλ‘ λ³ννμΈμ.
|
249 |
|
250 |
κ·μΉ:
|
|
|
276 |
try:
|
277 |
# ν
μ€νΈλ₯Ό μ²ν¬λ‘ λΆν
|
278 |
chunks = chunk_text(input_text)
|
279 |
+
|
280 |
# λ³λ ¬ μ²λ¦¬λ‘ μ²ν¬λ€μ μ²λ¦¬
|
281 |
with ThreadPoolExecutor(max_workers=3) as executor:
|
282 |
futures = []
|
|
|
292 |
stream=False
|
293 |
)
|
294 |
futures.append(future)
|
295 |
+
|
296 |
processed_chunks = [future.result() for future in futures]
|
297 |
+
|
298 |
# κ²°κ³Ό λ³ν© λ° μ€λ³΅ μ κ±°
|
299 |
all_lines = []
|
300 |
seen_texts = set()
|
301 |
current_id = 1
|
302 |
+
|
303 |
for chunk_result in processed_chunks:
|
304 |
# EOS_TOKEN μ²λ¦¬
|
305 |
if "<EOS_TOKEN>" in chunk_result:
|
306 |
chunk_result = chunk_result.split("<EOS_TOKEN>")[0]
|
307 |
+
|
308 |
lines = chunk_result.strip().split('\n')
|
309 |
for line in lines:
|
310 |
line = line.strip()
|
|
|
317 |
all_lines.append(new_line)
|
318 |
seen_texts.add(new_line)
|
319 |
current_id += 1
|
320 |
+
|
321 |
processed_text = '\n'.join(all_lines)
|
322 |
+
|
323 |
# CSV νμ κ²μ¦
|
324 |
try:
|
325 |
from io import StringIO
|
|
|
328 |
return processed_text
|
329 |
except csv.Error:
|
330 |
return "LLMμ΄ μ¬λ°λ₯Έ CSV νμμ μμ±νμ§ λͺ»νμ΅λλ€. λ€μ μλν΄μ£ΌμΈμ."
|
331 |
+
|
332 |
except Exception as e:
|
333 |
error_message = f"μ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€: {str(e)}"
|
334 |
print(error_message)
|
335 |
+
return "μ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
336 |
|
337 |
# CSS μ€μ
|
338 |
css = """
|
|
|
414 |
# μ΄μμ€ν΄νΈμ μλ΅μ νμ€ν 리μ μΆκ°
|
415 |
history.append({"role": "assistant", "content": partial_response})
|
416 |
except Exception as e:
|
417 |
+
print(f"λ©μμ§ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
418 |
+
response = "λ©μμ§ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
419 |
history.append({"role": "assistant", "content": response})
|
420 |
yield history, ""
|
421 |
|
|
|
536 |
lines=15,
|
537 |
placeholder="μ¬κΈ°μ μ μ²λ¦¬ν ν
μ€νΈλ₯Ό μ
λ ₯νμΈμ..."
|
538 |
)
|
539 |
+
|
540 |
with gr.Row():
|
541 |
preprocess_button = gr.Button("μ μ²λ¦¬ μ€ν", variant="primary")
|
542 |
clear_button = gr.Button("μ΄κΈ°ν")
|
543 |
+
|
544 |
preprocess_status = gr.Textbox(
|
545 |
label="μ μ²λ¦¬ μν",
|
546 |
interactive=False,
|
547 |
value="λκΈ° μ€..."
|
548 |
)
|
549 |
+
|
550 |
processed_text_output = gr.Textbox(
|
551 |
label="μ μ²λ¦¬λ λ°μ΄ν°μ
μΆλ ₯",
|
552 |
lines=15,
|
553 |
interactive=False
|
554 |
)
|
555 |
+
|
556 |
# Parquet λ³ν λ° λ€μ΄λ‘λ μΉμ
|
557 |
convert_to_parquet_button = gr.Button("ParquetμΌλ‘ λ³ν")
|
558 |
download_parquet = gr.File(label="λ³νλ Parquet νμΌ λ€μ΄λ‘λ")
|
559 |
|
560 |
def handle_text_preprocessing(input_text: str):
|
561 |
if not input_text.strip():
|
562 |
+
yield "μ
λ ₯ ν
μ€νΈκ° μμ΅λλ€.", ""
|
563 |
+
return
|
564 |
+
|
565 |
try:
|
566 |
preprocess_status_msg = "μ μ²λ¦¬λ₯Ό μμν©λλ€..."
|
567 |
yield preprocess_status_msg, ""
|
568 |
+
|
569 |
processed_text = preprocess_text_with_llm(input_text)
|
570 |
+
|
571 |
if processed_text:
|
572 |
preprocess_status_msg = "μ μ²λ¦¬κ° μλ£λμμ΅λλ€."
|
573 |
yield preprocess_status_msg, processed_text
|
574 |
else:
|
575 |
preprocess_status_msg = "μ μ²λ¦¬ κ²°κ³Όκ° μμ΅λλ€."
|
576 |
yield preprocess_status_msg, ""
|
577 |
+
|
578 |
except Exception as e:
|
579 |
+
error_msg = "μ μ²λ¦¬ μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ."
|
580 |
+
print(f"μ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
581 |
yield error_msg, ""
|
582 |
|
583 |
def clear_inputs():
|
|
|
586 |
def convert_to_parquet_file(processed_text: str):
|
587 |
if not processed_text.strip():
|
588 |
return "λ³νν ν
μ€νΈκ° μμ΅λλ€.", None
|
589 |
+
|
590 |
try:
|
591 |
message, parquet_content, parquet_filename = text_to_parquet(processed_text)
|
592 |
if parquet_filename:
|
593 |
return message, parquet_filename
|
594 |
return message, None
|
595 |
except Exception as e:
|
596 |
+
print(f"Parquet λ³ν μ€ μ€λ₯ λ°μ: {str(e)}\n{traceback.format_exc()}")
|
597 |
+
return "Parquet λ³ν μ€ μ€λ₯κ° λ°μνμ΅λλ€. κ΄λ¦¬μμκ² λ¬ΈμνμΈμ.", None
|
598 |
|
599 |
# μ΄λ²€νΈ νΈλ€λ¬ μ°κ²°
|
600 |
preprocess_button.click(
|
|
|
630 |
|
631 |
if __name__ == "__main__":
|
632 |
demo.launch(share=True) # μ½λμμ μ€λ₯λ κ°μ μ΄ νμν μ¬νμ μΆλ‘ νμ¬ λ³΄κ³ νλΌ
|
633 |
+
|