Spaces:
Running
Running
Add some description
Browse files
app.py
CHANGED
@@ -1,35 +1,50 @@
|
|
1 |
-
|
2 |
-
import
|
3 |
-
import
|
4 |
-
|
|
|
5 |
|
|
|
6 |
UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
|
7 |
|
8 |
def parse_document(file):
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
12 |
data = {
|
13 |
-
"base64_encoding": "['table']",
|
14 |
-
"model": "document-parse"
|
15 |
}
|
16 |
|
|
|
17 |
response = requests.post(url, headers=headers, files=files, data=data)
|
|
|
|
|
18 |
result = response.json()
|
19 |
html_text = result.get("content", {}).get("html", "")
|
20 |
return html_text
|
21 |
|
22 |
def chat_with_document(history, html_text, user_question):
|
|
|
|
|
|
|
23 |
if not html_text.strip():
|
|
|
24 |
return history, history, "โ ๏ธ ๋จผ์ ๋ฌธ์๋ฅผ ๋ณํํด์ฃผ์ธ์."
|
25 |
|
|
|
26 |
client = OpenAI(
|
27 |
api_key=UPSTAGE_API_KEY,
|
28 |
base_url="https://api.upstage.ai/v1"
|
29 |
)
|
30 |
|
|
|
31 |
history = history or []
|
32 |
|
|
|
33 |
system_prompt = f"""The following is a financial statement document extracted in HTML format.
|
34 |
Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
|
35 |
|
@@ -37,67 +52,85 @@ def chat_with_document(history, html_text, user_question):
|
|
37 |
{html_text}
|
38 |
"""
|
39 |
|
|
|
40 |
messages = [{"role": "system", "content": system_prompt}]
|
41 |
for user, bot in history:
|
42 |
messages.append({"role": "user", "content": user})
|
43 |
messages.append({"role": "assistant", "content": bot})
|
44 |
messages.append({"role": "user", "content": user_question})
|
45 |
|
|
|
46 |
try:
|
47 |
response = client.chat.completions.create(
|
48 |
-
model="solar-pro",
|
49 |
-
messages=messages,
|
50 |
-
temperature=0,
|
51 |
-
max_tokens=1024
|
52 |
)
|
53 |
-
bot_reply = response.choices[0].message.content
|
54 |
except Exception as e:
|
55 |
-
bot_reply = f"โ ๏ธ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
56 |
|
|
|
57 |
history.append((user_question, bot_reply))
|
58 |
-
return history, history, ""
|
|
|
|
|
59 |
|
60 |
def toggle_html_view(current_html, is_visible):
|
|
|
|
|
|
|
61 |
return (
|
62 |
-
gr.update(value=current_html, visible=not is_visible),
|
63 |
-
gr.update(value=current_html, visible=is_visible),
|
64 |
-
not is_visible
|
65 |
)
|
66 |
|
67 |
with gr.Blocks() as demo:
|
68 |
-
|
|
|
69 |
gr.Markdown("# ๐ ์ฌ๋ฌด์ ํ ๋ถ์ ์ฑ๋ด")
|
70 |
gr.Markdown("1. Document Parse API๋ก PDF ๋ฌธ์๋ฅผ HTML๋ก ๋ณํํฉ๋๋ค.\n"
|
71 |
"2. Solar LLM์ ํตํด ๋ฌธ์ ๊ธฐ๋ฐ ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค.")
|
72 |
gr.Markdown("์์ ํ์ผ์ Files ๋ฒํผ์ ํด๋ฆญํ๋ฉด ํ์ธ ๋ฐ ๋ค์ด๋ก๋ ๊ฐ๋ฅํฉ๋๋ค.")
|
73 |
|
|
|
|
|
74 |
with gr.Row():
|
75 |
-
file_input = gr.File(label="๐ ์ฌ๋ฌด์ ํ ์
๋ก๋")
|
76 |
-
parse_btn = gr.Button("๋ฌธ์ HTML ๋ณํ")
|
|
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
|
|
|
83 |
parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
|
|
|
|
|
84 |
toggle_html_btn.click(
|
85 |
fn=toggle_html_view,
|
86 |
inputs=[html_output, html_visible_state],
|
87 |
outputs=[html_output, html_display, html_visible_state]
|
88 |
)
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
94 |
|
|
|
95 |
with gr.Row():
|
96 |
gr.Markdown("๐ก ์์ ์ง๋ฌธ:")
|
97 |
ex1 = gr.Button("์ด๋ค ๊ธฐ์
์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?")
|
98 |
ex2 = gr.Button("3๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?")
|
99 |
|
100 |
-
# ์์ ์ง๋ฌธ
|
101 |
ex1.click(
|
102 |
fn=lambda: "์ด๋ค ๊ธฐ์
์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?",
|
103 |
inputs=[],
|
@@ -109,6 +142,7 @@ with gr.Blocks() as demo:
|
|
109 |
show_progress=True
|
110 |
)
|
111 |
|
|
|
112 |
ex2.click(
|
113 |
fn=lambda: "1๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?",
|
114 |
inputs=[],
|
@@ -120,6 +154,7 @@ with gr.Blocks() as demo:
|
|
120 |
show_progress=True
|
121 |
)
|
122 |
|
|
|
123 |
answer_btn.click(
|
124 |
fn=chat_with_document,
|
125 |
inputs=[chat_state, html_output, user_question],
|
@@ -127,6 +162,7 @@ with gr.Blocks() as demo:
|
|
127 |
show_progress=True
|
128 |
)
|
129 |
|
|
|
130 |
demo.css = """
|
131 |
#scrollable-html, #scrollable-html-display {
|
132 |
max-height: 400px;
|
@@ -136,4 +172,6 @@ demo.css = """
|
|
136 |
}
|
137 |
"""
|
138 |
|
139 |
-
|
|
|
|
|
|
1 |
+
# 필요한 라이브러리 불러오기
|
2 |
+
import gradio as gr # Gradio: ์น ์ธํฐํ์ด์ค ๊ตฌ์ฑ์ฉ ๋ผ์ด๋ธ๋ฌ๋ฆฌ
|
3 |
+
import requests # HTTP ์์ฒญ ์ ์ก์ฉ ๋ผ์ด๋ธ๋ฌ๋ฆฌ (API ํธ์ถ์ ์ฌ์ฉ)
|
4 |
+
import os # ํ๊ฒฝ๋ณ์์์ API ํค ๋ถ๋ฌ์ค๊ธฐ ์ํด ์ฌ์ฉ
|
5 |
+
from openai import OpenAI # Upstage Solar LLM ํธ์ถ์ ์ํ OpenAI ํธํ ํด๋ผ์ด์ธํธ
|
6 |
|
7 |
+
# 환경 변수에서 API 키 불러오기
|
8 |
UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
|
9 |
|
10 |
def parse_document(file):
    """Convert an uploaded document to HTML with the Upstage Document Parse API.

    Args:
        file: The uploaded file. Either an object whose ``name`` attribute is
            the local path (what this function originally required) or a
            plain path string. ``None`` means nothing was uploaded.

    Returns:
        The string stored under ``content`` -> ``html`` in the JSON response,
        or ``""`` when no file was given or the response lacks that field.
    """
    if file is None:
        # Nothing uploaded yet: return empty text instead of failing on
        # ``file.name``.
        return ""

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {"Authorization": f"Bearer {UPSTAGE_API_KEY}"}
    data = {
        # Sent as a literal string, exactly as the endpoint was called before.
        "base64_encoding": "['table']",
        "model": "document-parse",
    }

    # Accept both upload wrappers (path in ``.name``) and bare path strings.
    path = getattr(file, "name", file)

    # The handle only needs to live for the duration of the upload; the
    # context manager closes it even if the request raises.
    with open(path, "rb") as document:
        response = requests.post(
            url,
            headers=headers,
            files={"document": document},
            data=data,
        )

    result = response.json()
    # Tolerate a missing or null "content"/"html" field (e.g. an error
    # payload) by falling back to an empty string.
    content = result.get("content") or {}
    return content.get("html") or ""
|
29 |
|
30 |
def chat_with_document(history, html_text, user_question):
|
31 |
+
"""
|
32 |
+
์ด์ ๋ํ๊ธฐ๋ก์ ๋ฐํ์ผ๋ก ์ฌ์ฉ์ ์ง๋ฌธ์ ๋ํด ๋ฌธ์ ๊ธฐ๋ฐ ๋ต๋ณ์ ์์ฑํ๋ ๋ฉํฐํด ์ฑ๋ด ํจ์
|
33 |
+
"""
|
34 |
if not html_text.strip():
|
35 |
+
# ๋ฌธ์๊ฐ ์์ ๊ฒฝ์ฐ ๊ฒฝ๊ณ ๋ฉ์์ง ๋ฐํ
|
36 |
return history, history, "โ ๏ธ ๋จผ์ ๋ฌธ์๋ฅผ ๋ณํํด์ฃผ์ธ์."
|
37 |
|
38 |
+
# Client 호출
|
39 |
client = OpenAI(
|
40 |
api_key=UPSTAGE_API_KEY,
|
41 |
base_url="https://api.upstage.ai/v1"
|
42 |
)
|
43 |
|
44 |
+
# ์ด์ ์ฑํ
๊ธฐ๋ก์ด ์์ผ๋ฉด ๋น ๋ฆฌ์คํธ๋ก ์ด๊ธฐํ
|
45 |
history = history or []
|
46 |
|
47 |
+
# ์์คํ
ํ๋กฌํํธ
|
48 |
system_prompt = f"""The following is a financial statement document extracted in HTML format.
|
49 |
Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
|
50 |
|
|
|
52 |
{html_text}
|
53 |
"""
|
54 |
|
55 |
+
# ๋ํ ๋ด์ญ ๊ตฌ์ฑ (์์คํ
โ ์ด์ ์ฌ์ฉ์ ๋ฐ ๋ด ๋ํ โ ํ์ฌ ์ง๋ฌธ)
|
56 |
messages = [{"role": "system", "content": system_prompt}]
|
57 |
for user, bot in history:
|
58 |
messages.append({"role": "user", "content": user})
|
59 |
messages.append({"role": "assistant", "content": bot})
|
60 |
messages.append({"role": "user", "content": user_question})
|
61 |
|
62 |
+
# Solar Pro API ํธ์ถ
|
63 |
try:
|
64 |
response = client.chat.completions.create(
|
65 |
+
model="solar-pro", # ์ฌ์ฉํ Solar LLM ๋ชจ๋ธ
|
66 |
+
messages=messages, # ๊ตฌ์ฑ๋ ๋ํ ๋ฉ์์ง๋ค
|
67 |
+
temperature=0, # ์ฐฝ์์ฑ ์ต์ํ (์ ํํ ๋ต๋ณ ์ ๋)
|
68 |
+
max_tokens=1024 # ์ต๋ ์๋ต ๊ธธ์ด
|
69 |
)
|
70 |
+
bot_reply = response.choices[0].message.content # ์๋ต ํ
์คํธ ์ถ์ถ
|
71 |
except Exception as e:
|
72 |
+
bot_reply = f"โ ๏ธ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}" # ์์ธ ์ฒ๋ฆฌ
|
73 |
|
74 |
+
# ์ฑํ
๊ธฐ๋ก์ ์ง๋ฌธ/๋ต๋ณ ์ถ๊ฐ
|
75 |
history.append((user_question, bot_reply))
|
76 |
+
return history, history, "" # ์ฑํ
๊ธฐ๋ก, ์ํ, ์ง๋ฌธ ์
๋ ฅ์นธ ์ด๊ธฐํ
|
77 |
+
|
78 |
+
|
79 |
|
80 |
def toggle_html_view(current_html, is_visible):
    """Switch between the raw-text box and the rendered HTML view.

    The state starts as ``False`` while the text box is shown and the rendered
    view is hidden, so ``is_visible`` describes the rendered view. Both
    visibility flags are derived from the *new* state; previously they were
    derived from the old one, which made the first click a no-op and left the
    stored flag out of step with the screen afterwards.

    Args:
        current_html: Current document HTML; written to both components so
            they show the same content.
        is_visible: Whether the rendered HTML view is currently shown.

    Returns:
        A 3-tuple of (update for the text box, update for the rendered HTML
        view, new value of the visibility flag).
    """
    show_rendered = not is_visible
    return (
        gr.update(value=current_html, visible=not show_rendered),
        gr.update(value=current_html, visible=show_rendered),
        show_rendered,
    )
|
89 |
|
90 |
with gr.Blocks() as demo:
|
91 |
+
|
92 |
+
# ์๋จ ์ ๋ชฉ ๋ฐ ์ค๋ช
ํ์
|
93 |
gr.Markdown("# ๐ ์ฌ๋ฌด์ ํ ๋ถ์ ์ฑ๋ด")
|
94 |
gr.Markdown("1. Document Parse API๋ก PDF ๋ฌธ์๋ฅผ HTML๋ก ๋ณํํฉ๋๋ค.\n"
|
95 |
"2. Solar LLM์ ํตํด ๋ฌธ์ ๊ธฐ๋ฐ ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค.")
|
96 |
gr.Markdown("์์ ํ์ผ์ Files ๋ฒํผ์ ํด๋ฆญํ๋ฉด ํ์ธ ๋ฐ ๋ค์ด๋ก๋ ๊ฐ๋ฅํฉ๋๋ค.")
|
97 |
|
98 |
+
|
99 |
+
# ํ์ผ ์
๋ก๋ ๋ฐ ๋ฌธ์ ํ์ฑ ์์ญ
|
100 |
with gr.Row():
|
101 |
+
file_input = gr.File(label="๐ ์ฌ๋ฌด์ ํ ์
๋ก๋") # ํ์ผ ์
๋ก๋
|
102 |
+
parse_btn = gr.Button("๋ฌธ์ HTML ๋ณํ") # ํ์ฑ ๋ฒํผ
|
103 |
+
|
104 |
|
105 |
+
# HTML ์ถ๋ ฅ ๋ฐ ๋ณด๊ธฐ ํ ๊ธ
|
106 |
+
html_output = gr.Textbox(label="๐ ๋ฌธ์ ๋ด์ฉ", lines=10, visible=True, elem_id="scrollable-html") # ํ
์คํธ ํ์
|
107 |
+
html_display = gr.HTML(visible=False, elem_id="scrollable-html-display") # HTML ๋ ๋๋ง
|
108 |
+
toggle_html_btn = gr.Button("๐ HTML ๋ณด๊ธฐ ์ ํ") # ๋ณด๊ธฐ ์ ํ ๋ฒํผ
|
109 |
+
html_visible_state = gr.State(False) # ๋ณด๊ธฐ ์ํ ์ ์ฅ (๊ธฐ๋ณธ: ์ ๋ณด์)
|
110 |
|
111 |
+
# ๋ฌธ์ ๋ณํ ๋ฒํผ ํด๋ฆญ โ HTML ์ถ๋ ฅ
|
112 |
parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
|
113 |
+
|
114 |
+
# ๋ณด๊ธฐ ์ ํ ๋ฒํผ ํด๋ฆญ โ ๋ ์์ญ ํ ๊ธ
|
115 |
toggle_html_btn.click(
|
116 |
fn=toggle_html_view,
|
117 |
inputs=[html_output, html_visible_state],
|
118 |
outputs=[html_output, html_display, html_visible_state]
|
119 |
)
|
120 |
|
121 |
+
# ๋ฌธ์ ๊ธฐ๋ฐ ์ฑ๋ด ์ธํฐํ์ด์ค
|
122 |
+
chatbot = gr.Chatbot(label="๐ฌ ๋ฌธ์ ๊ธฐ๋ฐ Q&A", height=400) # ์ฑํ
์ฐฝ
|
123 |
+
user_question = gr.Textbox(label="โ ์ง๋ฌธ์ ์
๋ ฅํ์ธ์", lines=2) # ์ฌ์ฉ์ ์ง๋ฌธ
|
124 |
+
answer_btn = gr.Button("๋ต๋ณ ์์ฑ") # ๋ต๋ณ ๋ฒํผ
|
125 |
+
chat_state = gr.State([]) # ์ฑํ
์ํ ์ ์ฅ
|
126 |
|
127 |
+
# ์์ ์ง๋ฌธ ๋ฒํผ
|
128 |
with gr.Row():
|
129 |
gr.Markdown("๐ก ์์ ์ง๋ฌธ:")
|
130 |
ex1 = gr.Button("์ด๋ค ๊ธฐ์
์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?")
|
131 |
ex2 = gr.Button("3๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?")
|
132 |
|
133 |
+
# ์์ ์ง๋ฌธ 1
|
134 |
ex1.click(
|
135 |
fn=lambda: "์ด๋ค ๊ธฐ์
์ ์ฌ๋ฌด์ ํ์ธ๊ฐ์?",
|
136 |
inputs=[],
|
|
|
142 |
show_progress=True
|
143 |
)
|
144 |
|
145 |
+
# ์์ ์ง๋ฌธ 2
|
146 |
ex2.click(
|
147 |
fn=lambda: "1๋ถ๊ธฐ ์ด ์๋งค์ถ์ ์ผ๋ง์ธ๊ฐ์?",
|
148 |
inputs=[],
|
|
|
154 |
show_progress=True
|
155 |
)
|
156 |
|
157 |
+
# ์ฌ์ฉ์๊ฐ ์ง๋ฌธ ์
๋ ฅ ํ ๋ฒํผ ํด๋ฆญ ์ ์๋ต ์์ฑ
|
158 |
answer_btn.click(
|
159 |
fn=chat_with_document,
|
160 |
inputs=[chat_state, html_output, user_question],
|
|
|
162 |
show_progress=True
|
163 |
)
|
164 |
|
165 |
+
# ์คํฌ๋กค ๊ฐ๋ฅํ HTML ์์ญ ์คํ์ผ ์ถ๊ฐ
|
166 |
demo.css = """
|
167 |
#scrollable-html, #scrollable-html-display {
|
168 |
max-height: 400px;
|
|
|
172 |
}
|
173 |
"""
|
174 |
|
175 |
+
# 앱 실행
|
176 |
+
if __name__ == "__main__":
|
177 |
+
demo.launch()
|