wanda222 committed on
Commit
ae11c94
·
verified ·
1 Parent(s): 8f141f5

Add some description

Browse files
Files changed (1) hide show
  1. app.py +70 -32
app.py CHANGED
@@ -1,35 +1,50 @@
1
- import gradio as gr
2
- import requests
3
- import os
4
- from openai import OpenAI
 
5
 
 
6
  UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
7
 
8
  def parse_document(file):
9
- url = "https://api.upstage.ai/v1/document-ai/document-parse"
10
- headers = {'Authorization': f'Bearer {UPSTAGE_API_KEY}'}
11
- files = {"document": open(file.name, "rb")}
 
 
 
12
  data = {
13
- "base64_encoding": "['table']",
14
- "model": "document-parse"
15
  }
16
 
 
17
  response = requests.post(url, headers=headers, files=files, data=data)
 
 
18
  result = response.json()
19
  html_text = result.get("content", {}).get("html", "")
20
  return html_text
21
 
22
  def chat_with_document(history, html_text, user_question):
 
 
 
23
  if not html_text.strip():
 
24
  return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."
25
 
 
26
  client = OpenAI(
27
  api_key=UPSTAGE_API_KEY,
28
  base_url="https://api.upstage.ai/v1"
29
  )
30
 
 
31
  history = history or []
32
 
 
33
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
34
  Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
35
 
@@ -37,67 +52,85 @@ def chat_with_document(history, html_text, user_question):
37
  {html_text}
38
  """
39
 
 
40
  messages = [{"role": "system", "content": system_prompt}]
41
  for user, bot in history:
42
  messages.append({"role": "user", "content": user})
43
  messages.append({"role": "assistant", "content": bot})
44
  messages.append({"role": "user", "content": user_question})
45
 
 
46
  try:
47
  response = client.chat.completions.create(
48
- model="solar-pro",
49
- messages=messages,
50
- temperature=0,
51
- max_tokens=1024
52
  )
53
- bot_reply = response.choices[0].message.content
54
  except Exception as e:
55
- bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"
56
 
 
57
  history.append((user_question, bot_reply))
58
- return history, history, ""
 
 
59
 
60
  def toggle_html_view(current_html, is_visible):
 
 
 
61
  return (
62
- gr.update(value=current_html, visible=not is_visible),
63
- gr.update(value=current_html, visible=is_visible),
64
- not is_visible
65
  )
66
 
67
  with gr.Blocks() as demo:
68
-
 
69
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
70
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
71
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
72
  gr.Markdown("์˜ˆ์ œ ํŒŒ์ผ์€ Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
73
 
 
 
74
  with gr.Row():
75
- file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ")
76
- parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜")
 
77
 
78
- html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html")
79
- html_display = gr.HTML(visible=False, elem_id="scrollable-html-display")
80
- toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜")
81
- html_visible_state = gr.State(False)
 
82
 
 
83
  parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
 
 
84
  toggle_html_btn.click(
85
  fn=toggle_html_view,
86
  inputs=[html_output, html_visible_state],
87
  outputs=[html_output, html_display, html_visible_state]
88
  )
89
 
90
- chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400)
91
- user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2)
92
- answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ")
93
- chat_state = gr.State([])
 
94
 
 
95
  with gr.Row():
96
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
97
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
98
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
99
 
100
- # ์˜ˆ์ œ ์งˆ๋ฌธ โ†’ ์งˆ๋ฌธ ์ž…๋ ฅ + ์ž๋™ ์‘๋‹ต
101
  ex1.click(
102
  fn=lambda: "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?",
103
  inputs=[],
@@ -109,6 +142,7 @@ with gr.Blocks() as demo:
109
  show_progress=True
110
  )
111
 
 
112
  ex2.click(
113
  fn=lambda: "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?",
114
  inputs=[],
@@ -120,6 +154,7 @@ with gr.Blocks() as demo:
120
  show_progress=True
121
  )
122
 
 
123
  answer_btn.click(
124
  fn=chat_with_document,
125
  inputs=[chat_state, html_output, user_question],
@@ -127,6 +162,7 @@ with gr.Blocks() as demo:
127
  show_progress=True
128
  )
129
 
 
130
  demo.css = """
131
  #scrollable-html, #scrollable-html-display {
132
  max-height: 400px;
@@ -136,4 +172,6 @@ demo.css = """
136
  }
137
  """
138
 
139
- demo.launch()
 
 
 
1
+ # 필요한 라이브러리 불러오기
2
+ import gradio as gr # Gradio: ์›น ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ์šฉ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ
3
+ import requests # HTTP ์š”์ฒญ ์ „์†ก์šฉ ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ (API ํ˜ธ์ถœ์— ์‚ฌ์šฉ)
4
+ import os # ํ™˜๊ฒฝ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๋ถˆ๋Ÿฌ์˜ค๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ
5
+ from openai import OpenAI # Upstage Solar LLM ํ˜ธ์ถœ์„ ์œ„ํ•œ OpenAI ํ˜ธํ™˜ ํด๋ผ์ด์–ธํŠธ
6
 
7
+ # 환경 변수에서 API 키 불러오기
8
  UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")
9
 
def parse_document(file):
    """Convert an uploaded document to HTML via the Upstage Document Parse API.

    Args:
        file: The upload handed over by the file-input component. Either an
            object exposing the local path as ``.name`` or the path itself.
            ``None`` (nothing uploaded) is accepted.

    Returns:
        The HTML string found under ``content.html`` in the JSON response, or
        an empty string when no file was given or the response carries no HTML.
    """
    if file is None:
        # Nothing to parse; avoid an AttributeError on ``file.name``.
        return ""

    url = "https://api.upstage.ai/v1/document-ai/document-parse"
    headers = {"Authorization": f"Bearer {UPSTAGE_API_KEY}"}
    data = {
        # Ask the API to return tables base64-encoded.
        "base64_encoding": "['table']",
        "model": "document-parse",
    }

    # Accept both file-like wrappers (``.name``) and plain path strings.
    path = getattr(file, "name", file)

    # The context manager closes the handle once the upload is finished;
    # the previous version left it open for the lifetime of the process.
    with open(path, "rb") as document:
        response = requests.post(
            url,
            headers=headers,
            files={"document": document},
            data=data,
        )

    result = response.json()
    return result.get("content", {}).get("html", "")
29
 
30
  def chat_with_document(history, html_text, user_question):
31
+ """
32
+ ์ด์ „ ๋Œ€ํ™”๊ธฐ๋ก์„ ๋ฐ”ํƒ•์œผ๋กœ ์‚ฌ์šฉ์ž ์งˆ๋ฌธ์— ๋Œ€ํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๋Š” ๋ฉ€ํ‹ฐํ„ด ์ฑ—๋ด‡ ํ•จ์ˆ˜
33
+ """
34
  if not html_text.strip():
35
+ # ๋ฌธ์„œ๊ฐ€ ์—†์„ ๊ฒฝ์šฐ ๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€ ๋ฐ˜ํ™˜
36
  return history, history, "โš ๏ธ ๋จผ์ € ๋ฌธ์„œ๋ฅผ ๋ณ€ํ™˜ํ•ด์ฃผ์„ธ์š”."
37
 
38
+ # Client 호출
39
  client = OpenAI(
40
  api_key=UPSTAGE_API_KEY,
41
  base_url="https://api.upstage.ai/v1"
42
  )
43
 
44
+ # ์ด์ „ ์ฑ„ํŒ… ๊ธฐ๋ก์ด ์—†์œผ๋ฉด ๋นˆ ๋ฆฌ์ŠคํŠธ๋กœ ์ดˆ๊ธฐํ™”
45
  history = history or []
46
 
47
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ
48
  system_prompt = f"""The following is a financial statement document extracted in HTML format.
49
  Please answer user questions accurately and concisely in Korean, based on the text within HTML tags.
50
 
 
52
  {html_text}
53
  """
54
 
55
+ # ๋Œ€ํ™” ๋‚ด์—ญ ๊ตฌ์„ฑ (์‹œ์Šคํ…œ โ†’ ์ด์ „ ์‚ฌ์šฉ์ž ๋ฐ ๋ด‡ ๋Œ€ํ™” โ†’ ํ˜„์žฌ ์งˆ๋ฌธ)
56
  messages = [{"role": "system", "content": system_prompt}]
57
  for user, bot in history:
58
  messages.append({"role": "user", "content": user})
59
  messages.append({"role": "assistant", "content": bot})
60
  messages.append({"role": "user", "content": user_question})
61
 
62
+ # Solar Pro API ํ˜ธ์ถœ
63
  try:
64
  response = client.chat.completions.create(
65
+ model="solar-pro", # ์‚ฌ์šฉํ•  Solar LLM ๋ชจ๋ธ
66
+ messages=messages, # ๊ตฌ์„ฑ๋œ ๋Œ€ํ™” ๋ฉ”์‹œ์ง€๋“ค
67
+ temperature=0, # ์ฐฝ์˜์„ฑ ์ตœ์†Œํ™” (์ •ํ™•ํ•œ ๋‹ต๋ณ€ ์œ ๋„)
68
+ max_tokens=1024 # ์ตœ๋Œ€ ์‘๋‹ต ๊ธธ์ด
69
  )
70
+ bot_reply = response.choices[0].message.content # ์‘๋‹ต ํ…์ŠคํŠธ ์ถ”์ถœ
71
  except Exception as e:
72
+ bot_reply = f"โš ๏ธ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}" # ์˜ˆ์™ธ ์ฒ˜๋ฆฌ
73
 
74
+ # ์ฑ„ํŒ… ๊ธฐ๋ก์— ์งˆ๋ฌธ/๋‹ต๋ณ€ ์ถ”๊ฐ€
75
  history.append((user_question, bot_reply))
76
+ return history, history, "" # ์ฑ„ํŒ… ๊ธฐ๋ก, ์ƒํƒœ, ์งˆ๋ฌธ ์ž…๋ ฅ์นธ ์ดˆ๊ธฐํ™”
77
+
78
+
79
 
def toggle_html_view(current_html, is_visible):
    """Switch between the raw-text box and the rendered HTML view.

    Args:
        current_html: HTML text currently held by the text box; it is copied
            into both views so they stay in sync.
        is_visible: Whether the rendered HTML view is shown right now.

    Returns:
        A 3-tuple of (text-box update, rendered-view update, new visibility
        flag of the rendered view).
    """
    # Derive both visibilities from the *new* state. The earlier version used
    # the old state, so the first click (state False, text box shown, rendered
    # view hidden) changed nothing on screen.
    show_rendered = not is_visible
    return (
        gr.update(value=current_html, visible=not show_rendered),
        gr.update(value=current_html, visible=show_rendered),
        show_rendered,
    )
89
 
90
  with gr.Blocks() as demo:
91
+
92
+ # ์ƒ๋‹จ ์ œ๋ชฉ ๋ฐ ์„ค๋ช… ํ‘œ์‹œ
93
  gr.Markdown("# ๐Ÿ“„ ์žฌ๋ฌด์ œํ‘œ ๋ถ„์„ ์ฑ—๋ด‡")
94
  gr.Markdown("1. Document Parse API๋กœ PDF ๋ฌธ์„œ๋ฅผ HTML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.\n"
95
  "2. Solar LLM์„ ํ†ตํ•ด ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์งˆ๋ฌธ์— ๋‹ต๋ณ€ํ•ฉ๋‹ˆ๋‹ค.")
96
  gr.Markdown("์˜ˆ์ œ ํŒŒ์ผ์€ Files ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ํ™•์ธ ๋ฐ ๋‹ค์šด๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.")
97
 
98
+
99
+ # ํŒŒ์ผ ์—…๋กœ๋“œ ๋ฐ ๋ฌธ์„œ ํŒŒ์‹ฑ ์˜์—ญ
100
  with gr.Row():
101
+ file_input = gr.File(label="๐Ÿ“Ž ์žฌ๋ฌด์ œํ‘œ ์—…๋กœ๋“œ") # ํŒŒ์ผ ์—…๋กœ๋“œ
102
+ parse_btn = gr.Button("๋ฌธ์„œ HTML ๋ณ€ํ™˜") # ํŒŒ์‹ฑ ๋ฒ„ํŠผ
103
+
104
 
105
+ # HTML ์ถœ๋ ฅ ๋ฐ ๋ณด๊ธฐ ํ† ๊ธ€
106
+ html_output = gr.Textbox(label="๐Ÿ“˜ ๋ฌธ์„œ ๋‚ด์šฉ", lines=10, visible=True, elem_id="scrollable-html") # ํ…์ŠคํŠธ ํ˜•์‹
107
+ html_display = gr.HTML(visible=False, elem_id="scrollable-html-display") # HTML ๋ Œ๋”๋ง
108
+ toggle_html_btn = gr.Button("๐Ÿ” HTML ๋ณด๊ธฐ ์ „ํ™˜") # ๋ณด๊ธฐ ์ „ํ™˜ ๋ฒ„ํŠผ
109
+ html_visible_state = gr.State(False) # ๋ณด๊ธฐ ์ƒํƒœ ์ €์žฅ (๊ธฐ๋ณธ: ์•ˆ ๋ณด์ž„)
110
 
111
+ # ๋ฌธ์„œ ๋ณ€ํ™˜ ๋ฒ„ํŠผ ํด๋ฆญ โ†’ HTML ์ถœ๋ ฅ
112
  parse_btn.click(fn=parse_document, inputs=file_input, outputs=html_output)
113
+
114
+ # ๋ณด๊ธฐ ์ „ํ™˜ ๋ฒ„ํŠผ ํด๋ฆญ โ†’ ๋‘ ์˜์—ญ ํ† ๊ธ€
115
  toggle_html_btn.click(
116
  fn=toggle_html_view,
117
  inputs=[html_output, html_visible_state],
118
  outputs=[html_output, html_display, html_visible_state]
119
  )
120
 
121
+ # ๋ฌธ์„œ ๊ธฐ๋ฐ˜ ์ฑ—๋ด‡ ์ธํ„ฐํŽ˜์ด์Šค
122
+ chatbot = gr.Chatbot(label="๐Ÿ’ฌ ๋ฌธ์„œ ๊ธฐ๋ฐ˜ Q&A", height=400) # ์ฑ„ํŒ…์ฐฝ
123
+ user_question = gr.Textbox(label="โ“ ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•˜์„ธ์š”", lines=2) # ์‚ฌ์šฉ์ž ์งˆ๋ฌธ
124
+ answer_btn = gr.Button("๋‹ต๋ณ€ ์ƒ์„ฑ") # ๋‹ต๋ณ€ ๋ฒ„ํŠผ
125
+ chat_state = gr.State([]) # ์ฑ„ํŒ… ์ƒํƒœ ์ €์žฅ
126
 
127
+ # ์˜ˆ์ œ ์งˆ๋ฌธ ๋ฒ„ํŠผ
128
  with gr.Row():
129
  gr.Markdown("๐Ÿ’ก ์˜ˆ์ œ ์งˆ๋ฌธ:")
130
  ex1 = gr.Button("์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?")
131
  ex2 = gr.Button("3๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?")
132
 
133
+ # ์˜ˆ์ œ ์งˆ๋ฌธ 1
134
  ex1.click(
135
  fn=lambda: "์–ด๋–ค ๊ธฐ์—…์˜ ์žฌ๋ฌด์ œํ‘œ์ธ๊ฐ€์š”?",
136
  inputs=[],
 
142
  show_progress=True
143
  )
144
 
145
+ # ์˜ˆ์ œ ์งˆ๋ฌธ 2
146
  ex2.click(
147
  fn=lambda: "1๋ถ„๊ธฐ ์ด ์ˆœ๋งค์ถœ์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?",
148
  inputs=[],
 
154
  show_progress=True
155
  )
156
 
157
+ # ์‚ฌ์šฉ์ž๊ฐ€ ์งˆ๋ฌธ ์ž…๋ ฅ ํ›„ ๋ฒ„ํŠผ ํด๋ฆญ ์‹œ ์‘๋‹ต ์ƒ์„ฑ
158
  answer_btn.click(
159
  fn=chat_with_document,
160
  inputs=[chat_state, html_output, user_question],
 
162
  show_progress=True
163
  )
164
 
165
+ # ์Šคํฌ๋กค ๊ฐ€๋Šฅํ•œ HTML ์˜์—ญ ์Šคํƒ€์ผ ์ถ”๊ฐ€
166
  demo.css = """
167
  #scrollable-html, #scrollable-html-display {
168
  max-height: 400px;
 
172
  }
173
  """
174
 
175
+ # 앱 실행
176
+ if __name__ == "__main__":
177
+ demo.launch()