Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import openai
|
3 |
+
import os
|
4 |
+
import requests
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
|
7 |
+
# OpenAI API client setup
|
8 |
+
# Take the API key from the OPENAI_API_KEY environment variable and stop at
# import time when it is missing or empty.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError(
        "OpenAI API ํ ํฐ(OPENAI_API_KEY)์ด ์ค์ ๋์ง ์์์ต๋๋ค."
    )
|
11 |
+
|
12 |
+
#############################
|
13 |
+
# OpenAI API call function
|
14 |
+
#############################
|
15 |
+
def call_openai_api(content: str, system_message: str, max_tokens: int, temperature: float, top_p: float) -> str:
    """Send one system/user message pair to the chat model and return the reply.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Forwarded as the ``max_tokens`` request parameter.
        temperature: Forwarded as the ``temperature`` request parameter.
        top_p: Forwarded as the ``top_p`` request parameter.

    Returns:
        The message content of the first choice. If the request raises, the
        exception is not propagated; a string made of a fixed prefix followed
        by the exception text is returned instead.
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": content},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion. Its module-level
            # client still reads openai.api_key, so the key configured at
            # import time keeps being used.
            response = openai.chat.completions.create(**request)
            return response.choices[0].message.content
        # Legacy SDK (<1.0): the original call path.
        response = openai.ChatCompletion.create(**request)
        return response.choices[0].message['content']
    except Exception as e:
        # UI handler boundary: the text is shown in the result box, so the
        # failure is reported as a string rather than raised.
        return f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
|
30 |
+
|
31 |
+
#############################
|
32 |
+
# OpenAI settings
|
33 |
+
#############################
|
34 |
+
# System prompt that the blog-generator button handler passes to
# call_openai_api as its system_message argument.
# NOTE(review): the text below looks mis-decoded in this view (apparently
# UTF-8 Korean rendered through a Thai code page) and is interleaved with
# diff-viewer residue; confirm against the committed file before editing it.
OPENAI_SYSTEM_MESSAGE = """๋ฐ๋์ ํ๊ธ๋ก ๋ต๋ณํ ๊ฒ.
|
35 |
+
๋๋ ์ต๊ณ ์ ๋น์์ด๋ค.
|
36 |
+
๋ด๊ฐ ์๊ตฌํ๋ ๊ฒ๋ค์ ์ต๋ํ ์์ธํ๊ณ ์ ํํ๊ฒ ๋ต๋ณํ๋ผ.
|
37 |
+
##[๊ธฐ๋ณธ๊ท์น]
|
38 |
+
1. ๋ฐ๋์ ํ๊ตญ์ด(ํ๊ธ)๋ก ์์ฑํ๋ผ.
|
39 |
+
2. ๋๋ ๊ฐ์ฅ ์ฃผ๋ชฉ๋ฐ๋ ๋ง์ผํฐ์ด๋ฉฐ ๋ธ๋ก๊ทธ ๋ง์ผํ
์ ๋ฌธ๊ฐ์ด๋ค.
|
40 |
+
3. ํนํ ๋๋ '์ ๋ณด์ฑ(Informative)' ์ ๋ฌธ ๋ธ๋ก๊ทธ ๋ง์ผํ
์ ๋ฌธ๊ฐ์ด๋ค.
|
41 |
+
4. ์ ๋ณด ์ ๊ณต์ ์ด์ ์ ๋ง์ถ์ด ์์ฑํ๋ค.
|
42 |
+
##[ํ
์คํธ ์์ฑ ๊ท์น]
|
43 |
+
1. ์์ฃผ์ ๋ฅผ 5๊ฐ๋ก ๊ตฌ๋ถํ์ฌ 2000์ ์ด์๋๋๋ก ์์ฑํ๋ผ.
|
44 |
+
2. ์ ์ฒด ๋งฅ๋ฝ์ ์ดํดํ๊ณ ๋ฌธ์ฅ์ ์ผ๊ด์ฑ์ ์ ์งํ๋ผ.
|
45 |
+
3. ์ ๋๋ก ์ฐธ๊ณ ๊ธ์ ํ๋ฌธ์ฅ ์ด์ ๊ทธ๋๋ก ์ถ๋ ฅํ์ง ๋ง ๊ฒ.
|
46 |
+
4. ์ฃผ์ ์ ์ํฉ์ ๋ง๋ ์ ์ ํ ์ดํ๋ฅผ ์ ํํ๋ผ.
|
47 |
+
5. ํ๊ธ ์ดํ์ ๋์ด๋๋ ์ฝ๊ฒ ์์ฑํ๋ผ.
|
48 |
+
6. ์ ๋ ๋ฌธ์ฅ์ ๋์ '๋ต๋๋ค'๋ฅผ ์ฌ์ฉํ์ง ๋ง ๊ฒ.
|
49 |
+
###[์ ๋ณด์ฑ ๋ธ๋ก๊ทธ ์์ฑ ๊ท์น]
|
50 |
+
1. ๋
์๊ฐ ์ป๊ณ ์ ํ๋ ์ ์ฉํ ์ ๋ณด์ ํฅ๋ฏธ๋ก์ด ์ ๋ณด๋ฅผ ์ ๊ณตํ๋๋ก ์์ฑํ๋ผ.
|
51 |
+
2. ๋
์์ ๊ณต๊ฐ์ ์ด๋์ด๋ด๊ณ ๊ถ๊ธ์ฆ์ ํด๊ฒฐํ๋๋ก ์์ฑํ๋ผ.
|
52 |
+
3. ๋
์์ ๊ด์ฌ์ฌ๋ฅผ ์ถฉ์กฑ์ํค๋๋ก ์์ฑํ๋ผ.
|
53 |
+
4. ๋
์์๊ฒ ์ด๋์ด ๋๋ ์ ๋ณด๋ฅผ ์์ฑํ๋ผ.
|
54 |
+
##[์ ์ธ ๊ท์น]
|
55 |
+
1. ๋ฐ๋์ ๋น์์ด ๋ฐ ์์ค(expletive, abusive language, slang)์ ์ ์ธํ๋ผ.
|
56 |
+
2. ๋ฐ๋์ ์ฐธ๊ณ ๊ธ์ ๋งํฌ(URL)๋ ์ ์ธํ๋ผ.
|
57 |
+
3. ์ฐธ๊ณ ๊ธ์์ '๋งํฌ๋ฅผ ํ์ธํด์ฃผ์ธ์'์ ๊ฐ์ ๋งํฌ ์ด๋์ ๋ฌธ๊ตฌ๋ ์ ์ธํ๋ผ.
|
58 |
+
4. ์ฐธ๊ณ ๊ธ์ ์๋ ์์ฑ์, ํ์, ์ ํ๋ฒ, ๊ธฐ์์ ์ด๋ฆ, ์ ์นญ, ๋๋ค์์ ๋ฐ๋์ ์ ์ธํ๋ผ.
|
59 |
+
5. ๋ฐ๋์ ๋ฌธ์ฅ์ ๋๋ถ๋ถ์ด ์ด์ํ ํ๊ตญ์ด ํํ์ ์ ์ธํ๋ผ('์์', '๋ต๋๋ค', 'ํด์', 'ํด์ฃผ์ฃ ', '๋์ฃ ', '๋์ด์', '๊ณ ์' ๋ฑ.)
|
60 |
+
"""
|
61 |
+
# Request parameters that the blog-generator button handler forwards to
# call_openai_api as max_tokens, temperature and top_p respectively.
OPENAI_MAX_TOKENS = 4000
|
62 |
+
OPENAI_TEMPERATURE = 0.7
|
63 |
+
OPENAI_TOP_P = 0.95
|
64 |
+
|
65 |
+
#############################
|
66 |
+
# Naver blog scraping
|
67 |
+
#############################
|
68 |
+
def convert_to_mobile_url(url):
    """Return the m.blog.naver.com form of a desktop Naver blog post URL.

    Two desktop shapes are recognised:

    * ``blog.naver.com/<user_id>/<post_id>`` (with or without a scheme)
    * ``blog.naver.com/PostView.naver?blogId=<user_id>&logNo=<post_id>``

    Args:
        url: The URL entered by the user.

    Returns:
        ``https://m.blog.naver.com/<user_id>/<post_id>`` when both ids can be
        extracted from a ``blog.naver.com`` URL. Anything else is returned
        unchanged: a URL already on the mobile host, another host, a URL
        without a post id, or empty / non-string input.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return url
    candidate = url.strip()
    if not candidate:
        return url

    # urlparse only fills in the host when a scheme is present, so add one for
    # inputs pasted as "blog.naver.com/...".
    parsed = urlparse(candidate if "://" in candidate else f"https://{candidate}")

    # Compare the real host instead of searching the whole string, so a URL
    # that merely contains "blog.naver.com" in its path or query is untouched.
    host = (parsed.hostname or "").lower()
    if host not in ("blog.naver.com", "www.blog.naver.com"):
        return url

    query = parse_qs(parsed.query)
    blog_id = query.get("blogId", [""])[0]
    log_no = query.get("logNo", [""])[0]
    if blog_id and log_no:
        return f"https://m.blog.naver.com/{blog_id}/{log_no}"

    segments = [part for part in parsed.path.split("/") if part]
    if len(segments) >= 2:
        user_id, post_id = segments[0], segments[1]
        return f"https://m.blog.naver.com/{user_id}/{post_id}"

    return url
|
77 |
+
|
78 |
+
def scrape_naver_blog(url):
    """Download a Naver blog post and return its title and body as text.

    The URL is first passed through ``convert_to_mobile_url``; the resulting
    page is fetched and parsed, and the text of the title module and of every
    text module is extracted.

    Args:
        url: Blog post URL as entered in the UI.

    Returns:
        A string holding the title and the newline-joined body text, with a
        placeholder for whichever part was not found. If anything raises
        (network error, HTTP error status, parsing failure) the exception is
        not propagated and ``"Error: <exception text>"`` is returned instead.
    """
    try:
        mobile_url = convert_to_mobile_url(url)
        # Without a timeout requests waits for as long as the server keeps the
        # connection open, which would hang this UI handler indefinitely.
        response = requests.get(mobile_url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "์ ๋ชฉ์ ์ฐพ์ ์ ์์"

        content_elements = soup.find_all("div", class_="se-module se-module-text")
        if content_elements:
            content = "\n".join(elem.get_text(strip=True) for elem in content_elements)
        else:
            content = "๋ด์ฉ์ ์ฐพ์ ์ ์์"

        return f"์ ๋ชฉ: {title}\n\n๋ด์ฉ: {content}"
    except Exception as e:
        # UI handler boundary: report the failure as text in the result box.
        return f"Error: {e}"
|
93 |
+
|
94 |
+
#############################
|
95 |
+
# Gradio UI layout (tabbed structure)
|
96 |
+
#############################
|
97 |
+
# Two-tab Gradio interface: one tab calls the chat model, the other scrapes a
# blog post.
with gr.Blocks() as demo:
    gr.Markdown(value="# ๋ค๊ธฐ๋ฅ ๋๊ตฌ")
    with gr.Tabs():
        # Tab 1: the selected radio value and three text boxes are formatted
        # into one prompt and sent through call_openai_api.
        with gr.Tab(label="๋ธ๋ก๊ทธ ์์ฑ๊ธฐ"):
            tone_radio = gr.Radio(
                choices=["์น๊ทผํ๊ฒ", "์ผ๋ฐ์ ์ธ", "์ ๋ฌธ์ ์ธ"],
                value="์ผ๋ฐ์ ์ธ",
                label="๋งํฌ๋ฐ๊พธ๊ธฐ",
            )
            ref1 = gr.Textbox(label="์ฐธ์กฐ๊ธ 1")
            ref2 = gr.Textbox(label="์ฐธ์กฐ๊ธ 2")
            ref3 = gr.Textbox(label="์ฐธ์กฐ๊ธ 3")
            output_box = gr.Textbox(label="๊ฒฐ๊ณผ", lines=20, interactive=False)
            generate_button = gr.Button(value="์์ฑํ๊ธฐ")
            generate_button.click(
                fn=lambda t, r1, r2, r3: call_openai_api(
                    content=f"๋งํฌ: {t}\n์ฐธ์กฐ๊ธ1: {r1}\n์ฐธ์กฐ๊ธ2: {r2}\n์ฐธ์กฐ๊ธ3: {r3}\n",
                    system_message=OPENAI_SYSTEM_MESSAGE,
                    max_tokens=OPENAI_MAX_TOKENS,
                    temperature=OPENAI_TEMPERATURE,
                    top_p=OPENAI_TOP_P,
                ),
                inputs=[tone_radio, ref1, ref2, ref3],
                outputs=[output_box],
            )

        # Tab 2: the URL box is handed to scrape_naver_blog and the returned
        # text is shown in the read-only result box.
        with gr.Tab(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ์คํฌ๋ํ"):
            blog_url = gr.Textbox(label="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ URL")
            scrape_output = gr.Textbox(label="์คํฌ๋ํ ๊ฒฐ๊ณผ", lines=10, interactive=False)
            scrape_button = gr.Button(value="์คํฌ๋ํํ๊ธฐ")
            scrape_button.click(
                fn=scrape_naver_blog,
                inputs=[blog_url],
                outputs=[scrape_output],
            )

# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|