File size: 5,729 Bytes
1fbeb24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
import openai
import os
import requests
from bs4 import BeautifulSoup

# OpenAI API ํด๋ผ์ด์–ธํŠธ ์„ค์ •
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise ValueError("OpenAI API ํ† ํฐ(OPENAI_API_KEY)์ด ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

#############################
# OpenAI API ํ˜ธ์ถœ ํ•จ์ˆ˜
#############################
def call_openai_api(content: str, system_message: str, max_tokens: int, temperature: float, top_p: float) -> str:
    """Send a single system+user exchange to the gpt-4o-mini chat model.

    Args:
        content: The user prompt.
        system_message: The system prompt steering the model.
        max_tokens: Upper bound on the length of the reply.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The assistant's reply text, or a Korean error string on any failure
        (this function never raises, so the Gradio UI can display the error
        directly in the output textbox).
    """
    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        return completion.choices[0].message['content']
    except Exception as exc:  # best-effort: surface the failure to the UI
        return f"오류가 발생했습니다: {str(exc)}"

#############################
# OpenAI ์„ค์ •
#############################
OPENAI_SYSTEM_MESSAGE = """๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€๋กœ ๋‹ต๋ณ€ํ•  ๊ฒƒ.
๋„ˆ๋Š” ์ตœ๊ณ ์˜ ๋น„์„œ์ด๋‹ค.
๋‚ด๊ฐ€ ์š”๊ตฌํ•˜๋Š” ๊ฒƒ๋“ค์„ ์ตœ๋Œ€ํ•œ ์ž์„ธํ•˜๊ณ  ์ •ํ™•ํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•˜๋ผ.
##[๊ธฐ๋ณธ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ตญ์–ด(ํ•œ๊ธ€)๋กœ ์ž‘์„ฑํ•˜๋ผ.
2. ๋„ˆ๋Š” ๊ฐ€์žฅ ์ฃผ๋ชฉ๋ฐ›๋Š” ๋งˆ์ผ€ํ„ฐ์ด๋ฉฐ ๋ธ”๋กœ๊ทธ ๋งˆ์ผ€ํŒ… ์ „๋ฌธ๊ฐ€์ด๋‹ค.
3. ํŠนํžˆ ๋„ˆ๋Š” '์ •๋ณด์„ฑ(Informative)' ์ „๋ฌธ ๋ธ”๋กœ๊ทธ ๋งˆ์ผ€ํŒ… ์ „๋ฌธ๊ฐ€์ด๋‹ค.
4. ์ •๋ณด ์ œ๊ณต์— ์ดˆ์ ์„ ๋งž์ถ”์–ด ์ž‘์„ฑํ•œ๋‹ค.
##[ํ…์ŠคํŠธ ์ž‘์„ฑ ๊ทœ์น™]
1. ์†Œ์ฃผ์ œ๋ฅผ 5๊ฐœ๋กœ ๊ตฌ๋ถ„ํ•˜์—ฌ 2000์ž ์ด์ƒ๋˜๋„๋ก ์ž‘์„ฑํ•˜๋ผ.
2. ์ „์ฒด ๋งฅ๋ฝ์„ ์ดํ•ดํ•˜๊ณ  ๋ฌธ์žฅ์˜ ์ผ๊ด€์„ฑ์„ ์œ ์ง€ํ•˜๋ผ.
3. ์ ˆ๋Œ€๋กœ ์ฐธ๊ณ ๊ธ€์„ ํ•œ๋ฌธ์žฅ ์ด์ƒ ๊ทธ๋Œ€๋กœ ์ถœ๋ ฅํ•˜์ง€ ๋ง ๊ฒƒ.
4. ์ฃผ์ œ์™€ ์ƒํ™ฉ์— ๋งž๋Š” ์ ์ ˆํ•œ ์–ดํœ˜๋ฅผ ์„ ํƒํ•˜๋ผ.
5. ํ•œ๊ธ€ ์–ดํœ˜์˜ ๋‚œ์ด๋„๋Š” ์‰ฝ๊ฒŒ ์ž‘์„ฑํ•˜๋ผ.
6. ์ ˆ๋Œ€ ๋ฌธ์žฅ์˜ ๋์— '๋‹ต๋‹ˆ๋‹ค'๋ฅผ ์‚ฌ์šฉํ•˜์ง€ ๋ง ๊ฒƒ.
###[์ •๋ณด์„ฑ ๋ธ”๋กœ๊ทธ ์ž‘์„ฑ ๊ทœ์น™]
1. ๋…์ž๊ฐ€ ์–ป๊ณ ์ž ํ•˜๋Š” ์œ ์šฉํ•œ ์ •๋ณด์™€ ํฅ๋ฏธ๋กœ์šด ์ •๋ณด๋ฅผ ์ œ๊ณตํ•˜๋„๋ก ์ž‘์„ฑํ•˜๋ผ.
2. ๋…์ž์˜ ๊ณต๊ฐ์„ ์ด๋Œ์–ด๋‚ด๊ณ  ๊ถ๊ธˆ์ฆ์„ ํ•ด๊ฒฐํ•˜๋„๋ก ์ž‘์„ฑํ•˜๋ผ.
3. ๋…์ž์˜ ๊ด€์‹ฌ์‚ฌ๋ฅผ ์ถฉ์กฑ์‹œํ‚ค๋„๋ก ์ž‘์„ฑํ•˜๋ผ.
4. ๋…์ž์—๊ฒŒ ์ด๋“์ด ๋˜๋Š” ์ •๋ณด๋ฅผ ์ž‘์„ฑํ•˜๋ผ.
##[์ œ์™ธ ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ๋น„์†์–ด ๋ฐ ์š•์„ค(expletive, abusive language, slang)์€ ์ œ์™ธํ•˜๋ผ.
2. ๋ฐ˜๋“œ์‹œ ์ฐธ๊ณ ๊ธ€์˜ ๋งํฌ(URL)๋Š” ์ œ์™ธํ•˜๋ผ.
3. ์ฐธ๊ณ ๊ธ€์—์„œ '๋งํฌ๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”'์™€ ๊ฐ™์€ ๋งํฌ ์ด๋™์˜ ๋ฌธ๊ตฌ๋Š” ์ œ์™ธํ•˜๋ผ.
4. ์ฐธ๊ณ ๊ธ€์— ์žˆ๋Š” ์ž‘์„ฑ์ž, ํ™”์ž, ์œ ํŠœ๋ฒ„, ๊ธฐ์ž์˜ ์ด๋ฆ„, ์• ์นญ, ๋‹‰๋„ค์ž„์€ ๋ฐ˜๋“œ์‹œ ์ œ์™ธํ•˜๋ผ.
5. ๋ฐ˜๋“œ์‹œ ๋ฌธ์žฅ์˜ ๋๋ถ€๋ถ„์ด ์–ด์ƒ‰ํ•œ ํ•œ๊ตญ์–ด ํ‘œํ˜„์€ ์ œ์™ธํ•˜๋ผ('์˜ˆ์š”', '๋‹ต๋‹ˆ๋‹ค', 'ํ•ด์š”', 'ํ•ด์ฃผ์ฃ ', '๋์ฃ ', '๋์–ด์š”', '๊ณ ์š”' ๋“ฑ.)
"""
OPENAI_MAX_TOKENS = 4000
OPENAI_TEMPERATURE = 0.7
OPENAI_TOP_P = 0.95

#############################
# ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ์Šคํฌ๋ž˜ํ•‘
#############################
def convert_to_mobile_url(url):
    """Return the mobile (m.blog.naver.com) form of a Naver blog URL.

    Desktop URLs shaped like ``https://blog.naver.com/<user>/<post>`` are
    rewritten to the mobile host; URLs that are already mobile, belong to a
    different site, or do not have enough path segments are returned
    unchanged.
    """
    already_mobile = "m.blog.naver.com" in url
    is_naver_blog = "blog.naver.com" in url
    if already_mobile or not is_naver_blog:
        return url
    segments = url.split("/")
    if len(segments) < 5:
        # Not the <scheme>//host/<user>/<post> shape we know how to rewrite.
        return url
    blogger, post = segments[3], segments[4]
    return f"https://m.blog.naver.com/{blogger}/{post}"

def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    The URL is first normalized to the mobile site, whose SmartEditor markup
    uses the ``se-module`` classes targeted below.

    Returns:
        A "제목: ...\\n\\n내용: ..." string on success, or an error string
        (this function never raises, so the Gradio UI can show the failure).
    """
    try:
        mobile_url = convert_to_mobile_url(url)
        # timeout: without it a stalled server hangs the UI thread forever.
        # User-Agent: Naver frequently rejects the default python-requests UA.
        response = requests.get(
            mobile_url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # SmartEditor post title lives in a dedicated se-title-text module.
        title_element = soup.find("div", class_="se-module se-module-text se-title-text")
        title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"
        # Body paragraphs are all the plain se-module-text modules (this also
        # re-includes the title module, matching the original behavior).
        content_elements = soup.find_all("div", class_="se-module se-module-text")
        content = "\n".join(
            elem.get_text(strip=True) for elem in content_elements
        ) if content_elements else "내용을 찾을 수 없음"
        return f"제목: {title}\n\n내용: {content}"
    except Exception as e:
        return f"Error: {e}"

#############################
# Gradio UI ๊ตฌ์„ฑ (ํƒญ ๊ตฌ์กฐ ์ ์šฉ)
#############################
# Gradio UI: two independent tools presented as tabs.
with gr.Blocks() as demo:
    gr.Markdown("# 다기능 도구")
    with gr.Tabs():
        # Tab 1: blog-post generator backed by the OpenAI chat API.
        with gr.Tab("블로그 생성기"):
            tone = gr.Radio(
                label="말투바꾸기",
                choices=["친근하게", "일반적인", "전문적인"],
                value="일반적인",
            )
            reference_1 = gr.Textbox(label="참조글 1")
            reference_2 = gr.Textbox(label="참조글 2")
            reference_3 = gr.Textbox(label="참조글 3")
            result_box = gr.Textbox(label="결과", lines=20, interactive=False)
            run_button = gr.Button("생성하기")

            def _generate(selected_tone, first_ref, second_ref, third_ref):
                # Bundle the tone and the three reference texts into the
                # single user prompt expected by call_openai_api.
                prompt = (
                    f"말투: {selected_tone}\n"
                    f"참조글1: {first_ref}\n"
                    f"참조글2: {second_ref}\n"
                    f"참조글3: {third_ref}\n"
                )
                return call_openai_api(
                    prompt,
                    OPENAI_SYSTEM_MESSAGE,
                    OPENAI_MAX_TOKENS,
                    OPENAI_TEMPERATURE,
                    OPENAI_TOP_P,
                )

            run_button.click(
                fn=_generate,
                inputs=[tone, reference_1, reference_2, reference_3],
                outputs=result_box,
            )

        # Tab 2: Naver blog scraper.
        with gr.Tab("네이버 블로그 스크래핑"):
            url_box = gr.Textbox(label="네이버 블로그 URL")
            scraped_box = gr.Textbox(label="스크래핑 결과", lines=10, interactive=False)
            fetch_button = gr.Button("스크래핑하기")
            fetch_button.click(
                fn=scrape_naver_blog,
                inputs=url_box,
                outputs=scraped_box,
            )

if __name__ == "__main__":
    demo.launch()