# kijeoung's picture
# Update app.py
# 67764eb verified
import os
import pandas as pd
import gradio as gr
import openai
from datetime import datetime
# Configure the OpenAI API client: the key is read from the environment
# (None when OPENAI_API_KEY is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")
# LLM call helper
def call_api(content, system_message, max_tokens=2000, temperature=0.7, top_p=0.9):
    """Send one system/user message pair to the chat model and return the reply.

    Works with both the legacy ``openai<1.0`` interface
    (``openai.ChatCompletion``) and the ``openai>=1.0`` interface
    (``openai.chat.completions``), so an unpinned ``openai`` dependency does
    not break the call.

    Args:
        content: Text sent as the ``user`` message.
        system_message: Text sent as the ``system`` message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling parameter forwarded to the API.

    Returns:
        The message content of the first returned choice.
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": content},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: ChatCompletion was removed; the module-level client
        # still honours the module-level ``openai.api_key``.
        response = openai.chat.completions.create(**request)
        return response.choices[0].message.content

    # Legacy openai<1.0 interface.
    response = openai.ChatCompletion.create(**request)
    return response.choices[0].message["content"]
# Excel data loading function
def read_excel_data(file):
    """Load the review spreadsheet and add the derived helper columns.

    Columns A-E are read with ``skiprows=1`` and renamed to ``ID``,
    ``Review Date``, ``Option``, ``Review`` and ``ReviewScore``.

    Derived columns:
        * ``Review Date`` - converted to timezone-naive ``date`` objects.
        * ``Year`` - first four characters of the date's string form.
        * ``Option1`` - text before the first ``" / "`` in ``Option``.
        * ``Review Length`` - character count of ``Review`` (0 when missing).

    Args:
        file: Path or file-like object accepted by ``pandas.read_excel``.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    df = pd.read_excel(
        file,
        usecols="A, B, C, D, E",
        skiprows=1,
        names=["ID", "Review Date", "Option", "Review", "ReviewScore"],
        engine="openpyxl",
    )
    df["Review Date"] = pd.to_datetime(df["Review Date"]).dt.tz_localize(None).dt.date
    df["Year"] = df["Review Date"].astype(str).str.slice(0, 4)
    df["Option1"] = df["Option"].astype(str).str.split(" / ").str[0]
    # Computed per cell rather than with ``.str.len()``: the ``.str`` accessor
    # raises when the column holds no strings (all empty / numeric cells) and
    # yields NaN for missing reviews.
    df["Review Length"] = df["Review"].map(
        lambda text: 0 if pd.isna(text) else len(str(text))
    )
    return df
# Function that returns the positive reviews
def get_positive_reviews(df):
positive_reviews = df[df['ReviewScore'] >= 4].sort_values(by='Review Length', ascending=False)
positive_reviews = positive_reviews.head(20)
positive_reviews.reset_index(drop=True, inplace=True)
positive_reviews.index += 1
positive_reviews['์ˆœ๋ฒˆ'] = positive_reviews.index
positive_output = "\n\n".join(positive_reviews.apply(
lambda x: f"{x['์ˆœ๋ฒˆ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1))
system_message = """[์ค‘์š” ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ์ถœ๋ ฅํ•˜๋ผ.
2. ๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค.
3. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๊ธ์ •์ ์ธ ์˜๊ฒฌ์˜ ๋ฐ์ดํ„ฐ๋งŒ ๋ถ„์„ํ•˜๋ผ.
4. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ผ.
5. ๋„ˆ์˜ ์ƒ๊ฐ์„ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.
[๋ถ„์„ ์กฐ๊ฑด]
1. ์ด 20๊ฐœ์˜ ๋ฆฌ๋ทฐ๋ฐ์ดํ„ฐ๋ฅผ ์ œ๊ณตํ•œ๋‹ค.
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์˜ ๋‘˜์งธ์ค„ ๋ถ€ํ„ฐ์˜ ์‹ค์ œ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ ๊ธ์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•˜๋ผ. ๋ถ€์ •์ ์ธ ์˜๊ฒฌ์€ ์ œ์™ธํ•˜๋ผ.
4. ๊ธฐ๋Šฅ๊ณผ ์„ฑ๋Šฅ์˜ ๋ถ€๋ถ„, ๊ฐ์„ฑ์ ์ธ ๋ถ€๋ถ„, ์‹ค์ œ ์‚ฌ์šฉ ์ธก๋ฉด์˜ ๋ถ€๋ถ„, ๋ฐฐ์†ก์˜ ๋ถ€๋ถ„, ํƒ€๊ฒŸ๋ณ„ ๋ถ€๋ถ„์˜ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•˜๋ผ.
5. 4๋ฒˆ์˜ ์กฐ๊ฑด์— ํฌํ•จ๋˜์ง€ ์•Š๋Š” ๊ธ์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ„๋„๋กœ ์ถœ๋ ฅํ•˜๋ผ.
6. ๋งˆ์ผ€ํŒ…์ ์ธ ์š”์†Œ๋กœ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๋Š” ๊ณ ๊ฐ์˜ ์‹ค์ œ ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
[์ถœ๋ ฅ ํ˜•ํƒœ ์กฐ๊ฑด]
1. ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์— '๐Ÿ“'์ด๋ชจ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋ผ,'#', '##'์€ ์ถœ๋ ฅํ•˜์ง€ ๋ง๊ฒƒ.
2. ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— ์ข…ํ•ฉ ์˜๊ฒฌ์„ ์ž‘์„ฑํ•˜๋ผ, "๐Ÿ†์ข…ํ•ฉ์˜๊ฒฌ"์˜ ์ œ๋ชฉํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.
[์ข…ํ•ฉ์˜๊ฒฌ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ์‹œ์ž‘]
('์ข…ํ•ฉ์˜๊ฒฌ'์ด ์•„๋‹Œ ๋‹ค๋ฅธ ๋ถ€๋ถ„์— ์ด ์ถœ๋ ฅ ์กฐ๊ฑด์„ ๋ฐ˜์˜ํ•˜์ง€ ๋ง ๊ฒƒ.
- ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๋ผ.
- ์ข…ํ•ฉ์˜๊ฒฌ์—๋Š” ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๊ณ  ์„œ์ˆ ์‹ ๋ฌธ์žฅ์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
- ๋งค์ถœ์„ ๊ทน๋Œ€ํ™” ํ•  ์ˆ˜ ์žˆ๋Š” ๊ณ ๊ฐ์˜ ์‹ค์ œ ๋ฆฌ๋ทฐ ํฌ์ธํŠธ๋ฅผ ์ œ์‹œํ•˜๋ผ.
[SWOT๋ถ„์„ ์กฐ๊ฑด]
1. '์ข…ํ•ฉ์˜๊ฒฌ' ๋‹ค์Œ ๋‚ด์šฉ์œผ๋กœ SWOT๋ถ„์„ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
2. SWOT๋ถ„์„ ์ค‘ '๊ฐ•์ '์˜๊ฒฌ๊ณผ '๊ธฐํšŒ'์˜ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ '์ข…ํ•ฉ์˜๊ฒฌ'์˜ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
4. ์ œ๋ชฉ์€ '๐Ÿน ๊ฐ•์ ', '๐Ÿน ๊ธฐํšŒ'์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
[์ข…ํ•ฉ์˜๊ฒฌ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ๋]
3. ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ํฌํ•จํ•˜๋ผ.
4. ๋„ˆ์˜ ์ƒ๊ฐ์„ ์ž„์˜๋กœ ๋„ฃ์ง€ ๋ง ๊ฒƒ.
"""
analysis = call_api(positive_output, system_message=system_message)
return positive_output, analysis
# Function that returns the negative reviews
def get_negative_reviews(df):
negative_reviews = df[df['ReviewScore'] <= 2].sort_values(by='Review Length', ascending=False)
negative_reviews = negative_reviews.head(30)
negative_reviews.reset_index(drop=True, inplace=True)
negative_reviews.index += 1
negative_reviews['์ˆœ๋ฒˆ'] = negative_reviews.index
negative_output = "\n\n".join(negative_reviews.apply(
lambda x: f"{x['์ˆœ๋ฒˆ']}. **{x['Review Date']} / {x['ID']} / {x['Option']}**\n\n{x['Review']}", axis=1))
system_message = """[์ค‘์š” ๊ทœ์น™]
1. ๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ์ถœ๋ ฅํ•˜๋ผ.
2. ๋„ˆ๋Š” ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ถ„์„ํ•˜๋Š” ๋น…๋ฐ์ดํ„ฐ ๋ถ„์„๊ฐ€์ด๋‹ค.
3. ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ๋ถ€์ •์ ์ธ ์˜๊ฒฌ์˜ ๋ฐ์ดํ„ฐ๋งŒ ๋ถ„์„ํ•˜๋ผ.
4. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ๋งŒ ๋ถ„์„ํ•˜๋ผ.
5. ๋„ˆ์˜ ์ƒ๊ฐ์„ ํฌํ•จํ•˜์ง€ ๋ง ๊ฒƒ.
[๋ถ„์„ ์กฐ๊ฑด]
1. ์ด 30๊ฐœ์˜ ๋ฆฌ๋ทฐ๋ฐ์ดํ„ฐ๋ฅผ ์ œ๊ณตํ•œ๋‹ค.
2. ๊ฐ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์˜ ๋‘˜์งธ์ค„ ๋ถ€ํ„ฐ์˜ ์‹ค์ œ ๊ณ ๊ฐ๋ฆฌ๋ทฐ๋ฅผ ๋ฐ˜์˜ํ•˜๋ผ.
3. ๋ถ€์ •์ ์ธ ์˜๊ฒฌ๋งŒ์„ ๋ถ„์„ํ•˜๋ผ.
4. ๊ธฐ๋Šฅ๊ณผ ์„ฑ๋Šฅ์˜ ๋ถ€๋ถ„, ๊ฐ์„ฑ์ ์ธ ๋ถ€๋ถ„, ์‹ค์ œ ์‚ฌ์šฉ ์ธก๋ฉด์˜ ๋ถ€๋ถ„, ๋ฐฐ์†ก์˜ ๋ถ€๋ถ„, ๊ณ ๊ฐ์˜ ๋ถ„๋…ธ ๋ถ€๋ถ„์˜ ๊ด€์ ์œผ๋กœ ๋ถ„์„ํ•˜๋ผ.
5. 4๋ฒˆ์˜ ์กฐ๊ฑด์— ํฌํ•จ๋˜์ง€ ์•Š๋Š” ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ๋ฅผ ๋ณ„๋„๋กœ ์ถœ๋ ฅํ•˜๋ผ.
6. ๋ถ€์ •์ ์ธ ๋ฆฌ๋ทฐ ๋ถ„์„ ๊ฒฐ๊ณผ๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ '๊ฐœ์„ ํ•  ์ '์„ ์ถœ๋ ฅํ•˜๋ผ.
[์ถœ๋ ฅ ํ˜•ํƒœ ์กฐ๊ฑด]
1. ๊ฐ๊ฐ์˜ ์ œ๋ชฉ ์•ž์— '๐Ÿ“'์ด๋ชจ์ง€๋ฅผ ์ถœ๋ ฅํ•˜๋ผ,'#', '##'์€ ์ถœ๋ ฅํ•˜์ง€ ๋ง๊ฒƒ.
2. ๊ฐ€์žฅ ๋งˆ์ง€๋ง‰์— '๊ฐœ์„ ํ•  ์ '์„ ์ถœ๋ ฅํ•˜๋ผ("๐Ÿ“ข๊ฐœ์„ ํ•  ์ "์˜ ์ œ๋ชฉํ˜•ํƒœ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.)
[๊ฐœ์„ ํ•  ์ ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ์‹œ์ž‘]
('๊ฐœ์„ ํ•  ์ '์ด ์•„๋‹Œ ๋‹ค๋ฅธ ๋ถ€๋ถ„์— ์ด ์ถœ๋ ฅ ์กฐ๊ฑด์„ ๋ฐ˜์˜ํ•˜์ง€ ๋ง ๊ฒƒ.
- ํ•ญ๋ชฉ๋ณ„ ์ œ๋ชฉ์„ ์ œ์™ธํ•˜๋ผ.
- ์ฃผ์š” ํ•ญ๋ชฉ๋ณ„๋กœ ๊ฐœ์„ ํ•  ์ ์„ ์ถœ๋ ฅํ•˜๋ผ.
- ์ „๋ฌธ์ ์ด๊ณ , ๋ถ„์„์ ์ด๋ฉฐ, ์ œ์•ˆํ•˜๋Š” ํ˜•ํƒœ์˜ ๊ณต์†ํ•œ ์–ดํˆฌ๋ฅผ ์‚ฌ์šฉํ•˜๋ผ.(๋‹จ๋‹ตํ˜• ํ‘œํ˜„ ๊ธˆ์ง€)
[SWOT๋ถ„์„ ์กฐ๊ฑด]
1. '์ข…ํ•ฉ์˜๊ฒฌ' ๋‹ค์Œ ๋‚ด์šฉ์œผ๋กœ SWOT๋ถ„์„ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
2. SWOT๋ถ„์„ ์ค‘ '์•ฝ์ '์˜๊ฒฌ๊ณผ '์œ„ํ˜‘'์˜ ์˜๊ฒฌ์„ ์ถœ๋ ฅํ•˜๋ผ.
3. ๋ฐ˜๋“œ์‹œ '๊ฐœ์„ ํ•  ์ '์˜ ๋‚ด์šฉ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ž‘์„ฑํ•˜๋ผ.
4. ์ œ๋ชฉ์€ '๐Ÿ’‰ ์•ฝ์ ', '๐Ÿ’‰ ์œ„ํ˜‘'์œผ๋กœ ์ถœ๋ ฅํ•˜๋ผ.
[๊ฐœ์„ ํ•  ์ ์˜ ์ถœ๋ ฅ ์กฐ๊ฑด ๋]
3. ์‹ค์ œ ๊ณ ๊ฐ์˜ ๋ฆฌ๋ทฐ ๋ฐ์ดํ„ฐ์—์„œ ์‚ฌ์šฉ๋œ ๋‹จ์–ด๋ฅผ ํฌํ•จํ•˜๋ผ.
4. ๋„ˆ์˜ ์ƒ๊ฐ์„ ์ž„์˜๋กœ ๋„ฃ์ง€ ๋ง ๊ฒƒ.
"""
analysis = call_api(negative_output, system_message=system_message)
return negative_output, analysis
# Function that processes the review data and extracts the positive and negative reviews
def process_reviews(file):
    """Run the full pipeline for one uploaded spreadsheet.

    Args:
        file: The uploaded Excel file as passed in by the UI, or ``None``
            when nothing has been uploaded.

    Returns:
        A 4-tuple of strings: positive listing, positive analysis, negative
        listing, negative analysis. When ``file`` is ``None`` the first
        element is a notice asking for an upload and the rest are empty.
    """
    if file is None:
        # The button can be clicked before a file is chosen; answer with a
        # notice instead of letting the Excel reader fail on ``None``.
        return "엑셀 파일을 먼저 업로드해 주세요.", "", "", ""

    df = read_excel_data(file)
    positive_reviews, positive_analysis = get_positive_reviews(df)
    negative_reviews, negative_analysis = get_negative_reviews(df)
    return positive_reviews, positive_analysis, negative_reviews, negative_analysis
# Gradio interface layout
def create_interface():
    """Build the Gradio Blocks UI and wire the analyze button.

    The layout has a file upload restricted to ``.xlsx``, an analyze button,
    and four read-only text boxes (positive listing/analysis and negative
    listing/analysis) that receive the four values returned by
    ``process_reviews``.

    The Korean labels were restored from mis-encoded (mojibake) text.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("### 리뷰 데이터 업로드 및 분석")
        file_input = gr.File(label="엑셀 파일 업로드", file_types=[".xlsx"])
        analyze_button = gr.Button("리뷰분석")

        with gr.Column():
            gr.Markdown("### 긍정적인 주요 리뷰 (최대 20개)")
            positive_reviews_output = gr.Textbox(
                label="긍정적인 주요 리뷰", interactive=False, lines=20
            )
            positive_analysis_output = gr.Textbox(
                label="긍정 리뷰 분석 결과", interactive=False, lines=10
            )
            gr.Markdown("### 부정적인 주요 리뷰 (최대 30개)")
            negative_reviews_output = gr.Textbox(
                label="부정적인 주요 리뷰", interactive=False, lines=30
            )
            negative_analysis_output = gr.Textbox(
                label="부정 리뷰 분석 결과", interactive=False, lines=10
            )

        # Output order must match the tuple order returned by process_reviews.
        analyze_button.click(
            fn=process_reviews,
            inputs=[file_input],
            outputs=[
                positive_reviews_output,
                positive_analysis_output,
                negative_reviews_output,
                negative_analysis_output,
            ],
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start serving it only when run as a script.
    create_interface().launch()