# blog_sc_jjang / app.py
# kijeoung's picture
# Create app.py
# 94cc512 verified
import logging
import random
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Configure the root logger so DEBUG-level (and higher) records are emitted.
logging.basicConfig(level=logging.DEBUG)
def scrape_blog_links(keyword):
    """Search Naver's blog tab for *keyword* and return up to three random blog links.

    The search result page is fetched over HTTP, every ``<a>`` inside a
    ``div.detail_box`` element is inspected, and only URLs that start with
    ``https://blog.naver.com/`` (taken from ``href`` or, failing that, from
    the ``cru`` attribute) are kept. Duplicates are collapsed.

    Args:
        keyword: Search phrase entered by the user. ``None`` or a blank
            string short-circuits without issuing any request.

    Returns:
        A newline-separated string of up to three distinct blog URLs chosen
        at random, or a human-readable message string when the request
        fails, the server answers with a non-200 status, or no blog link is
        found.
    """
    not_found_message = "๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
    blog_prefix = "https://blog.naver.com/"

    # A missing/blank keyword cannot match anything; skip the network round trip.
    query = "" if keyword is None else str(keyword).strip()
    if not query:
        return not_found_message

    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # Percent-encode the keyword so characters such as '&', '#', '+' or '%'
    # stay part of the query value instead of altering the URL structure.
    url = base_url + quote_plus(query)
    logging.debug(f"์š”์ฒญ URL: {url}")

    # Keep the try body limited to the call that actually talks to the network.
    # The broad catch is deliberate: this is the UI boundary and any failure
    # is reported back to the user as text instead of crashing the app.
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
    except Exception as e:
        logging.exception("์š”์ฒญ ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ")
        return f"์˜ˆ์™ธ ๋ฐœ์ƒ: {e}"

    logging.debug(f"์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
    if response.status_code != 200:
        logging.error("ํŽ˜์ด์ง€ ์š”์ฒญ ์‹คํŒจ")
        return "ํŽ˜์ด์ง€ ์š”์ฒญ์— ์‹คํŒจํ•˜์˜€์Šต๋‹ˆ๋‹ค."

    soup = BeautifulSoup(response.text, "html.parser")
    detail_boxes = soup.find_all("div", class_="detail_box")
    logging.debug(f"detail_box ๊ฐœ์ˆ˜: {len(detail_boxes)}")

    # A set removes duplicate URLs that appear in several anchors.
    blog_links = set()
    for box in detail_boxes:
        for a in box.find_all("a"):
            href = a.get("href", "")
            cru = a.get("cru", "")
            logging.debug(f"a ํƒœ๊ทธ href: {href}, cru: {cru}")
            # Prefer href; fall back to the cru attribute when href is not a blog URL.
            if href.startswith(blog_prefix):
                blog_links.add(href)
            elif cru.startswith(blog_prefix):
                blog_links.add(cru)
    logging.debug(f"์ถ”์ถœ๋œ ๋ธ”๋กœ๊ทธ ๋งํฌ ๊ฐœ์ˆ˜: {len(blog_links)}")

    if not blog_links:
        return not_found_message

    # random.sample() requires a sequence (sets raise TypeError on Python 3.11+),
    # so convert the set to a sorted list before sampling at most three links.
    num_links = min(3, len(blog_links))
    selected_links = random.sample(sorted(blog_links), num_links)
    logging.debug(f"๋žœ๋ค์œผ๋กœ ์„ ํƒ๋œ ๋งํฌ: {selected_links}")

    # One link per line.
    return "\n".join(selected_links)
# Build the Gradio interface: a single text input (the search keyword) is
# passed to scrape_blog_links and its return value is shown in a text output.
iface = gr.Interface(
    fn=scrape_blog_links,
    inputs=gr.Textbox(label="ํ‚ค์›Œ๋“œ ์ž…๋ ฅ", placeholder="์˜ˆ: ์˜ค์ง•์–ด๊ฒŒ์ž„2"),
    outputs=gr.Textbox(label="๋žœ๋ค ๋ธ”๋กœ๊ทธ ๋งํฌ ์ถœ๋ ฅ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์ถ”์ถœ๊ธฐ",
    description="ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์‹คํ–‰ ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์—์„œ ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ถ”์ถœํ•˜์—ฌ ๋žœ๋คํ•˜๊ฒŒ 3๊ฐœ๋ฅผ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค."
)
# Start the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    logging.debug("์•ฑ ์‹œ์ž‘")
    # debug=True is forwarded to Gradio's launch().
    iface.launch(debug=True)