Spaces:
Sleeping
Sleeping
import logging
import random
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Set the root logger to DEBUG so every logging.debug() call in this module is emitted.
logging.basicConfig(level=logging.DEBUG)
def scrape_blog_links(keyword: str) -> str:
    """Return up to three randomly chosen Naver blog links for a keyword.

    Requests the Naver blog search results page for ``keyword``, collects
    every anchor inside a ``div.detail_box`` whose ``href`` (or, failing
    that, ``cru``) attribute starts with ``https://blog.naver.com/``, and
    picks at most three distinct links at random.

    Args:
        keyword: Search term typed by the user. It is percent-encoded before
            being appended to the query string.

    Returns:
        The selected links joined by newlines. If the request raises, the
        response status is not 200, or no blog link is found, a
        human-readable message string is returned instead.
    """
    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    # Percent-encode the keyword: raw spaces, '&', '#', '+' or non-ASCII text
    # would otherwise truncate or corrupt the query string.
    url = base_url + quote_plus(keyword)
    logging.debug(f"์์ฒญ URL: {url}")

    # Keep the try body down to the single call that performs network I/O.
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as e:
        logging.exception("์์ฒญ ์ค ์์ธ ๋ฐ์")
        return f"์์ธ ๋ฐ์: {e}"

    logging.debug(f"์๋ต ์ํ ์ฝ๋: {response.status_code}")
    if response.status_code != 200:
        logging.error("ํ์ด์ง ์์ฒญ ์คํจ")
        return "ํ์ด์ง ์์ฒญ์ ์คํจํ์์ต๋๋ค."

    soup = BeautifulSoup(response.text, "html.parser")
    detail_boxes = soup.find_all("div", class_="detail_box")
    logging.debug(f"detail_box ๊ฐ์: {len(detail_boxes)}")

    # A set de-duplicates links that appear in more than one anchor.
    blog_links = set()
    for box in detail_boxes:
        for a in box.find_all("a"):
            href = a.get("href", "")
            cru = a.get("cru", "")
            logging.debug(f"a ํ๊ทธ href: {href}, cru: {cru}")
            # Keep Naver blog links only; prefer href, fall back to cru.
            if href.startswith("https://blog.naver.com/"):
                blog_links.add(href)
            elif cru.startswith("https://blog.naver.com/"):
                blog_links.add(cru)
    logging.debug(f"์ถ์ถ๋ ๋ธ๋ก๊ทธ ๋งํฌ ๊ฐ์: {len(blog_links)}")

    if not blog_links:
        return "๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."

    # Pick at most three links. random.sample() requires a sequence: passing
    # a set raises TypeError on Python 3.11+, so sample from a sorted list.
    num_links = min(3, len(blog_links))
    selected_links = random.sample(sorted(blog_links), num_links)
    logging.debug(f"๋๋ค์ผ๋ก ์ ํ๋ ๋งํฌ: {selected_links}")

    # One link per line.
    return "\n".join(selected_links)
# Build the Gradio interface: a keyword text box in, a text box of links out.
_keyword_input = gr.Textbox(label="ํค์๋ ์ ๋ ฅ", placeholder="์: ์ค์ง์ด๊ฒ์2")
_links_output = gr.Textbox(label="๋๋ค ๋ธ๋ก๊ทธ ๋งํฌ ์ถ๋ ฅ")

iface = gr.Interface(
    fn=scrape_blog_links,
    inputs=_keyword_input,
    outputs=_links_output,
    title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ ์ถ์ถ๊ธฐ",
    description="ํค์๋๋ฅผ ์ ๋ ฅํ๊ณ ์คํ ๋ฒํผ์ ํด๋ฆญํ๋ฉด ๋ค์ด๋ฒ ๊ฒ์ ๊ฒฐ๊ณผ์์ ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ๋ฅผ ์ถ์ถํ์ฌ ๋๋คํ๊ฒ 3๊ฐ๋ฅผ ์ถ๋ ฅํฉ๋๋ค.",
)
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    logging.debug("์ฑ ์์")
    # Start the Gradio server for the interface with debug mode enabled.
    iface.launch(debug=True)