Spaces:
Sleeping
Sleeping
File size: 2,773 Bytes
94cc512 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import logging
import random
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup
# Set the root logger to DEBUG so the debug messages below are emitted.
logging.basicConfig(level=logging.DEBUG)
def scrape_blog_links(keyword):
    """Return up to three random Naver blog links found for *keyword*.

    Requests the Naver blog-search results page for the keyword, collects
    every anchor inside a ``div.detail_box`` element whose ``href`` (or,
    failing that, ``cru``) attribute starts with ``https://blog.naver.com/``,
    and picks at most three distinct links at random.

    Args:
        keyword: Search term. It is percent-encoded before being appended to
            the query string.

    Returns:
        The selected links joined by newlines, or a human-readable message
        when the request raises, the status code is not 200, or no blog link
        is found. Failures are reported as text instead of being raised.
    """
    # NOTE(review): the runtime messages in this function are kept verbatim;
    # they look mis-decoded -- confirm against the original UTF-8 file.
    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    blog_prefix = "https://blog.naver.com/"

    # Encode the keyword so characters such as "&", "#", "+" or spaces stay
    # part of the query value instead of altering the URL structure.
    url = base_url + quote_plus(keyword)
    logging.debug(f"์์ฒญ URL: {url}")

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
    except Exception as e:
        # UI boundary: report any request failure as text rather than raising.
        logging.exception("์์ฒญ ์ค ์์ธ ๋ฐ์")
        return f"์์ธ ๋ฐ์: {e}"

    logging.debug(f"์๋ต ์ํ ์ฝ๋: {response.status_code}")
    if response.status_code != 200:
        logging.error("ํ์ด์ง ์์ฒญ ์คํจ")
        return "ํ์ด์ง ์์ฒญ์ ์คํจํ์์ต๋๋ค."

    soup = BeautifulSoup(response.text, "html.parser")
    detail_boxes = soup.find_all("div", class_="detail_box")
    logging.debug(f"detail_box ๊ฐ์: {len(detail_boxes)}")

    # A set removes links that appear more than once in the results.
    blog_links = set()
    for box in detail_boxes:
        for a in box.find_all("a"):
            href = a.get("href", "")
            cru = a.get("cru", "")
            logging.debug(f"a ํ๊ทธ href: {href}, cru: {cru}")
            # Keep Naver blog links only; prefer href and fall back to cru.
            if href.startswith(blog_prefix):
                blog_links.add(href)
            elif cru.startswith(blog_prefix):
                blog_links.add(cru)
    logging.debug(f"์ถ์ถ๋ ๋ธ๋ก๊ทธ ๋งํฌ ๊ฐ์: {len(blog_links)}")

    if not blog_links:
        return "๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค."

    # Randomly choose at most three links. random.sample() requires a
    # sequence (passing a set raises TypeError on Python 3.11+), so the set
    # is converted to a sorted list first.
    num_links = min(3, len(blog_links))
    selected_links = random.sample(sorted(blog_links), num_links)
    logging.debug(f"๋๋ค์ผ๋ก ์ ํ๋ ๋งํฌ: {selected_links}")

    # One link per line.
    return "\n".join(selected_links)
# Build the Gradio interface: one text box in, one text box out, with
# scrape_blog_links as the handler.
# NOTE(review): the Korean UI strings below look mis-decoded, and the input
# `label` and the `description` literals are split across two physical lines;
# restore them from the original UTF-8 file -- confirm before shipping.
iface = gr.Interface(
fn=scrape_blog_links,
inputs=gr.Textbox(label="ํค์๋ ์
๋ ฅ", placeholder="์: ์ค์ง์ด๊ฒ์2"),
outputs=gr.Textbox(label="๋๋ค ๋ธ๋ก๊ทธ ๋งํฌ ์ถ๋ ฅ"),
title="๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ ์ถ์ถ๊ธฐ",
description="ํค์๋๋ฅผ ์
๋ ฅํ๊ณ ์คํ ๋ฒํผ์ ํด๋ฆญํ๋ฉด ๋ค์ด๋ฒ ๊ฒ์ ๊ฒฐ๊ณผ์์ ๋ค์ด๋ฒ ๋ธ๋ก๊ทธ ๋งํฌ๋ฅผ ์ถ์ถํ์ฌ ๋๋คํ๊ฒ 3๊ฐ๋ฅผ ์ถ๋ ฅํฉ๋๋ค."
)
if __name__ == "__main__":
    # Script entry point: log start-up, then launch the Gradio app with
    # debug mode enabled.
    logging.debug("์ฑ ์์")
    iface.launch(debug=True)
|