File size: 2,773 Bytes
94cc512
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import logging
import random
from urllib.parse import quote_plus

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Configure the root logger so that DEBUG-level (and higher) messages are emitted.
logging.basicConfig(level=logging.DEBUG)

def scrape_blog_links(keyword):
    """Return up to three random Naver blog links for a search keyword.

    Requests the Naver blog-search results page for ``keyword``, collects
    every anchor inside a ``div.detail_box`` element whose ``href`` (or,
    failing that, ``cru``) attribute starts with
    ``https://blog.naver.com/``, and picks at most three of the distinct
    links at random.

    Args:
        keyword: Search term. It is URL-encoded before being appended to
            the query string, so reserved characters such as ``&``, ``#``,
            ``+`` or ``%`` are searched literally instead of corrupting
            the query.

    Returns:
        A string with the selected links separated by newlines, or an
        error message string when the request raises, the response status
        is not 200, or no blog link is found.
    """
    base_url = "https://search.naver.com/search.naver?ssc=tab.blog.all&sm=tab_jum&query="
    blog_prefix = "https://blog.naver.com/"
    max_links = 3

    # Escape the keyword: concatenating it raw lets '&' start a new query
    # parameter and '#' cut the rest of the query off as a URL fragment.
    url = base_url + quote_plus(keyword)
    logging.debug(f"์š”์ฒญ URL: {url}")

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Only the network call lives in the try body. This function is the
        # UI callback, so any failure is reported as a message, not raised.
        response = requests.get(url, headers=headers, timeout=10)
    except Exception as e:
        logging.exception("์š”์ฒญ ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ")
        return f"์˜ˆ์™ธ ๋ฐœ์ƒ: {e}"

    logging.debug(f"์‘๋‹ต ์ƒํƒœ ์ฝ”๋“œ: {response.status_code}")
    if response.status_code != 200:
        logging.error("ํŽ˜์ด์ง€ ์š”์ฒญ ์‹คํŒจ")
        return "ํŽ˜์ด์ง€ ์š”์ฒญ์— ์‹คํŒจํ•˜์˜€์Šต๋‹ˆ๋‹ค."

    soup = BeautifulSoup(response.text, "html.parser")
    detail_boxes = soup.find_all("div", class_="detail_box")
    logging.debug(f"detail_box ๊ฐœ์ˆ˜: {len(detail_boxes)}")

    # A set removes duplicates when the same post is linked more than once.
    blog_links = set()
    for box in detail_boxes:
        for a in box.find_all("a"):
            href = a.get("href", "")
            cru = a.get("cru", "")
            logging.debug(f"a ํƒœ๊ทธ href: {href}, cru: {cru}")
            # Keep Naver blog links only; prefer href and fall back to cru.
            if href.startswith(blog_prefix):
                blog_links.add(href)
            elif cru.startswith(blog_prefix):
                blog_links.add(cru)

    logging.debug(f"์ถ”์ถœ๋œ ๋ธ”๋กœ๊ทธ ๋งํฌ ๊ฐœ์ˆ˜: {len(blog_links)}")
    if not blog_links:
        return "๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

    # random.sample() needs a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError since 3.11. Sorting first also makes
    # the pick reproducible under a fixed random seed.
    num_links = min(max_links, len(blog_links))
    selected_links = random.sample(sorted(blog_links), num_links)
    logging.debug(f"๋žœ๋ค์œผ๋กœ ์„ ํƒ๋œ ๋งํฌ: {selected_links}")

    # One link per line.
    return "\n".join(selected_links)

# Build the Gradio interface: one text box supplies the keyword passed to
# scrape_blog_links, and a second text box shows the string it returns.
# NOTE(review): the non-ASCII string literals below look mis-decoded
# (mojibake) — confirm the file's encoding against the original source.
iface = gr.Interface(
    fn=scrape_blog_links,
    inputs=gr.Textbox(label="ํ‚ค์›Œ๋“œ ์ž…๋ ฅ", placeholder="์˜ˆ: ์˜ค์ง•์–ด๊ฒŒ์ž„2"),
    outputs=gr.Textbox(label="๋žœ๋ค ๋ธ”๋กœ๊ทธ ๋งํฌ ์ถœ๋ ฅ"),
    title="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ ์ถ”์ถœ๊ธฐ",
    description="ํ‚ค์›Œ๋“œ๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์‹คํ–‰ ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜๋ฉด ๋„ค์ด๋ฒ„ ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ์—์„œ ๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ ๋งํฌ๋ฅผ ์ถ”์ถœํ•˜์—ฌ ๋žœ๋คํ•˜๊ฒŒ 3๊ฐœ๋ฅผ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค."
)

# Start the web UI only when this file is executed as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    logging.debug("์•ฑ ์‹œ์ž‘")
    # Launch the interface with Gradio's debug option enabled.
    iface.launch(debug=True)