File size: 2,530 Bytes
4431f41
4ecdb4b
 
4431f41
 
 
 
 
4ecdb4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4431f41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
import requests
from bs4 import BeautifulSoup

def scrape_blog(url, timeout=10):
    """Fetch a Naver blog post and extract its title and body text.

    Args:
        url: Blog post URL to fetch.
        timeout: Seconds to wait for the HTTP response (new, backward-compatible
            parameter; the default keeps existing callers working).

    Returns:
        dict with keys "title", "content", and "debug_logs" (a list of
        human-readable trace messages surfaced to the UI).
    """
    debug_logs = []  # trace messages shown to the user for debugging

    try:
        # Fetch the page; a browser-like User-Agent avoids trivial bot blocks.
        # Bug fix: the original call had no timeout, so a stalled or
        # unreachable server would hang the Gradio UI indefinitely.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        debug_logs.append(f"Request sent to {url}")

        # Non-200 response: report the status code and bail out early.
        if response.status_code != 200:
            debug_logs.append(f"Error accessing blog: Status code {response.status_code}")
            return {
                "title": "Error accessing blog",
                "content": "Error accessing blog",
                "debug_logs": debug_logs,
            }

        debug_logs.append("Successfully fetched the webpage.")
        soup = BeautifulSoup(response.text, 'html.parser')

        # Title: selector targets Naver SmartEditor ONE markup (div.se-title-text).
        try:
            title_element = soup.select_one("div.se-title-text span")
            title = title_element.get_text(strip=True) if title_element else "Title not found"
            debug_logs.append(f"Title extracted: {title}")
        except Exception as e:
            title = "Error extracting title"
            debug_logs.append(f"Error extracting title: {e}")

        # Content extraction.
        # NOTE(review): select_one returns only the FIRST span inside the main
        # container, so multi-paragraph posts are truncated — confirm whether
        # select(...) + join is intended; kept as-is to preserve behavior.
        try:
            content_element = soup.select_one("div.se-main-container span")
            content = content_element.get_text(strip=True) if content_element else "Content not found"
            debug_logs.append(f"Content extracted: {content}")
        except Exception as e:
            content = "Error extracting content"
            debug_logs.append(f"Error extracting content: {e}")

    except Exception as e:
        # Network failure, DNS error, timeout, etc. — report via the result
        # dict instead of crashing the UI callback.
        title = "Error accessing blog"
        content = "Error accessing blog"
        debug_logs.append(f"Error accessing blog: {e}")

    return {"title": title, "content": content, "debug_logs": debug_logs}

def interface_function(url):
    """Gradio callback: scrape *url* and render the result as one text blob.

    Returns the title, the content, and the accumulated debug log joined
    into a single display string for the output textbox.
    """
    scraped = scrape_blog(url)
    sections = [
        f"제목: {scraped['title']}",
        "",
        f"내용: {scraped['content']}",
        "",
        "[Debug Logs]",
        "\n".join(scraped["debug_logs"]),
    ]
    return "\n".join(sections)

# --- Gradio UI wiring (runs at import time) ---
with gr.Blocks() as demo:
    gr.Markdown("# 네이버 블로그 크롤러")
    gr.Markdown("블로그 URL을 입력하면 제목과 내용을 추출합니다.")

    # Input row: URL textbox next to the trigger button.
    with gr.Row():
        blog_url_box = gr.Textbox(label="네이버 블로그 URL")
        scrape_btn = gr.Button("크롤링 시작")

    result_box = gr.Textbox(label="결과")

    # Clicking the button feeds the URL through interface_function
    # and displays the formatted result.
    scrape_btn.click(interface_function, inputs=blog_url_box, outputs=result_box)

# Start the app (blocking call).
demo.launch()