# blogcr111111 / app.py — Hugging Face Space (commit 4ecdb4b)
# NOTE: the lines above/below were web-page residue from the HF file viewer
# ("AIRider's picture", "raw / history / blame", size) — kept only as a comment
# so the file parses as Python.
import gradio as gr
import requests
from bs4 import BeautifulSoup
def scrape_blog(url):
    """Scrape the title and body text of a Naver blog post.

    Args:
        url: Full URL of the post. Assumes Naver SmartEditor markup
             (``div.se-title-text`` / ``div.se-main-container``) — TODO
             confirm this holds for the URLs users actually paste.

    Returns:
        dict with keys ``title``, ``content`` and ``debug_logs`` (a list of
        step-by-step diagnostic messages collected while scraping).
        On any failure the title/content fields carry an error string
        instead of raising, so the Gradio UI always gets something to show.
    """
    debug_logs = []  # diagnostics returned to the caller alongside the data
    # Defaults cover every failure path (network error, non-200 status).
    title = "Error accessing blog"
    content = "Error accessing blog"
    try:
        # timeout keeps the UI from hanging forever on an unresponsive host;
        # the User-Agent avoids trivial bot blocking.
        response = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10
        )
        debug_logs.append(f"Request sent to {url}")

        if response.status_code == 200:
            debug_logs.append("Successfully fetched the webpage.")
            soup = BeautifulSoup(response.text, 'html.parser')

            # --- Title: first span inside the SmartEditor title block ---
            try:
                title_element = soup.select_one("div.se-title-text span")
                title = title_element.get_text(strip=True) if title_element else "Title not found"
                debug_logs.append(f"Title extracted: {title}")
            except Exception as e:
                title = "Error extracting title"
                debug_logs.append(f"Error extracting title: {e}")

            # --- Content: whole main container ---
            # BUGFIX: the old selector ("div.se-main-container span") matched
            # only the FIRST span, truncating the post to its first text run.
            # Extract the text of the entire container instead.
            try:
                content_element = soup.select_one("div.se-main-container")
                content = (
                    content_element.get_text(separator=" ", strip=True)
                    if content_element
                    else "Content not found"
                )
                debug_logs.append(f"Content extracted: {content}")
            except Exception as e:
                content = "Error extracting content"
                debug_logs.append(f"Error extracting content: {e}")
        else:
            debug_logs.append(f"Error accessing blog: Status code {response.status_code}")
    except Exception as e:
        # Broad catch is deliberate: this is the top-level boundary for the
        # scrape; the error text is surfaced to the user via debug_logs.
        debug_logs.append(f"Error accessing blog: {e}")

    return {"title": title, "content": content, "debug_logs": debug_logs}
def interface_function(url):
    """Run scrape_blog() and flatten its result into one display string.

    Returns the title, content and collected debug log as a single block of
    text suitable for a Gradio Textbox output.
    """
    scraped = scrape_blog(url)
    log_text = "\n".join(scraped["debug_logs"])
    return (
        f"제목: {scraped['title']}\n\n"
        f"내용: {scraped['content']}\n\n"
        f"[Debug Logs]\n{log_text}"
    )
# --- Gradio UI -----------------------------------------------------------
# Layout reconstructed from the flattened source: the original indentation
# was lost, so which widgets sat inside the Row is an assumption — verify
# against the deployed Space if the layout matters.
with gr.Blocks() as demo:
    gr.Markdown("# 네이버 블로그 크롤러")
    gr.Markdown("블로그 URL을 입력하면 제목과 내용을 추출합니다.")
    with gr.Row():
        url_box = gr.Textbox(label="네이버 블로그 URL")
    run_button = gr.Button("크롤링 시작")
    result_box = gr.Textbox(label="결과")
    # Clicking the button feeds the URL through interface_function and
    # writes the formatted result into the output textbox.
    run_button.click(interface_function, inputs=url_box, outputs=result_box)

# Launch the app (Hugging Face Spaces expects the Blocks object to be `demo`).
demo.launch()