Update app.py
app.py
CHANGED
@@ -4,54 +4,40 @@ import gradio as gr

def scrape_naver_blog(url):
    try:
-        #
+        # Check if the URL is a mobile URL
+        if not url.startswith("https://m.blog.naver.com"):
+            return "Error: Please provide a valid mobile URL (https://m.blog.naver.com)."

-        #
-        response
+        # Send a GET request to the URL
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an error for HTTP issues

-        #
-        if response.status_code != 200:
-            return f"오류: 페이지에 접근할 수 없습니다. HTTP 상태 코드: {response.status_code}"
+        # Parse the HTML content
+        soup = BeautifulSoup(response.text, 'html.parser')

-        #
+        # Extract the title
+        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
+        title = title_div.get_text(strip=True) if title_div else "Title not found."

-        #
+        # Extract the content
+        content_divs = soup.find_all('div', class_='se-module se-module-text se-quote')
+        content = "\n".join(div.get_text(strip=True) for div in content_divs) if content_divs else "Content not found."

-        # CSS 선택자 변환
-        title_element = soup.select_one(title_xpath.replace(" > ", " > "))
-        content_element = soup.select_one(content_xpath.replace(" > ", " > "))
-
-        if not title_element or not content_element:
-            return "오류: 제공된 XPath로 제목이나 내용을 찾을 수 없습니다."
-
-        # 텍스트 추출
-        title = title_element.get_text(strip=True)
-        content = content_element.get_text(strip=True)
-
-        # 결과 반환
        return f"제목: {title}\n내용: {content}"

    except Exception as e:
-        print(f"[DEBUG] 예외 발생: {str(e)}")
-        return f"오류가 발생했습니다: {str(e)}"
+        return f"Error occurred: {e}"

-# Gradio
+# Gradio interface
def gradio_interface(url):
    return scrape_naver_blog(url)

iface = gr.Interface(
    fn=gradio_interface,
-    inputs=gr.Textbox(label="
-    outputs=gr.Textbox(label="
-    title="
-    description="
+    inputs=gr.Textbox(label="Enter Naver Blog Mobile URL"),
+    outputs=gr.Textbox(label="Scraped Blog Content"),
+    title="Naver Blog Scraper",
+    description="Enter a Naver Blog mobile URL to scrape the title and content (text only).",
)

if __name__ == "__main__":
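For readers who don't read Korean: in the unchanged return line, 제목 means "Title" and 내용 means "Content", and the removed 오류 strings were Korean error messages. Since the rewritten scrape_naver_blog only accepts mobile-format URLs, here is a minimal sketch of how a caller might normalize a desktop Naver blog address before invoking it. The helper name to_mobile_url, the example URL, and the assumption that desktop and mobile posts share the same /<blog_id>/<post_id> path are illustrative, not part of this commit.

from urllib.parse import urlparse, urlunparse

def to_mobile_url(url: str) -> str:
    # Hypothetical helper (not in app.py): swap the desktop host for the mobile one
    # so the URL passes the startswith("https://m.blog.naver.com") check above.
    parts = urlparse(url)
    if parts.netloc == "blog.naver.com":
        parts = parts._replace(netloc="m.blog.naver.com")
    return urlunparse(parts)

# Example with an illustrative URL:
# to_mobile_url("https://blog.naver.com/some_blog_id/223000000000")
# -> "https://m.blog.naver.com/some_blog_id/223000000000"

The diff cuts off at the __main__ guard; the remainder of app.py is unchanged and not shown (presumably it calls iface.launch()).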