Update app.py
app.py
CHANGED
@@ -4,44 +4,52 @@ from bs4 import BeautifulSoup
 
 def scrape_naver_blog(url):
     try:
-        # Set the User-Agent
+        # Step 1: set the User-Agent
+        print("[DEBUG] Step 1: Setting User-Agent")
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
         }
         response = requests.get(url, headers=headers)
 
-        # Check whether the request succeeded
+        # Step 2: check whether the HTTP request succeeded
+        print(f"[DEBUG] Step 2: HTTP Response Code: {response.status_code}")
         if response.status_code != 200:
             debug_message = f"HTTP 요청 실패. 상태 코드: {response.status_code}"
             print(debug_message)
             return debug_message
 
-        # Parse the HTML with BeautifulSoup
+        # Step 3: parse the HTML with BeautifulSoup
+        print("[DEBUG] Step 3: Parsing HTML with BeautifulSoup")
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Scrape the title
+        # Step 4: scrape the title
+        print("[DEBUG] Step 4: Crawling Title")
         try:
             title_element = soup.select_one(
                 "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
             )
             title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
+            print(f"[DEBUG] Title: {title}")
         except Exception as e:
             debug_message = f"제목 크롤링 중 오류 발생: {e}"
             print(debug_message)
             title = debug_message
 
-        # Scrape the content
+        # Step 5: scrape the content
+        print("[DEBUG] Step 5: Crawling Content")
         try:
             content_element = soup.select_one(
                 "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
             )
             content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
+            print(f"[DEBUG] Content: {content}")
         except Exception as e:
             debug_message = f"내용 크롤링 중 오류 발생: {e}"
             print(debug_message)
             content = debug_message
 
-        # Return the result
+        # Step 6: return the result
+        print("[DEBUG] Step 6: Returning Results")
         return {"제목": title, "내용": content}
 
     except Exception as e:
@@ -50,9 +58,9 @@ def scrape_naver_blog(url):
         return debug_message
 
 def gradio_interface(url):
-    print(f"
+    print(f"[DEBUG] Gradio Input URL: {url}")
     result = scrape_naver_blog(url)
-    print(f"
+    print(f"[DEBUG] Crawling Result: {result}")
     return f"제목: {result['제목']}\n내용: {result['내용']}"
 
 # Build the Gradio interface
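
The diff ends at the comment that introduces the Gradio wiring, which this commit does not touch. For reference, here is a minimal sketch of what that wiring typically looks like for an app like this one; the `gr.Interface` layout, the component labels, and the `launch()` call are assumptions, not part of the commit:

# Hypothetical sketch, not from the commit: typical Gradio wiring for app.py.
# Assumes `gradio_interface` is defined as in the diff above, alongside the
# existing `import requests` and `from bs4 import BeautifulSoup`.
import gradio as gr

demo = gr.Interface(
    fn=gradio_interface,                        # maps a URL string to the result string
    inputs=gr.Textbox(label="Naver blog URL"),  # label is an assumption
    outputs=gr.Textbox(label="Result"),         # label is an assumption
)

if __name__ == "__main__":
    demo.launch()

With wiring like this, a URL entered in the textbox is passed to gradio_interface, so the [DEBUG] prints added by this commit show up in the Space's container logs on every request.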