AIRider committed on
Commit
3731b33
·
verified ·
1 Parent(s): 34aa4af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -4,44 +4,52 @@ from bs4 import BeautifulSoup
4
 
5
  def scrape_naver_blog(url):
6
  try:
7
- # User-Agent 설정
 
8
  headers = {
9
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
10
  }
11
  response = requests.get(url, headers=headers)
12
 
13
- # 요청 성공 여부 확인
 
14
  if response.status_code != 200:
15
  debug_message = f"HTTP 요청 실패. 상태 코드: {response.status_code}"
16
  print(debug_message)
17
  return debug_message
18
 
19
- # BeautifulSoup을 사용하여 HTML 파싱
 
20
  soup = BeautifulSoup(response.text, 'html.parser')
21
 
22
- # 제목 크롤링
 
23
  try:
24
  title_element = soup.select_one(
25
  "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
26
  )
27
  title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
 
28
  except Exception as e:
29
  debug_message = f"제목 크롤링 중 오류 발생: {e}"
30
  print(debug_message)
31
  title = debug_message
32
 
33
- # 내용 크롤링
 
34
  try:
35
  content_element = soup.select_one(
36
  "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
37
  )
38
  content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
 
39
  except Exception as e:
40
  debug_message = f"내용 크롤링 중 오류 발생: {e}"
41
  print(debug_message)
42
  content = debug_message
43
 
44
- # 결과 출력
 
45
  return {"제목": title, "내용": content}
46
 
47
  except Exception as e:
@@ -50,9 +58,9 @@ def scrape_naver_blog(url):
50
  return debug_message
51
 
52
def gradio_interface(url):
    """Gradio entry point: scrape a Naver blog post and format the result.

    Args:
        url: The blog post URL entered by the user.

    Returns:
        A display string containing the scraped title and content, or the
        scraper's error message when scraping failed.
    """
    print(f"입력된 URL: {url}")
    result = scrape_naver_blog(url)
    print(f"크롤링 결과: {result}")
    # On failure scrape_naver_blog returns a plain error string, not a dict;
    # indexing it with result['제목'] would raise TypeError. Surface the
    # error message to the user instead of crashing the interface.
    if not isinstance(result, dict):
        return str(result)
    return f"제목: {result['제목']}\n내용: {result['내용']}"
57
 
58
  # Gradio 인터페이스 구성
 
4
 
5
  def scrape_naver_blog(url):
6
  try:
7
+ # Step 1: User-Agent 설정
8
+ print("[DEBUG] Step 1: Setting User-Agent")
9
  headers = {
10
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
11
  }
12
  response = requests.get(url, headers=headers)
13
 
14
+ # Step 2: HTTP 요청 성공 여부 확인
15
+ print(f"[DEBUG] Step 2: HTTP Response Code: {response.status_code}")
16
  if response.status_code != 200:
17
  debug_message = f"HTTP 요청 실패. 상태 코드: {response.status_code}"
18
  print(debug_message)
19
  return debug_message
20
 
21
+ # Step 3: BeautifulSoup을 사용하여 HTML 파싱
22
+ print("[DEBUG] Step 3: Parsing HTML with BeautifulSoup")
23
  soup = BeautifulSoup(response.text, 'html.parser')
24
 
25
+ # Step 4: 제목 크롤링
26
+ print("[DEBUG] Step 4: Crawling Title")
27
  try:
28
  title_element = soup.select_one(
29
  "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
30
  )
31
  title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
32
+ print(f"[DEBUG] Title: {title}")
33
  except Exception as e:
34
  debug_message = f"제목 크롤링 중 오류 발생: {e}"
35
  print(debug_message)
36
  title = debug_message
37
 
38
+ # Step 5: 내용 크롤링
39
+ print("[DEBUG] Step 5: Crawling Content")
40
  try:
41
  content_element = soup.select_one(
42
  "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
43
  )
44
  content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
45
+ print(f"[DEBUG] Content: {content}")
46
  except Exception as e:
47
  debug_message = f"내용 크롤링 중 오류 발생: {e}"
48
  print(debug_message)
49
  content = debug_message
50
 
51
+ # Step 6: 결과 출력
52
+ print("[DEBUG] Step 6: Returning Results")
53
  return {"제목": title, "내용": content}
54
 
55
  except Exception as e:
 
58
  return debug_message
59
 
60
def gradio_interface(url):
    """Gradio entry point: scrape a Naver blog post and format the result.

    Args:
        url: The blog post URL entered by the user.

    Returns:
        A display string containing the scraped title and content, or the
        scraper's error message when scraping failed.
    """
    print(f"[DEBUG] Gradio Input URL: {url}")
    result = scrape_naver_blog(url)
    print(f"[DEBUG] Crawling Result: {result}")
    # On failure scrape_naver_blog returns a plain error string, not a dict;
    # indexing it with result['제목'] would raise TypeError. Surface the
    # error message to the user instead of crashing the interface.
    if not isinstance(result, dict):
        return str(result)
    return f"제목: {result['제목']}\n내용: {result['내용']}"
65
 
66
  # Gradio 인터페이스 구성