Update app.py
app.py
CHANGED
@@ -4,44 +4,52 @@ from bs4 import BeautifulSoup
 
 def scrape_naver_blog(url):
     try:
-        # Set the User-Agent
+        # Step 1: set the User-Agent
+        print("[DEBUG] Step 1: Setting User-Agent")
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36"
         }
         response = requests.get(url, headers=headers)
 
-        # Check whether the request succeeded
+        # Step 2: check whether the HTTP request succeeded
+        print(f"[DEBUG] Step 2: HTTP Response Code: {response.status_code}")
         if response.status_code != 200:
             debug_message = f"HTTP 요청 실패. 상태 코드: {response.status_code}"
             print(debug_message)
             return debug_message
 
-        # Parse the HTML with BeautifulSoup
+        # Step 3: parse the HTML with BeautifulSoup
+        print("[DEBUG] Step 3: Parsing HTML with BeautifulSoup")
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Scrape the title
+        # Step 4: scrape the title
+        print("[DEBUG] Step 4: Crawling Title")
         try:
             title_element = soup.select_one(
                 "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
             )
             title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없습니다."
+            print(f"[DEBUG] Title: {title}")
         except Exception as e:
             debug_message = f"제목 크롤링 중 오류 발생: {e}"
             print(debug_message)
             title = debug_message
 
-        # Scrape the content
+        # Step 5: scrape the content
+        print("[DEBUG] Step 5: Crawling Content")
         try:
             content_element = soup.select_one(
                 "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
             )
             content = content_element.get_text(strip=True) if content_element else "내용을 찾을 수 없습니다."
+            print(f"[DEBUG] Content: {content}")
         except Exception as e:
             debug_message = f"내용 크롤링 중 오류 발생: {e}"
             print(debug_message)
             content = debug_message
 
-        # Return the result
+        # Step 6: return the result
+        print("[DEBUG] Step 6: Returning Results")
         return {"제목": title, "내용": content}
 
     except Exception as e:
@@ -50,9 +58,9 @@ def scrape_naver_blog(url):
         return debug_message
 
 def gradio_interface(url):
-    print(f"
+    print(f"[DEBUG] Gradio Input URL: {url}")
     result = scrape_naver_blog(url)
-    print(f"
+    print(f"[DEBUG] Crawling Result: {result}")
     return f"제목: {result['제목']}\n내용: {result['내용']}"
 
 # Build the Gradio interface
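
The diff ends at the comment that introduces the Gradio wiring, which this commit does not touch. For reference, here is a minimal sketch of what that wiring typically looks like for an app like this one; the `gr.Interface` layout, the component labels, and the `launch()` call are assumptions, not part of the commit:

# Hypothetical sketch, not from the commit: typical Gradio wiring for app.py.
# Assumes `gradio_interface` is defined as in the diff above, alongside the
# existing `import requests` and `from bs4 import BeautifulSoup`.
import gradio as gr

demo = gr.Interface(
    fn=gradio_interface,                        # maps a URL string to the result string
    inputs=gr.Textbox(label="Naver blog URL"),  # label is an assumption
    outputs=gr.Textbox(label="Result"),         # label is an assumption
)

if __name__ == "__main__":
    demo.launch()

With wiring like this, a URL entered in the textbox is passed to gradio_interface, so the [DEBUG] prints added by this commit show up in the Space's container logs on every request.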