aliceblue11 committed
Commit 15112ab · verified · 1 Parent(s): 97b8c76

Update app.py

Files changed (1)
  1. app.py +20 -34
app.py CHANGED
@@ -4,54 +4,40 @@ import gradio as gr
 
 def scrape_naver_blog(url):
     try:
-        # Debug log: check the URL
-        print(f"[DEBUG] 스크래핑할 URL: {url}")
+        # Check if the URL is a mobile URL
+        if not url.startswith("https://m.blog.naver.com"):
+            return "Error: Please provide a valid mobile URL (https://m.blog.naver.com)."
 
-        # Fetch the Naver blog HTML
-        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
-        response = requests.get(url, headers=headers)
+        # Send a GET request to the URL
+        response = requests.get(url)
+        response.raise_for_status()  # Raise an error for HTTP issues
 
-        # Check the HTTP status code
-        print(f"[DEBUG] HTTP 응답 상태 코드: {response.status_code}")
-        if response.status_code != 200:
-            return f"오류: 페이지에 접근할 수 없습니다. HTTP 상태 코드: {response.status_code}"
+        # Parse the HTML content
+        soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Parse the HTML
-        soup = BeautifulSoup(response.text, "html.parser")
+        # Extract the title
+        title_div = soup.find('div', class_='se-module se-module-text se-title-text')
+        title = title_div.get_text(strip=True) if title_div else "Title not found."
 
-        # Scrape the title and content
-        title_xpath = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(1) > div > div > div:nth-of-type(2)"
-        content_xpath = "div > div > div > div:nth-of-type(10) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div > div > div > div:nth-of-type(2) > div:nth-of-type(2) > div > div"
+        # Extract the content
+        content_divs = soup.find_all('div', class_='se-module se-module-text se-quote')
+        content = "\n".join(div.get_text(strip=True) for div in content_divs) if content_divs else "Content not found."
 
-        # Convert to CSS selectors
-        title_element = soup.select_one(title_xpath.replace(" > ", " > "))
-        content_element = soup.select_one(content_xpath.replace(" > ", " > "))
-
-        if not title_element or not content_element:
-            return "오류: 제공된 XPath로 제목이나 내용을 찾을 수 없습니다."
-
-        # Extract the text
-        title = title_element.get_text(strip=True)
-        content = content_element.get_text(strip=True)
-
-        # Return the result
         return f"제목: {title}\n내용: {content}"
 
     except Exception as e:
-        # Debug log: on exception
-        print(f"[DEBUG] 예외 발생: {str(e)}")
-        return f"오류가 발생했습니다: {str(e)}"
+        return f"Error occurred: {e}"
 
-# Gradio interface setup
+# Gradio interface
 def gradio_interface(url):
     return scrape_naver_blog(url)
 
 iface = gr.Interface(
     fn=gradio_interface,
-    inputs=gr.Textbox(label="네이버 블로그 URL"),
-    outputs=gr.Textbox(label="스크래핑 결과"),
-    title="네이버 블로그 스크래퍼",
-    description="네이버 블로그 URL 입력하면 제목과 내용을 스크래핑하여 출력합니다.",
+    inputs=gr.Textbox(label="Enter Naver Blog Mobile URL"),
+    outputs=gr.Textbox(label="Scraped Blog Content"),
+    title="Naver Blog Scraper",
+    description="Enter a Naver Blog mobile URL to scrape the title and content (text only).",
 )
 
 if __name__ == "__main__":
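
The updated parser keys on exact class-attribute strings (se-module se-module-text se-title-text for the title, se-module se-module-text se-quote for the body). A quick way to sanity-check that lookup without requesting a live Naver page is to run the same find/find_all calls against a small inline snippet. This is only a minimal offline sketch: the SAMPLE_HTML below is invented for illustration and is not Naver's actual markup.

# Offline sketch of the extraction logic introduced in this commit.
from bs4 import BeautifulSoup

SAMPLE_HTML = """
<div class="se-module se-module-text se-title-text"><p>My sample post title</p></div>
<div class="se-module se-module-text se-quote"><p>First paragraph of the quote block.</p></div>
<div class="se-module se-module-text se-quote"><p>Second paragraph.</p></div>
"""

soup = BeautifulSoup(SAMPLE_HTML, "html.parser")

# class_ with a multi-class string matches only an identical class attribute
# value, which mirrors how the updated app.py looks up the title block.
title_div = soup.find("div", class_="se-module se-module-text se-title-text")
title = title_div.get_text(strip=True) if title_div else "Title not found."

content_divs = soup.find_all("div", class_="se-module se-module-text se-quote")
content = "\n".join(div.get_text(strip=True) for div in content_divs) if content_divs else "Content not found."

print(f"제목: {title}\n내용: {content}")

Because the multi-class string must match the class attribute exactly (same classes, same order), a post whose body blocks carry a different class string than se-module se-module-text se-quote will simply produce the "Content not found." fallback rather than an exception.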