Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 
 def convert_to_mobile_url(url):
     """
-
+    Convert a PC URL to a mobile URL.
     """
     if "m.blog.naver.com" not in url:
         if "blog.naver.com" in url:
@@ -16,41 +16,45 @@ def convert_to_mobile_url(url):
     return url
 
 def scrape_naver_blog(url):
+    """
+    Scrape the title, content, and image URLs of a Naver blog post.
+    """
     try:
-        #
-        print(f"Original URL: {url}")
-
-        # Convert to a mobile URL
+        # Mobile URL conversion
         mobile_url = convert_to_mobile_url(url)
         print(f"Converted Mobile URL: {mobile_url}")
-
+
         response = requests.get(mobile_url)
         response.raise_for_status()
-
-        # Debugging: check the HTTP response status
-        print(f"Response Status Code: {response.status_code}")
-
+
         soup = BeautifulSoup(response.text, 'html.parser')
-
+
         # Scrape the title
         title_element = soup.find("div", class_="se-module se-module-text se-title-text")
         title = title_element.get_text(strip=True) if title_element else "Title not found"
-
-        #
-
-
-        # Scrape the content
-        content_elements = soup.find_all("div", class_="se-module se-module-text se-quote")
+
+        # Scrape the body content
+        content_elements = soup.find_all("div", class_="se-module se-module-text")
         content = "\n".join(
             elem.get_text(strip=True) for elem in content_elements
         ) if content_elements else "Content not found"
-
-        #
+
+        # Scrape the image URLs
+        image_elements = soup.find_all("img", class_="se-image-resource")
+        image_urls = [
+            img["src"] for img in image_elements if "src" in img.attrs
+        ]
+
+        # Print debugging messages
+        print(f"Scraped Title: {title}")
         print(f"Scraped Content: {content}")
-
-
+        print(f"Scraped Images: {image_urls}")
+
+        # Return the result
+        result = f"Title: {title}\n\nContent: {content}\n\nImage URLs:\n" + "\n".join(image_urls)
+        return result
+
     except Exception as e:
-        # Debugging: print the error message
         print(f"Error: {e}")
         return f"Error: {e}"
 
@@ -63,7 +67,7 @@ interface = gr.Interface(
     inputs=gr.Textbox(label="Naver Blog URL"),
     outputs=gr.Textbox(label="Scraping Result"),
     title="Naver Blog Scraping",
-    description="Naver
+    description="Scrapes the title, content, and image URLs of a Naver blog."
 )
 
 if __name__ == "__main__":
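
The hunks above skip over the middle of convert_to_mobile_url, so the actual conversion step never appears in the diff. A minimal sketch of what the elided body plausibly does, assuming a plain host swap (only the guard clauses and the final return are taken from the diff; the replace call is an assumption):

def convert_to_mobile_url(url):
    """
    Convert a PC URL to a mobile URL (sketch of the elided body).
    """
    if "m.blog.naver.com" not in url:
        if "blog.naver.com" in url:
            # Assumption: swapping the host is all the helper does; the real
            # implementation may also normalize paths or query parameters.
            return url.replace("blog.naver.com", "m.blog.naver.com")
    return url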
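
The substantive change in scrape_naver_blog is the content selector: the old code collected only quote modules ("se-module se-module-text se-quote"), while the new code collects plain text modules ("se-module se-module-text"). BeautifulSoup matches a multi-class string against the exact value of the class attribute, so the new selector does not also sweep in the title module, whose class attribute carries the extra se-title-text token. A small self-contained demo of that matching behavior (the toy markup is an assumption mimicking Naver's SmartEditor classes):

from bs4 import BeautifulSoup

html = """
<div class="se-module se-module-text se-title-text">Post title</div>
<div class="se-module se-module-text">Paragraph text</div>
<div class="se-module se-module-text se-quote">A quote</div>
"""
soup = BeautifulSoup(html, "html.parser")

# A multi-class string is matched against the exact class attribute value,
# so this finds only the plain text module.
print([d.get_text() for d in soup.find_all("div", class_="se-module se-module-text")])
# -> ['Paragraph text']

# The old selector matched only the quote module.
print([d.get_text() for d in soup.find_all("div", class_="se-module se-module-text se-quote")])
# -> ['A quote']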
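
Because the updated scrape_naver_blog returns the formatted result string instead of only printing it, it can be smoke-tested without launching the Gradio interface; importing app only builds the interface, since launching sits behind the __main__ guard. A quick check, assuming the function is importable from app.py (the blog URL is a hypothetical placeholder, not one from the diff):

from app import scrape_naver_blog

# Hypothetical post URL; any public Naver blog post URL should work.
result = scrape_naver_blog("https://blog.naver.com/example_user/223000000000")
print(result)  # "Title: ...", "Content: ...", "Image URLs: ..." or "Error: ..."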