Spaces:

aliceblue11
/

naver_blog_00

Sleeping

App Files Files Community

aliceblue11 committed on Jan 13

Commit

2f00f7a

verified ·

1 Parent(s): 15112ab

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -9

app.py CHANGED Viewed

@@ -2,11 +2,22 @@ import requests
 from bs4 import BeautifulSoup
 import gradio as gr
 def scrape_naver_blog(url):
     try:
-        # Check if the URL is a mobile URL
-        if not url.startswith("https://m.blog.naver.com"):
-            return "Error: Please provide a valid mobile URL (https://m.blog.naver.com)."
         # Send a GET request to the URL
         response = requests.get(url)
@@ -19,9 +30,9 @@ def scrape_naver_blog(url):
         title_div = soup.find('div', class_='se-module se-module-text se-title-text')
         title = title_div.get_text(strip=True) if title_div else "Title not found."
-        # Extract the content
-        content_divs = soup.find_all('div', class_='se-module se-module-text se-quote')
-        content = "\n".join(div.get_text(strip=True) for div in content_divs) if content_divs else "Content not found."
         return f"제목: {title}\n내용: {content}"
@@ -34,10 +45,13 @@ def gradio_interface(url):
 iface = gr.Interface(
     fn=gradio_interface,
-    inputs=gr.Textbox(label="Enter Naver Blog Mobile URL"),
     outputs=gr.Textbox(label="Scraped Blog Content"),
-    title="Naver Blog Scraper",
-    description="Enter a Naver Blog mobile URL to scrape the title and content (text only).",
 )
 if __name__ == "__main__":

 from bs4 import BeautifulSoup
 import gradio as gr
+def convert_to_mobile_url(url):
+    """
+    Convert a standard Naver blog URL to its mobile version.
+
+    Only the scheme and host are rewritten; everything after the host
+    (path, query string, trailing slash) is kept as-is. Rewriting the
+    prefix instead of picking the last two path segments avoids building
+    a broken URL for inputs such as a trailing slash, a blog home page
+    without a post id, or a "PostView.naver?blogId=...&logNo=..." link.
+
+    Args:
+        url: The URL entered by the user. Surrounding whitespace is ignored.
+
+    Returns:
+        The "https://m.blog.naver.com/..." equivalent when the input points
+        at "blog.naver.com" over http or https; otherwise the (stripped)
+        input unchanged, e.g. when it is already a mobile URL.
+    """
+    mobile_prefix = "https://m.blog.naver.com/"
+    # Pasted URLs often carry stray spaces or a trailing newline.
+    url = url.strip()
+    for desktop_prefix in ("https://blog.naver.com/", "http://blog.naver.com/"):
+        if url.startswith(desktop_prefix):
+            return mobile_prefix + url[len(desktop_prefix):]
+    # Not a desktop blog URL (already mobile, or some other site): leave it alone.
+    return url
 def scrape_naver_blog(url):
     try:
+        # Convert URL to mobile format if necessary
+        url = convert_to_mobile_url(url)
         # Send a GET request to the URL
         response = requests.get(url)
         title_div = soup.find('div', class_='se-module se-module-text se-title-text')
         title = title_div.get_text(strip=True) if title_div else "Title not found."
+        # Extract text content excluding images
+        text_components = soup.find_all('div', class_='se-module se-module-text')
+        content = "\n".join(component.get_text(strip=True) for component in text_components if component)
         return f"제목: {title}\n내용: {content}"
 iface = gr.Interface(
     fn=gradio_interface,
+    inputs=gr.Textbox(label="Enter Naver Blog URL (Standard or Mobile)"),
     outputs=gr.Textbox(label="Scraped Blog Content"),
+    title="Naver Blog Scraper (Text Only)",
+    description=(
+        "Enter a Naver Blog URL (standard or mobile) to scrape the title and text content only. "
+        "The script will automatically convert standard URLs to mobile format."
+    ),
 )
 if __name__ == "__main__":