aliceblue11 committed on
Commit
9e50054
·
verified ·
1 Parent(s): 17330bc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from selenium import webdriver
3
+ from selenium.webdriver.common.by import By
4
+ from selenium.webdriver.chrome.service import Service
5
+ from webdriver_manager.chrome import ChromeDriverManager
6
+ import time
7
+
8
# Function to scrape Naver blog title and content
def scrape_naver_blog(url):
    """Scrape the title and first content paragraph of a Naver blog post.

    Parameters
    ----------
    url : str
        Full URL of the Naver blog post.

    Returns
    -------
    str
        "제목: <title>\n내용: <content>" on success. If the title or
        content element cannot be located, that field carries an error
        message instead; if the WebDriver cannot be created (or
        navigation fails), a single error string is returned.
    """
    try:
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')  # Run in headless mode
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')

        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=options
        )
        # FIX: the original only called driver.quit() on the success path,
        # leaking the Chrome process if driver.get() or the waits raised.
        # try/finally guarantees the browser is closed in every case.
        try:
            driver.get(url)
            time.sleep(3)  # Crude wait for the page to load

            try:
                # Extract title.
                # NOTE(review): absolute XPath — extremely brittle against
                # any Naver layout change; a CSS/class selector would be
                # more robust, but is kept as-is to preserve behavior.
                title_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[2]/div/p/span"
                title_element = driver.find_element(By.XPATH, title_xpath)
                title = title_element.text.strip()
            except Exception as e:
                title = f"Error extracting title: {e}"

            try:
                # Extract content (first paragraph only, per this XPath).
                content_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]/div/div/div/p[1]/span"
                content_element = driver.find_element(By.XPATH, content_xpath)
                content = content_element.text.strip()
            except Exception as e:
                content = f"Error extracting content: {e}"

            # Return the results
            return f"제목: {title}\n내용: {content}"
        finally:
            driver.quit()

    except Exception as e:
        return f"Error initializing WebDriver: {e}"
44
+
45
# Gradio Interface
def scrape_interface(url):
    """Gradio callback: forward *url* to the scraper and return its text."""
    return scrape_naver_blog(url)
49
+
50
# Wire up the web UI: one URL textbox in, one result textbox out.
url_box = gr.Textbox(label="Naver Blog URL")
result_box = gr.Textbox(label="Scraped Content")

interface = gr.Interface(
    fn=scrape_interface,
    inputs=url_box,
    outputs=result_box,
    title="Naver Blog Scraper",
    description="Enter the URL of a Naver blog to scrape its title and content.",
)

if __name__ == "__main__":
    # debug=True surfaces tracebacks in the console while developing.
    interface.launch(debug=True)