Spaces:

AIRider
/

blogcr111111

Sleeping

App Files Files Community

AIRider committed on Jan 13

Commit

4431f41

verified ·

1 Parent(s): 633dacd

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -0

app.py CHANGED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
+import time
+def scrape_blog(url):
+    debug_logs = []  # 디버깅 메시지 저장용
+    # Selenium WebDriver 설정
+    chrome_options = Options()
+    chrome_options.add_argument("--headless")  # 브라우저 창을 띄우지 않음
+    chrome_options.add_argument("--no-sandbox")
+    chrome_options.add_argument("--disable-dev-shm-usage")
+    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
+    debug_logs.append("WebDriver initialized.")
+    try:
+        driver.get(url)
+        debug_logs.append(f"Navigated to {url}")
+        time.sleep(3)  # 페이지 로드 대기
+        debug_logs.append("Waited for page to load.")
+        # 제목 크롤링
+        try:
+            title_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[2]/div/p/span"
+            title_element = driver.find_element(By.XPATH, title_xpath)
+            title = title_element.text.strip()
+            debug_logs.append(f"Title extracted: {title}")
+        except Exception as e:
+            title = "Error extracting title"
+            debug_logs.append(f"Error extracting title: {e}")
+        # 내용 크롤링
+        try:
+            content_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]/div/div/div/p[1]/span"
+            content_element = driver.find_element(By.XPATH, content_xpath)
+            content = content_element.text.strip()
+            debug_logs.append(f"Content extracted: {content}")
+        except Exception as e:
+            content = "Error extracting content"
+            debug_logs.append(f"Error extracting content: {e}")
+    except Exception as e:
+        title = "Error accessing blog"
+        content = "Error accessing blog"
+        debug_logs.append(f"Error accessing blog: {e}")
+    finally:
+        driver.quit()
+        debug_logs.append("WebDriver closed.")
+    return {"title": title, "content": content, "debug_logs": debug_logs}
+def interface_function(url):
+    result = scrape_blog(url)
+    debug_output = "\n".join(result["debug_logs"])
+    return f"제목: {result['title']}\n\n내용: {result['content']}\n\n[Debug Logs]\n{debug_output}"
+# Gradio 인터페이스 설정
+with gr.Blocks() as demo:
+    gr.Markdown("# 네이버 블로그 크롤러")
+    gr.Markdown("블로그 URL을 입력하면 제목과 내용을 추출합니다.")
+    with gr.Row():
+        url_input = gr.Textbox(label="네이버 블로그 URL")
+        submit_button = gr.Button("크롤링 시작")
+    output = gr.Textbox(label="결과")
+    submit_button.click(interface_function, inputs=url_input, outputs=output)
+# 앱 실행
+demo.launch()