AIRider committed on
Commit
4431f41
·
verified ·
1 Parent(s): 633dacd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py CHANGED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from selenium import webdriver
3
+ from selenium.webdriver.chrome.service import Service
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.chrome.options import Options
6
+ from webdriver_manager.chrome import ChromeDriverManager
7
+ import time
8
+
9
def _extract_text(driver, xpath, label, debug_logs):
    """Return the stripped text of the element at *xpath*, logging the outcome.

    On any lookup failure the per-field fallback string
    ("Error extracting <label>") is returned instead of raising, matching
    the original per-field error handling.
    """
    try:
        element = driver.find_element(By.XPATH, xpath)
        text = element.text.strip()
        debug_logs.append(f"{label.capitalize()} extracted: {text}")
        return text
    except Exception as e:
        debug_logs.append(f"Error extracting {label}: {e}")
        return f"Error extracting {label}"


def scrape_blog(url, wait_seconds=3):
    """Scrape the title and first content paragraph of a Naver blog post.

    Parameters
    ----------
    url : str
        Blog post URL to open in a headless Chrome session.
    wait_seconds : int or float, optional
        Seconds to pause after navigation so the page can render.
        Defaults to 3, matching the previous fixed delay.

    Returns
    -------
    dict
        ``{"title": str, "content": str, "debug_logs": list[str]}``.
        On failure the title/content fields carry placeholder error
        strings rather than raising, so the caller always gets a result.
    """
    debug_logs = []  # diagnostic messages, returned to the caller

    # Headless Chrome setup; --no-sandbox and --disable-dev-shm-usage are
    # typically required when running inside containers.
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()), options=chrome_options
    )
    debug_logs.append("WebDriver initialized.")

    try:
        driver.get(url)
        debug_logs.append(f"Navigated to {url}")

        # Crude page-load wait; a WebDriverWait on the target elements would
        # be more reliable, but this preserves the original timing behavior.
        time.sleep(wait_seconds)
        debug_logs.append("Waited for page to load.")

        # NOTE(review): these absolute XPaths are extremely brittle — any
        # layout change in the blog theme breaks them. Confirm against the
        # actual target page structure.
        title_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[1]/div/div/div[2]/div/p/span"
        title = _extract_text(driver, title_xpath, "title", debug_logs)

        content_xpath = "/html/body/div[7]/div[1]/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[8]/div[1]/div/table[2]/tbody/tr/td[2]/div[1]/div/div[3]/div[4]/div/div/div/p[1]/span"
        content = _extract_text(driver, content_xpath, "content", debug_logs)

    except Exception as e:
        # Navigation-level failure (bad URL, driver error, ...): report via
        # the result fields rather than raising to the UI layer.
        title = "Error accessing blog"
        content = "Error accessing blog"
        debug_logs.append(f"Error accessing blog: {e}")

    finally:
        driver.quit()  # always release the browser process
        debug_logs.append("WebDriver closed.")

    return {"title": title, "content": content, "debug_logs": debug_logs}
58
+
59
def interface_function(url):
    """Run the scraper for *url* and format the result as display text.

    Returns a single string: title, content, and the collected debug log,
    separated by blank lines.
    """
    scraped = scrape_blog(url)
    log_text = "\n".join(scraped["debug_logs"])
    sections = (
        f"제목: {scraped['title']}",
        f"내용: {scraped['content']}",
        f"[Debug Logs]\n{log_text}",
    )
    return "\n\n".join(sections)
63
+
64
# --- Gradio UI -------------------------------------------------------------
# Simple one-page layout: URL input + trigger button on one row, result below.
with gr.Blocks() as demo:
    gr.Markdown("# 네이버 블로그 크롤러")
    gr.Markdown("블로그 URL을 입력하면 제목과 내용을 추출합니다.")

    with gr.Row():
        blog_url_box = gr.Textbox(label="네이버 블로그 URL")
        run_button = gr.Button("크롤링 시작")

    result_box = gr.Textbox(label="결과")

    # Wire the button to the scraper wrapper.
    run_button.click(interface_function, inputs=blog_url_box, outputs=result_box)

# Launch the app (blocking call).
demo.launch()