aliceblue11 committed on
Commit
2f00f7a
·
verified ·
1 Parent(s): 15112ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -9
app.py CHANGED
@@ -2,11 +2,22 @@ import requests
2
  from bs4 import BeautifulSoup
3
  import gradio as gr
4
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def scrape_naver_blog(url):
6
  try:
7
- # Check if the URL is a mobile URL
8
- if not url.startswith("https://m.blog.naver.com"):
9
- return "Error: Please provide a valid mobile URL (https://m.blog.naver.com)."
10
 
11
  # Send a GET request to the URL
12
  response = requests.get(url)
@@ -19,9 +30,9 @@ def scrape_naver_blog(url):
19
  title_div = soup.find('div', class_='se-module se-module-text se-title-text')
20
  title = title_div.get_text(strip=True) if title_div else "Title not found."
21
 
22
- # Extract the content
23
- content_divs = soup.find_all('div', class_='se-module se-module-text se-quote')
24
- content = "\n".join(div.get_text(strip=True) for div in content_divs) if content_divs else "Content not found."
25
 
26
  return f"제목: {title}\n내용: {content}"
27
 
@@ -34,10 +45,13 @@ def gradio_interface(url):
34
 
35
  iface = gr.Interface(
36
  fn=gradio_interface,
37
- inputs=gr.Textbox(label="Enter Naver Blog Mobile URL"),
38
  outputs=gr.Textbox(label="Scraped Blog Content"),
39
- title="Naver Blog Scraper",
40
- description="Enter a Naver Blog mobile URL to scrape the title and content (text only).",
 
 
 
41
  )
42
 
43
  if __name__ == "__main__":
 
2
  from bs4 import BeautifulSoup
3
  import gradio as gr
4
 
5
def convert_to_mobile_url(url):
    """Return the mobile (m.blog.naver.com) form of a Naver blog URL.

    Only the host prefix is swapped; the path and any query string are kept
    exactly as given. Rebuilding the URL from the last two "/"-separated
    pieces breaks on trailing slashes, blog-only URLs and
    ``PostView.naver?blogId=...`` links, so that approach is avoided.

    Args:
        url: A Naver blog URL in standard or mobile form. Leading and
            trailing whitespace (common in pasted input) is ignored.

    Returns:
        The URL with ``https://blog.naver.com/`` replaced by
        ``https://m.blog.naver.com/``. Any URL that does not start with the
        standard prefix (including URLs already in mobile form) is returned
        unchanged apart from the whitespace strip.
    """
    standard_prefix = "https://blog.naver.com/"
    mobile_prefix = "https://m.blog.naver.com/"

    url = url.strip()
    if url.startswith(standard_prefix):
        # Keep everything after the host so multi-segment paths, trailing
        # slashes and query strings survive the conversion.
        return mobile_prefix + url[len(standard_prefix):]
    return url  # Already mobile, or not a standard Naver blog URL
16
+
17
  def scrape_naver_blog(url):
18
  try:
19
+ # Convert URL to mobile format if necessary
20
+ url = convert_to_mobile_url(url)
 
21
 
22
  # Send a GET request to the URL
23
  response = requests.get(url)
 
30
  title_div = soup.find('div', class_='se-module se-module-text se-title-text')
31
  title = title_div.get_text(strip=True) if title_div else "Title not found."
32
 
33
+ # Extract text content excluding images
34
+ text_components = soup.find_all('div', class_='se-module se-module-text')
35
+ content = "\n".join(component.get_text(strip=True) for component in text_components if component)
36
 
37
  return f"제목: {title}\n내용: {content}"
38
 
 
45
 
46
  iface = gr.Interface(
47
  fn=gradio_interface,
48
+ inputs=gr.Textbox(label="Enter Naver Blog URL (Standard or Mobile)"),
49
  outputs=gr.Textbox(label="Scraped Blog Content"),
50
+ title="Naver Blog Scraper (Text Only)",
51
+ description=(
52
+ "Enter a Naver Blog URL (standard or mobile) to scrape the title and text content only. "
53
+ "The script will automatically convert standard URLs to mobile format."
54
+ ),
55
  )
56
 
57
  if __name__ == "__main__":