AIRider committed on
Commit
5e53174
·
verified ·
1 Parent(s): 2b61a85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -23
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
 
5
  def convert_to_mobile_url(url):
6
  """
7
- ์ž…๋ ฅ๋œ URL์„ ๋ชจ๋ฐ”์ผ URL๋กœ ๋ณ€ํ™˜.
8
  """
9
  if "m.blog.naver.com" not in url:
10
  if "blog.naver.com" in url:
@@ -16,41 +16,45 @@ def convert_to_mobile_url(url):
16
  return url
17
 
18
  def scrape_naver_blog(url):
 
 
 
19
  try:
20
- # Debugging: URL 확인
21
- print(f"Original URL: {url}")
22
-
23
- # ๋ชจ๋ฐ”์ผ URL๋กœ ๋ณ€ํ™˜
24
  mobile_url = convert_to_mobile_url(url)
25
  print(f"Converted Mobile URL: {mobile_url}")
26
-
27
  response = requests.get(mobile_url)
28
  response.raise_for_status()
29
-
30
- # Debugging: HTTP 응답 상태 확인
31
- print(f"Response Status Code: {response.status_code}")
32
-
33
  soup = BeautifulSoup(response.text, 'html.parser')
34
-
35
  # 제목 스크래핑
36
  title_element = soup.find("div", class_="se-module se-module-text se-title-text")
37
  title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"
38
-
39
- # Debugging: 제목 확인
40
- print(f"Scraped Title: {title}")
41
-
42
- # 내용 스크래핑
43
- content_elements = soup.find_all("div", class_="se-module se-module-text se-quote")
44
  content = "\n".join(
45
  elem.get_text(strip=True) for elem in content_elements
46
  ) if content_elements else "내용을 찾을 수 없음"
47
-
48
- # Debugging: 내용 확인
 
 
 
 
 
 
 
49
  print(f"Scraped Content: {content}")
50
-
51
- return f"제목: {title}\n내용: {content}"
 
 
 
 
52
  except Exception as e:
53
- # Debugging: 오류 메시지 출력
54
  print(f"Error: {e}")
55
  return f"Error: {e}"
56
 
@@ -63,7 +67,7 @@ interface = gr.Interface(
63
  inputs=gr.Textbox(label="네이버 블로그 URL"),
64
  outputs=gr.Textbox(label="스크래핑 결과"),
65
  title="네이버 블로그 스크래핑",
66
- description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ URL์„ ์ž…๋ ฅํ•˜๋ฉด ๋ชจ๋ฐ”์ผ URL๋กœ ๋ณ€ํ™˜ ํ›„ ์ œ๋ชฉ๊ณผ ํ…์ŠคํŠธ ๋‚ด์šฉ์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
67
  )
68
 
69
  if __name__ == "__main__":
 
4
 
5
  def convert_to_mobile_url(url):
6
  """
7
+ PC URL์„ ๋ชจ๋ฐ”์ผ URL๋กœ ๋ณ€ํ™˜.
8
  """
9
  if "m.blog.naver.com" not in url:
10
  if "blog.naver.com" in url:
 
16
  return url
17
 
18
  def scrape_naver_blog(url):
19
+ """
20
+ 네이버 블로그의 제목, 내용, 이미지 URL 스크래핑.
21
+ """
22
  try:
23
+ # 모바일 URL 변환
 
 
 
24
  mobile_url = convert_to_mobile_url(url)
25
  print(f"Converted Mobile URL: {mobile_url}")
26
+
27
  response = requests.get(mobile_url)
28
  response.raise_for_status()
29
+
 
 
 
30
  soup = BeautifulSoup(response.text, 'html.parser')
31
+
32
  # 제목 스크래핑
33
  title_element = soup.find("div", class_="se-module se-module-text se-title-text")
34
  title = title_element.get_text(strip=True) if title_element else "제목을 찾을 수 없음"
35
+
36
+ # 본문 내용 스크래핑
37
+ content_elements = soup.find_all("div", class_="se-module se-module-text")
 
 
 
38
  content = "\n".join(
39
  elem.get_text(strip=True) for elem in content_elements
40
  ) if content_elements else "내용을 찾을 수 없음"
41
+
42
+ # 이미지 URL 스크래핑
43
+ image_elements = soup.find_all("img", class_="se-image-resource")
44
+ image_urls = [
45
+ img["src"] for img in image_elements if "src" in img.attrs
46
+ ]
47
+
48
+ # 디버깅 메시지 출력
49
+ print(f"Scraped Title: {title}")
50
  print(f"Scraped Content: {content}")
51
+ print(f"Scraped Images: {image_urls}")
52
+
53
+ # 결과 반환
54
+ result = f"제목: {title}\n\n내용: {content}\n\n이미지 URL:\n" + "\n".join(image_urls)
55
+ return result
56
+
57
  except Exception as e:
 
58
  print(f"Error: {e}")
59
  return f"Error: {e}"
60
 
 
67
  inputs=gr.Textbox(label="네이버 블로그 URL"),
68
  outputs=gr.Textbox(label="스크래핑 결과"),
69
  title="네이버 블로그 스크래핑",
70
+ description="๋„ค์ด๋ฒ„ ๋ธ”๋กœ๊ทธ์˜ ์ œ๋ชฉ, ๋‚ด์šฉ, ์ด๋ฏธ์ง€ URL์„ ์Šคํฌ๋ž˜ํ•‘ํ•ฉ๋‹ˆ๋‹ค."
71
  )
72
 
73
  if __name__ == "__main__":