aliceblue11 committed on
Commit
8ec2a9c
·
verified ·
1 Parent(s): 154ebc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -5,43 +5,57 @@ from bs4 import BeautifulSoup
5
# Function to scrape Naver blog title and content
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    Args:
        url: Address of the Naver blog post to scrape.

    Returns:
        A string of the form "제목: <title>" + newline + "내용: <content>".
        On failure, an error-message string is returned instead of raising,
        so the Gradio UI shows text rather than a traceback.
    """
    try:
        # Send a GET request to the URL with a browser-like User-Agent so
        # the server serves the normal desktop markup.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # FIX: requests has no default timeout — without one, a stalled
        # connection blocks this call (and the Gradio request) forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise an exception for HTTP errors

        # Parse the HTML using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        try:
            # Extract title via a position-based CSS path.
            # NOTE(review): this selector is tied to one exact page layout —
            # confirm it still matches the target blog's current markup.
            title_element = soup.select_one(
                "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
            )
            title = title_element.get_text(strip=True) if title_element else "Title not found"
        except Exception as e:
            title = f"Error extracting title: {e}"

        try:
            # Extract content (same layout-dependence caveat as the title).
            content_element = soup.select_one(
                "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
            )
            content = content_element.get_text(strip=True) if content_element else "Content not found"
        except Exception as e:
            content = f"Error extracting content: {e}"

        # Return the results (제목 = title, 내용 = content)
        return f"제목: {title}\n내용: {content}"

    except Exception as e:
        # Network / HTTP / timeout failures land here; report as text so the
        # caller (Gradio) can display the message to the user.
        return f"Error fetching the page: {e}"
41
 
42
# Gradio Interface
def scrape_interface(url):
    """Thin Gradio callback: delegate straight to the scraper."""
    return scrape_naver_blog(url)
46
 
47
  interface = gr.Interface(
@@ -53,4 +67,5 @@ interface = gr.Interface(
53
  )
54
 
55
# Entry point: launch the Gradio app only when run as a script,
# not when imported as a module.
if __name__ == "__main__":
    interface.launch(debug=True)
 
5
# Function to scrape Naver blog title and content
def scrape_naver_blog(url):
    """Fetch a Naver blog post and return its title and body text.

    Emits print() trace lines at each step (the "# Debugging" statements)
    so progress is visible in the app log.

    Args:
        url: Address of the Naver blog post to scrape.

    Returns:
        A string of the form "제목: <title>" + newline + "내용: <content>".
        On failure, an error-message string is returned instead of raising,
        so the Gradio UI shows text rather than a traceback.
    """
    try:
        print("Starting the scraping process...")  # Debugging
        print(f"Target URL: {url}")  # Debugging

        # Send a GET request to the URL with a browser-like User-Agent so
        # the server serves the normal desktop markup.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        # FIX: requests has no default timeout — without one, a stalled
        # connection blocks this call (and the Gradio request) forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise an exception for HTTP errors
        print("Successfully fetched the page content.")  # Debugging

        # Parse the HTML using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        print("HTML parsing completed.")  # Debugging

        try:
            # Extract title via a position-based CSS path.
            # NOTE(review): this selector is tied to one exact page layout —
            # confirm it still matches the target blog's current markup.
            print("Attempting to extract the title...")  # Debugging
            title_element = soup.select_one(
                "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div > div:nth-of-type(2) > div > p > span"
            )
            title = title_element.get_text(strip=True) if title_element else "Title not found"
            print(f"Extracted Title: {title}")  # Debugging
        except Exception as e:
            title = f"Error extracting title: {e}"
            print(f"Title extraction error: {e}")  # Debugging

        try:
            # Extract content (same layout-dependence caveat as the title).
            print("Attempting to extract the content...")  # Debugging
            content_element = soup.select_one(
                "body > div:nth-of-type(7) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div > div:nth-of-type(8) > div:nth-of-type(1) > div > table:nth-of-type(2) > tbody > tr > td:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(3) > div:nth-of-type(4) > div > div > div > p:nth-of-type(1) > span"
            )
            content = content_element.get_text(strip=True) if content_element else "Content not found"
            print(f"Extracted Content: {content}")  # Debugging
        except Exception as e:
            content = f"Error extracting content: {e}"
            print(f"Content extraction error: {e}")  # Debugging

        # Return the results (제목 = title, 내용 = content)
        return f"제목: {title}\n내용: {content}"

    except Exception as e:
        # Network / HTTP / timeout failures land here; report as text so the
        # caller (Gradio) can display the message to the user.
        print(f"Error fetching the page: {e}")  # Debugging
        return f"Error fetching the page: {e}"
53
 
54
# Gradio Interface
def scrape_interface(url):
    """Gradio callback wrapping scrape_naver_blog, with debug tracing."""
    print("Scrape interface triggered.")  # Debugging
    output = scrape_naver_blog(url)
    print("Scrape process completed.")  # Debugging
    return output
60
 
61
  interface = gr.Interface(
 
67
  )
68
 
69
# Entry point: launch the Gradio app only when run as a script,
# not when imported as a module.
if __name__ == "__main__":
    print("Launching Gradio interface...")  # Debugging
    interface.launch(debug=True)