Spaces:

KrishanRao
/

URL

Sleeping

KrishanRao committed on Jan 14

Commit

171a83a

verified ·

1 Parent(s): c3af3c6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,19 +5,31 @@
 import gradio as gr
-from urllib.request import urlopen, Request
 from bs4 import BeautifulSoup
 from transformers import pipeline
 import os
-# Function to extract text from the URL
 def extract_text(url):
     try:
-        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
-        html = urlopen(req).read()
-        text = ' '.join(BeautifulSoup(html, "html.parser").stripped_strings)
         return text
-    except Exception as e:
         return f"Error extracting text from URL: {str(e)}"
 # Load Hugging Face model (for extracting named entities or QA)
@@ -106,3 +118,4 @@ demo = gr.Interface(
 if __name__ == "__main__":
     demo.launch(show_api=False)

 import gradio as gr
+import requests
 from bs4 import BeautifulSoup
 from transformers import pipeline
 import os
+# Function to extract text from the URL using requests
 def extract_text(url):
     try:
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Connection': 'keep-alive'
+        }
+        # Sending GET request with headers
+        response = requests.get(url, headers=headers)
+        # Check if the response is successful
+        response.raise_for_status()  # Raise an error for bad status codes
+        # Parse HTML and extract text
+        soup = BeautifulSoup(response.text, "html.parser")
+        text = ' '.join(soup.stripped_strings)
         return text
+    except requests.exceptions.RequestException as e:
         return f"Error extracting text from URL: {str(e)}"
 # Load Hugging Face model (for extracting named entities or QA)
 if __name__ == "__main__":
     demo.launch(show_api=False)