ocr_api2

Sleeping

Arafath10 committed on Aug 7, 2024

Commit

5f74ea1

verified ·

1 Parent(s): 4454b97

Update scraper.py

Files changed (1) hide show

scraper.py CHANGED Viewed

@@ -97,16 +97,20 @@ class Scraper:
     @staticmethod
     async def scrape(url):
-        headers = {'User-Agent': 'Mozilla/5.0'}
-        response = requests.get(url)
-        soup = BeautifulSoup(response.content, 'html.parser')
-        title = Scraper.get_title(soup)
-        links = Scraper.get_links(soup)
-        text_content = Scraper.get_text_content(soup)
-        if not links:
-            print("Running alternative scrapper")
             links, text_content = await Scraper.power_scrapper_2(url)
-        return {"title": title, "URL": links, "Content": text_content}

     @staticmethod
     async def scrape(url):
+        try:
+            headers = {'User-Agent': 'Mozilla/5.0'}
+            response = requests.get(url)
+            soup = BeautifulSoup(response.content, 'html.parser')
+            title = Scraper.get_title(soup)
+            links = Scraper.get_links(soup)
+            text_content = Scraper.get_text_content(soup)
+            if not links:
+                print("Running alternative scrapper")
+                links, text_content = await Scraper.power_scrapper_2(url)
+            return {"title": title, "URL": links, "Content": text_content}
+        except:
             links, text_content = await Scraper.power_scrapper_2(url)
+            return {"title": title, "URL": links, "Content": text_content}