Shreyas094
committed on
Commit
•
90e8b4f
1
Parent(s):
8428e23
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ import os
|
|
21 |
from dotenv import load_dotenv
|
22 |
import certifi
|
23 |
from bs4 import BeautifulSoup
|
24 |
-
|
25 |
|
26 |
# Load environment variables from a .env file
|
27 |
load_dotenv()
|
@@ -90,10 +90,6 @@ def scrape_with_bs4(url, session, max_chars=None):
|
|
90 |
logger.error(f"Error scraping {url} with BeautifulSoup: {e}")
|
91 |
return ""
|
92 |
|
93 |
-
from bs4 import BeautifulSoup
|
94 |
-
from trafilatura import extract
|
95 |
-
import requests
|
96 |
-
|
97 |
def scrape_with_trafilatura(url, max_chars=None, timeout=5, use_beautifulsoup=False):
|
98 |
try:
|
99 |
response = requests.get(url, timeout=timeout)
|
|
|
21 |
from dotenv import load_dotenv
|
22 |
import certifi
|
23 |
from bs4 import BeautifulSoup
|
24 |
+
import requests
|
25 |
|
26 |
# Load environment variables from a .env file
|
27 |
load_dotenv()
|
|
|
90 |
logger.error(f"Error scraping {url} with BeautifulSoup: {e}")
|
91 |
return ""
|
92 |
|
|
|
|
|
|
|
|
|
93 |
def scrape_with_trafilatura(url, max_chars=None, timeout=5, use_beautifulsoup=False):
|
94 |
try:
|
95 |
response = requests.get(url, timeout=timeout)
|