Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ import json
|
|
20 |
import pickle
|
21 |
from nltk.tokenize import word_tokenize
|
22 |
from nltk.stem.lancaster import LancasterStemmer
|
|
|
23 |
|
24 |
# Ensure necessary NLTK resources are downloaded
|
25 |
nltk.download('punkt')
|
@@ -120,13 +121,20 @@ def install_chrome_and_driver():
|
|
120 |
os.system("google-chrome-stable --version")
|
121 |
|
122 |
# Fix ownership of /etc/sudo.conf
|
123 |
-
os.system("chown root:root /etc/sudo.conf")
|
124 |
|
125 |
# Verify Chrome installation
|
126 |
os.system("which google-chrome-stable")
|
127 |
if not os.path.exists("/usr/bin/google-chrome-stable"):
|
128 |
raise RuntimeError("Google Chrome was not installed correctly")
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
# Install Chromedriver (if not already installed)
|
131 |
chromedriver_autoinstaller.install()
|
132 |
|
@@ -210,7 +218,7 @@ def scrape_website_for_contact_info(website):
|
|
210 |
if phone_match:
|
211 |
phone_number = phone_match.group()
|
212 |
|
213 |
-
|
214 |
if email_match:
|
215 |
email = email_match.group()
|
216 |
|
|
|
20 |
import pickle
|
21 |
from nltk.tokenize import word_tokenize
|
22 |
from nltk.stem.lancaster import LancasterStemmer
|
23 |
+
import subprocess
|
24 |
|
25 |
# Ensure necessary NLTK resources are downloaded
|
26 |
nltk.download('punkt')
|
|
|
121 |
os.system("google-chrome-stable --version")
|
122 |
|
123 |
# Fix ownership of /etc/sudo.conf
|
124 |
+
os.system("sudo chown root:root /etc/sudo.conf")
|
125 |
|
126 |
# Verify Chrome installation
|
127 |
os.system("which google-chrome-stable")
|
128 |
if not os.path.exists("/usr/bin/google-chrome-stable"):
|
129 |
raise RuntimeError("Google Chrome was not installed correctly")
|
130 |
|
131 |
+
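# Attempt to install the CUDA libraries (best effort; no availability check is performed). NOTE: os.system() returns an exit status and never raises, so the except clause below cannot fire.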
|
132 |
+
try:
|
133 |
+
os.system("apt-get install -y cuda")
|
134 |
+
os.system("apt-get install -y libcudart.so.11.0")
|
135 |
+
except subprocess.CalledProcessError:
|
136 |
+
print("CUDA libraries not found or installation failed. Proceeding without GPU support.")
|
137 |
+
|
138 |
# Install Chromedriver (if not already installed)
|
139 |
chromedriver_autoinstaller.install()
|
140 |
|
|
|
218 |
if phone_match:
|
219 |
phone_number = phone_match.group()
|
220 |
|
221 |
+
email_match = re.search(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', soup.get_text())
|
222 |
if email_match:
|
223 |
email = email_match.group()
|
224 |
|