Update app2.py
Browse files
app2.py
CHANGED
@@ -1333,6 +1333,11 @@ CITIES_BY_STATE = {
|
|
1333 |
"Wyoming": ["Cheyenne", "Casper", "Laramie", "Gillette", "Rock Springs", "Sheridan", "Green River", "Evanston", "Riverton", "Jackson", "Cody", "Rawlins", "Lander", "Torrington", "Powell", "Douglas", "Worland", "Buffalo", "Wheatland", "Newcastle"],
|
1334 |
"Washington, DC": ["Washington"]
|
1335 |
}
|
|
|
|
|
|
|
|
|
|
|
1336 |
def find_lawyers(practice_area, state, city=None):
|
1337 |
base_url = "https://www.justia.com/lawyers"
|
1338 |
formatted_practice_area = format_url_component(practice_area)
|
@@ -1340,36 +1345,57 @@ def find_lawyers(practice_area, state, city=None):
|
|
1340 |
|
1341 |
if city:
|
1342 |
formatted_city = format_url_component(city)
|
1343 |
-
search_url = f"{base_url}/{formatted_practice_area}/{formatted_state}/{formatted_city}"
|
1344 |
else:
|
1345 |
-
search_url = f"{base_url}/{formatted_practice_area}/{formatted_state}"
|
|
|
|
|
|
|
|
|
1346 |
|
1347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1348 |
soup = BeautifulSoup(response.content, 'html.parser')
|
1349 |
|
1350 |
lawyers = []
|
1351 |
lawyer_cards = soup.find_all('div', class_='lawyer-card')
|
1352 |
|
|
|
|
|
|
|
|
|
1353 |
for card in lawyer_cards[:5]: # Limit to top 5 results
|
1354 |
-
|
1355 |
-
|
1356 |
-
|
1357 |
-
|
1358 |
-
|
1359 |
-
|
1360 |
-
|
1361 |
-
|
1362 |
-
|
1363 |
-
|
1364 |
-
|
1365 |
-
|
1366 |
-
|
1367 |
-
|
1368 |
-
|
1369 |
-
|
1370 |
-
|
1371 |
-
|
1372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1373 |
|
1374 |
return lawyers
|
1375 |
|
|
|
1333 |
"Wyoming": ["Cheyenne", "Casper", "Laramie", "Gillette", "Rock Springs", "Sheridan", "Green River", "Evanston", "Riverton", "Jackson", "Cody", "Rawlins", "Lander", "Torrington", "Powell", "Douglas", "Worland", "Buffalo", "Wheatland", "Newcastle"],
|
1334 |
"Washington, DC": ["Washington"]
|
1335 |
}
|
1336 |
+
import requests
|
1337 |
+
from bs4 import BeautifulSoup
|
1338 |
+
import time
|
1339 |
+
import random
|
1340 |
+
|
1341 |
def find_lawyers(practice_area, state, city=None):
    """Scrape the first few lawyer listings from a Justia directory page.

    Builds a ``https://www.justia.com/lawyers/<practice-area>/<state>/[<city>/]``
    URL, downloads it, and extracts data from up to five
    ``div.lawyer-card`` elements.

    Args:
        practice_area: Practice area to search for; passed through
            ``format_url_component`` before being placed in the URL.
        state: State to search in.
        city: Optional city; when falsy the state-level listing is used.

    Returns:
        A list of dicts with the keys ``name``, ``location``,
        ``practice_areas``, ``phone``, ``rating`` and ``profile_url``.
        ``phone`` and ``rating`` fall back to ``"N/A"`` when the card has no
        such element. An empty list is returned when the request fails, when
        the page has no lawyer cards, or when no card could be parsed.
    """
    base_url = "https://www.justia.com/lawyers"
    formatted_practice_area = format_url_component(practice_area)
    # NOTE(review): the statement defining ``formatted_state`` sits in the gap
    # between the two diff hunks; it is presumed to mirror the other
    # components -- confirm against the full file.
    formatted_state = format_url_component(state)

    if city:
        formatted_city = format_url_component(city)
        search_url = f"{base_url}/{formatted_practice_area}/{formatted_state}/{formatted_city}/"
    else:
        search_url = f"{base_url}/{formatted_practice_area}/{formatted_state}/"

    # Send a browser-like User-Agent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    lawyers = []
    lawyer_cards = soup.find_all('div', class_='lawyer-card')

    if not lawyer_cards:
        print(f"No lawyer cards found on the page. URL: {search_url}")
        return []

    for card in lawyer_cards[:5]:  # Limit to top 5 results
        try:
            name = card.find('h3', class_='lawyer-name').text.strip()
            location = card.find('div', class_='lawyer-location').text.strip()
            practice_areas = card.find('div', class_='lawyer-practice-areas').text.strip()
            profile_url = card.find('a', class_='lawyer-name')['href']

            phone_elem = card.find('div', class_='lawyer-phone')
            phone = phone_elem.text.strip() if phone_elem else "N/A"

            rating_elem = card.find('div', class_='lawyer-rating')
            rating = rating_elem.text.strip() if rating_elem else "N/A"

            lawyers.append({
                'name': name,
                'location': location,
                'practice_areas': practice_areas,
                'phone': phone,
                'rating': rating,
                'profile_url': profile_url
            })
        except (AttributeError, TypeError, KeyError) as e:
            # AttributeError: a required element is missing (``None.text``).
            # TypeError: the profile anchor is missing (``None['href']``).
            # KeyError: the profile anchor exists but has no ``href``.
            # Skip the malformed card rather than abort the whole scrape.
            print(f"Error parsing lawyer card: {e}")
            continue

    if not lawyers:
        print(f"No lawyers found on the page. URL: {search_url}")

    return lawyers
|
1401 |
|