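"""Scrape Deliveroo (deliveroo.ae) restaurant listings for a given city and
location and export each restaurant's menu, including modifier groups and
their options, to an Excel workbook with one sheet per restaurant."""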
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
def fetch_restaurant_links(city, location):
    """Return up to 20 restaurant menu URLs for the given city and location."""
    base_url = "https://deliveroo.ae"
    url = f"{base_url}/restaurants/{city}/{location}/?collection=restaurants"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        # NOTE: hardcoded session cookie; it expires and must be refreshed manually.
        'Cookie': '__cf_bm=oakl46sJ3V9vwmnIIbfXWfkHbGmmC2pH56GyTI33b4U-1715931048-1.0.1.1-4XOcSGSThZV_INfpn3aptlo8jpZtLFbYoLsZxP9BpQ8LIjq3wBIe8CPlSf0AomuniXy4TZWyVlBQBTlrm.CPiSfI1jzx18y9zxwc9GX0fmo; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=2e989653-2776-4ede-a52e-b610f1ad64a2'
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        if "We couldn't find" in soup.text or "No restaurants" in soup.text:
            print("No restaurants found for the specified location.")
            return []
        # Restaurant cards use hashed CSS class names; these are brittle and
        # may break whenever Deliveroo redeploys its frontend.
        divs = soup.find_all('div', class_=["HomeFeedScrollTracker-bd9a6ffea8a4b4b7", "HomeFeedUICard-157f7be5d7b2fa7b"])
        hrefs = [a_tag['href'] for div in divs for a_tag in div.find_all('a', href=True)]
        hrefs = hrefs[:20]  # cap at the first 20 restaurants
        return [f"{base_url}{href}" for href in hrefs]
    else:
        print(f"Request failed with status code {response.status_code}.")
        return []
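# Illustrative call (the slugs below are hypothetical; real ones come from
# deliveroo.ae URLs):
#   fetch_restaurant_links("dubai", "marina")
#   -> ["https://deliveroo.ae/menu/dubai/marina/some-restaurant", ...]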
def Excel_final(urls):
    def fetch_restaurant_data(url):
        headers = {
            # NOTE: hardcoded session cookie; it expires and must be refreshed manually.
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        # Deliveroo is a Next.js app: the full menu is embedded as JSON in the
        # __NEXT_DATA__ script tag, so no further HTML parsing is needed.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        json_data = json.loads(script_tag.string)
        json_data = json_data['props']['initialState']['menuPage']['menu']['meta']
        items = json_data['items']
        categories = json_data['categories']
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups = json_data['modifierGroups']
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        item_position = 0
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                continue
            # Track 1-based positions: bump the category counter when the
            # category changes, and restart the item counter within it.
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            item_with_modifiers = {
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            }
            items_with_modifiers.append(item_with_modifiers)
        return items_with_modifiers
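    # Shape of the menu JSON consumed above, inferred from the key accesses
    # (fields abbreviated; anything not read here is omitted):
    #   meta = {
    #       "items": [{"id", "categoryId", "name", "description",
    #                  "price": {"formatted"}, "image": {"url"},
    #                  "modifierGroupIds": [...]}, ...],
    #       "categories": [{"id", "name"}, ...],
    #       "modifierGroups": [{"id", "name", "minSelection", "maxSelection",
    #                           "modifierOptions": [...]}, ...],
    #   }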
    def save_data_to_excel(data, sheet_name, writer):
        rows = []
        max_options = 0
        # Find the maximum number of options across all modifier groups, so
        # every row can be padded to the same width.
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                if num_options > max_options:
                    max_options = num_options
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            # Emit one row per modifier group; repeat the item columns only on
            # the first row so the sheet reads like a merged-cell layout.
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', '')
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else ''
                    ]
                # Pad with empty strings so every row carries max_options
                # name/price pairs.
                modifier_group_row += [''] * (max_options * 2 - len(options) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                # Items without modifiers still get a full-width, blank-padded row.
                rows.append(base_row + [''] * (max_options * 2 + 3))
        # Column headers: 7 item columns, 3 modifier-group columns, then one
        # name/price pair per option slot.
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name',
            'Description', 'Item Price', 'Image URL',
            'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        # Blank out the headers of all option columns (everything after
        # 'Max Selection') so the exported sheet shows unlabeled option cells.
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            df.columns = list(df.columns[:max_column_index + 1]) + [''] * (len(df.columns) - max_column_index - 1)
        df.to_excel(writer, sheet_name=sheet_name, index=False)
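    # Resulting sheet layout (illustrative values, not real data): item columns
    # are filled only on the first modifier-group row of each item, e.g.
    #   Burgers | 1 | 1 | Classic Burger | ... | Choose a side | 1 | 1 | Fries | AED 10
    #           |   |   |                | ... | Add a drink   | 0 | 1 | Cola  | AED 8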
    # One workbook, one sheet per successfully scraped restaurant.
    with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
        for idx, url in enumerate(urls):
            data = fetch_restaurant_data(url)
            if data:
                save_data_to_excel(data, f'Sheet{idx+1}', writer)
    print("Data saved to restaurant_data.xlsx")
if __name__ == "__main__":
    city = input("Enter the city: ")
    location = input("Enter the location: ")
    urls = fetch_restaurant_links(city, location)
    if urls:
        Excel_final(urls)
    else:
        print("No restaurant links found or unable to fetch data.")