Spaces:
Sleeping
Sleeping
File size: 5,476 Bytes
9e6917b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd
def Excel_final(url):
    """Scrape a restaurant menu page and export it to ``restaurant_data.xlsx``.

    The page is downloaded, the JSON embedded in its
    ``<script id="__NEXT_DATA__">`` tag is parsed, and every menu item
    (together with its modifier groups and their options) is written to a
    single worksheet in the current working directory.

    Args:
        url: Address of the menu page to download.

    Returns:
        None. Progress and failures are reported with ``print``. When the
        page cannot be fetched or parsed, no workbook is written.
    """

    def fetch_restaurant_data(url):
        """Download *url* and return its menu items as a list of dicts.

        Returns ``None`` when the HTTP status is not 200 or the
        ``__NEXT_DATA__`` script tag is missing or empty.
        """
        # NOTE(review): hard-coded cookies, presumably captured from a
        # browser session; they may need refreshing if requests start failing.
        headers = {
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        # A timeout keeps an unresponsive server from blocking forever.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        # ``.string`` is None for an empty tag, which json.loads cannot parse.
        if not script_tag or not script_tag.string:
            print("Script tag not found")
            return None

        page_state = json.loads(script_tag.string)
        menu_meta = page_state['props']['initialState']['menuPage']['menu']['meta']
        category_map = {
            category['id']: category['name']
            for category in menu_meta['categories']
        }
        modifier_groups_dict = {
            group['id']: group for group in menu_meta['modifierGroups']
        }

        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        item_position = 0
        for item in menu_meta['items']:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                # Items whose category is not listed are left out.
                continue

            # Positions are 1-based; the item counter restarts every time
            # the category name differs from the previous item's.
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1

            image = item.get('image') or {}
            items_with_modifiers.append({
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": image.get('url', ''),
                "modifier_groups": [
                    modifier_groups_dict.get(group_id, {})
                    for group_id in item.get('modifierGroupIds', [])
                ],
            })
        return items_with_modifiers

    def save_data_to_excel(data):
        """Write the items in *data* to ``restaurant_data.xlsx``.

        Each modifier group gets its own row. The item's own columns are
        filled only on the first row of that item and left blank on the
        following ones. Items without modifier groups get a single row.
        """
        # Widest modifier group decides how many option column pairs exist.
        max_options = 0
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                max_options = max(max_options, num_options)

        rows = []
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            modifier_groups = item['modifier_groups']
            if not modifier_groups:
                # Three group columns plus a name/price pair per option.
                rows.append(base_row + [''] * (max_options * 2 + 3))
                continue

            for index, modifier_group in enumerate(modifier_groups):
                options = modifier_group.get('modifierOptions', [])
                group_cells = [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', ''),
                ]
                for option in options:
                    group_cells += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else '',
                    ]
                # Pad so every row has the same number of columns.
                group_cells += [''] * (2 * (max_options - len(options)))

                leading_cells = base_row if index == 0 else [''] * len(base_row)
                rows.append(leading_cells + group_cells)

        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name',
            'Description', 'Item Price', 'Image URL', 'Modifier Group Name',
            'Min Selection', 'Max Selection',
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)

        # Blank out the headers of every option column. The lookup has to
        # use 'Max Selection' (the real header); the previous
        # 'Max_quantity' never exists and raised KeyError on every run.
        last_named_index = df.columns.get_loc('Max Selection')
        blank_count = len(df.columns) - last_named_index - 1
        df.columns = list(df.columns[:last_named_index + 1]) + [''] * blank_count

        # The context manager closes the workbook even if writing fails.
        with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Sheet1', index=False)
        print("Data saved to restaurant_data.xlsx")

    data = fetch_restaurant_data(url)
    if data is None:
        # The fetch step already reported why; there is nothing to export.
        return
    save_data_to_excel(data)