import json

import pandas as pd
import requests
from bs4 import BeautifulSoup


def Excel_final(url):
    """Scrape a restaurant menu page and export it to ``restaurant_data.xlsx``.

    The page at *url* is downloaded, the JSON embedded in its
    ``<script id="__NEXT_DATA__">`` tag is parsed, and every menu item that
    belongs to a known category is written to ``Sheet1`` of an Excel
    workbook in the current working directory. Each item occupies one row
    per modifier group (or a single row if it has none).

    Args:
        url: Address of the menu page to scrape.

    Returns:
        None. Progress and failures are reported with ``print``. If the page
        cannot be fetched or parsed, no workbook is written.
    """

    def fetch_restaurant_data(url):
        """Download *url* and return its menu items as a list of dicts.

        Returns ``None`` (after printing a message) when the HTTP status is
        not 200 or the ``__NEXT_DATA__`` script tag is missing or empty.
        """
        # NOTE(review): hard-coded session cookies. They look like values
        # captured from a browser session and will presumably expire;
        # consider loading them from configuration instead of source code.
        headers = {
            'Cookie': (
                '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; '
                'roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; '
                'roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
            )
        }

        # A timeout keeps the call from blocking forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=30)
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None

        soup = BeautifulSoup(response.content, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        # ``.string`` is None for an empty tag, which json.loads cannot parse.
        if not script_tag or not script_tag.string:
            print("Script tag not found")
            return None

        page_state = json.loads(script_tag.string)
        menu_meta = page_state['props']['initialState']['menuPage']['menu']['meta']

        category_map = {
            category['id']: category['name']
            for category in menu_meta['categories']
        }
        modifier_groups_dict = {
            group['id']: group for group in menu_meta['modifierGroups']
        }

        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        # Initialised up front so the "same category" branch can never hit
        # an unbound local on the first item.
        item_position = 0

        for item in menu_meta['items']:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                # Items whose category is not listed are skipped entirely.
                continue

            # Positions are 1-based and derived from the order in which
            # items appear: a change of category name starts a new category.
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1

            image = item.get('image') or {}
            items_with_modifiers.append({
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                "img_url": image.get('url', ''),
                "modifier_groups": [
                    modifier_groups_dict.get(group_id, {})
                    for group_id in item.get('modifierGroupIds', [])
                ],
            })

        return items_with_modifiers

    def save_data_to_excel(data):
        """Write the item dicts in *data* to ``restaurant_data.xlsx``."""
        base_columns = [
            'Category Name', 'Category Position', 'Item Position',
            'Item Name', 'Description', 'Item Price', 'Image URL',
            'Modifier Group Name', 'Min Selection', 'Max Selection',
        ]
        # Number of per-group cells that precede the option cells
        # (group name, min selection, max selection).
        group_field_count = 3

        # Widest modifier group decides how many option columns are needed.
        max_options = max(
            (
                len(group.get('modifierOptions') or [])
                for item in data
                for group in item['modifier_groups']
            ),
            default=0,
        )
        option_width = max_options * 2  # one name + one price per option

        rows = []
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            groups = item['modifier_groups']

            if not groups:
                rows.append(base_row + [''] * (group_field_count + option_width))
                continue

            for index, group in enumerate(groups):
                options = group.get('modifierOptions') or []
                group_cells = [
                    group.get('name', ''),
                    group.get('minSelection', ''),
                    group.get('maxSelection', ''),
                ]
                for option in options:
                    group_cells += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else '',
                    ]
                # Pad so every row has the same number of option cells.
                group_cells += [''] * (option_width - len(options) * 2)

                # Item details are shown only on the item's first row;
                # rows for further modifier groups leave them blank.
                prefix = base_row if index == 0 else [''] * len(base_row)
                rows.append(prefix + group_cells)

        option_columns = []
        for i in range(1, max_options + 1):
            option_columns += [f'Option {i} Name', f'Option {i} Price']

        df = pd.DataFrame(rows, columns=base_columns + option_columns)

        # Every column after 'Max Selection' is written with a blank header.
        # The frame keeps unique column names; the blanks are applied as
        # header aliases at write time.
        header = base_columns + [''] * len(option_columns)

        # The context manager saves and closes the workbook even on error.
        with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
            df.to_excel(writer, sheet_name='Sheet1', index=False, header=header)

        print("Data saved to restaurant_data.xlsx")

    data = fetch_restaurant_data(url)
    if data is None:
        # The fetch step has already printed the reason for the failure.
        return
    save_data_to_excel(data)