def Talabat_excel_extract(url):
    """Scrape a Talabat restaurant page and build an in-memory Excel menu export.

    The restaurant page embeds its menu as JSON inside the Next.js
    ``__NEXT_DATA__`` script tag; items that declare ``hasChoices`` require a
    follow-up call to the branch choices API. The flat item rows plus the
    option-group rows are assembled into a DataFrame and written to an
    ``xlsx`` workbook held in memory.

    Parameters
    ----------
    url : str
        Talabat restaurant URL of the form ``.../<restaurant_id>/<restaurant_name>``
        (the last two path segments are parsed out).

    Returns
    -------
    tuple[io.BytesIO, str] | None
        ``(workbook_stream, "<restaurant_name>_menu.xlsx")`` on success; the
        stream is rewound to position 0. Returns ``None`` (implicitly) when
        the page fetch fails or the embedded menu JSON cannot be found.
    """
    # Third-party imports are kept function-local (as in the original module)
    # so importing this module does not require requests/bs4 to be installed.
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    # Browser-like User-Agent: Talabat rejects default requests UA strings.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id, restaurant_id):
        """Fetch the option-group payload for one menu item; None on HTTP failure."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    # URL shape: .../<restaurant_id>/<restaurant_name>
    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return None

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

    menu_items_list = []
    category_name_list = []  # categories in first-seen order; index+1 = position

    for i, item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']

        # FIX: the original reused the running counter for repeated sections,
        # which mis-numbered a category if sections ever interleave. Position
        # is now the category's first-seen rank (identical for contiguous data).
        if original_section not in category_name_list:
            category_name_list.append(original_section)
        category_position = category_name_list.index(original_section) + 1

        menu_items_list.append({
            "Category": original_section,
            "Category_position": category_position,
            "Item_name": item['name'],
            "Item_position": i + 1,
            "Image": item['originalImage'],
            "description": item['description'],
            "price": item['price'],
            "id": item_id,
        })

        if not item['hasChoices']:
            continue

        choice_data = extract_choices(item_id, restaurant_id)
        if not choice_data:
            continue

        # FIX: the original indexed [0] unconditionally, raising IndexError on
        # an empty 'choiceForItem' list (and KeyError on a missing 'result').
        choice_for_item_list = choice_data.get("result", {}).get('choiceForItem', [])
        if not choice_for_item_list:
            continue
        choice_sections = choice_for_item_list[0].get('choiceSections', [])

        # One row per option group; options are spread across numbered columns.
        grouped_data = {}
        for option_group in choice_sections:
            option_group_name = option_group.get('nm', '')
            row = grouped_data.setdefault(option_group_name, {
                "Option_group_name": option_group_name,
                "Min_quantity": option_group.get('mnq', ''),
                "Max_quantity": option_group.get('mxq', ''),
            })
            for option_index, option in enumerate(option_group.get('ich', []), start=1):
                row[f"Option_{option_index}_Name"] = option.get('nm', '')
                row[f"Option_{option_index}_Price"] = option.get('pr', '')
        menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)

    if 'Max_quantity' in df.columns:
        # Presentation hack carried over from the original: blank out the
        # headers of the Option_N_* columns (everything after Max_quantity)...
        max_column_index = df.columns.get_loc('Max_quantity')
        for col_idx in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[col_idx]: ''}, inplace=True)
        # ...then shift the option-group columns up one row so each group row
        # lines up under the item it belongs to.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for col_idx in range(option_group_name_index, len(df.columns)):
            df.iloc[:, col_idx] = df.iloc[:, col_idx].shift(-1)

    df_cleaned = df.dropna(how='all')  # drop rows emptied by the shift above

    excel_file = BytesIO()
    df_cleaned.to_excel(excel_file, index=False)
    excel_file.seek(0)  # rewind so the caller can stream it from the start
    return excel_file, f"{restaurant_name}_menu.xlsx"