# Seconds to wait on any single HTTP call before giving up; without a timeout
# ``requests`` can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def _tag_text(parent, name, **attrs):
    """Return the stripped text of the first matching child tag, or "" if absent."""
    tag = parent.find(name, **attrs)
    return tag.text.strip() if tag is not None else ""


def _parse_menu_item_box(item_box):
    """Pull the scraped fields out of one ``menu-item-box`` div.

    Returns a dict with the keys ``category``, ``name``, ``description``,
    ``price``, ``image_url`` and ``item_id``. Text fields default to ``""``
    and ``image_url`` / ``item_id`` default to ``None`` when the
    corresponding tag is missing.
    """
    img = item_box.find('img')
    image_url = img.get('data-image') if img is not None else None

    details = item_box.find('div', class_='menu-item-details')
    if details is not None:
        category = _tag_text(details, 'p', class_='type')
        name = _tag_text(details, 'h5', class_='menu-food-title')
        description = _tag_text(details, 'p', itemprop='description')
        price = _tag_text(details, 'div', class_='menu-item-price')
    else:
        category = name = description = price = ""

    options_link = item_box.find('a', class_='menu-list-options')
    item_id = options_link.get('value') if options_link is not None else None

    return {
        'category': category,
        'name': name,
        'description': description,
        'price': price,
        'image_url': image_url,
        'item_id': item_id,
    }


def _effective_max(raw_max):
    """Return *raw_max* when it is numerically >= 1, otherwise the fallback ``'1'``.

    The comparison is numeric so that int and str values can both be handled;
    comparing an int against the str ``'1'`` with ``max`` raises ``TypeError``.
    """
    try:
        numeric = float(raw_max)
    except (TypeError, ValueError):
        return '1'
    return raw_max if numeric >= 1 else '1'


def _group_food_choices(json_data):
    """Collapse ``arrResult.arrFoodChoice`` into one row dict per option group.

    Each returned dict holds ``Option_group_name``, ``Min`` and ``Max`` (taken
    from the first choice seen for the group) followed by
    ``Option N Name`` / ``Option N Price`` pairs, one pair per choice.
    Returns an empty list when the expected keys are not present.
    """
    result = json_data.get('arrResult') if isinstance(json_data, dict) else None
    choices = result.get('arrFoodChoice') if isinstance(result, dict) else None

    groups = {}
    for choice in choices or []:
        group_name = choice['choice_name']
        option_name = choice['name']
        extra_price = choice['price']

        group = groups.get(group_name)
        if group is None:
            group = groups[group_name] = {
                'Option_group_name': group_name,
                'Min': choice.get('mandatory', 0),
                'Max': _effective_max(choice.get('max_choice', 1)),
            }

        # Every stored option contributes two "Option ..." keys (Name and
        # Price), so halve the count to get the number of options so far.
        option_index = sum(key.startswith('Option ') for key in group) // 2 + 1
        group[f"Option {option_index} Name"] = option_name
        group[f"Option {option_index} Price"] = extra_price

    return list(groups.values())


def _fetch_option_groups(item_id):
    """POST the add-to-cart endpoint for *item_id* and return its option-group rows.

    Prints a message and returns an empty list when the response status is
    not 200 or the body is not valid JSON.
    """
    import json

    import requests

    option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
    # NOTE(review): the restaurant id and session cookie are hard-coded and are
    # sent unchanged for every restaurant URL - confirm this is intended.
    payload = {'restId': '17902'}
    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
    }

    option_response = requests.post(
        option_url,
        headers=headers,
        data=payload,
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if option_response.status_code != 200:
        print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")
        return []

    try:
        json_data = json.loads(option_response.text)
    except json.JSONDecodeError:
        print("JSON decoding error. Response content may not be in valid JSON format.")
        return []

    return _group_food_choices(json_data)


def _align_option_columns(df):
    """Lift option-group cells up one row and blank the per-option headers, in place.

    Does nothing when the frame has no option-group columns (no item had
    options), which avoids a ``KeyError`` on the column lookups.
    """
    if 'Option_group_name' not in df.columns or 'Max' not in df.columns:
        return

    # Shift while the labels are still unique: every column from
    # 'Option_group_name' onwards moves up one row, so the first option group
    # of an item lands on the item's own row.
    first_option_col = df.columns.get_loc('Option_group_name')
    for col in range(first_option_col, len(df.columns)):
        df.iloc[:, col] = df.iloc[:, col].shift(-1)

    # Columns after 'Max' are the "Option N Name/Price" pairs; their headers
    # are written as empty strings in the spreadsheet.
    max_col = df.columns.get_loc('Max')
    kept_labels = list(df.columns[:max_col + 1])
    df.columns = kept_labels + [''] * (len(df.columns) - len(kept_labels))


def EatEasy_excel_extract(url_input):
    """Scrape an EatEasy restaurant menu and return it as an in-memory Excel file.

    The restaurant code is taken from the last path segment of *url_input*.
    The menu HTML is requested with that code, each ``menu-item-box`` div is
    turned into a row, and for every item that exposes an option id its
    option groups are fetched and appended as extra rows.

    Args:
        url_input: URL of the restaurant page; only its path is used.

    Returns:
        On success, a tuple ``(output, filename)`` where ``output`` is a
        ``BytesIO`` positioned at 0 containing an xlsx workbook with a single
        ``Menu`` sheet and ``filename`` is ``"<restaurant_code>_menu.xlsx"``.
        If the menu request does not return status 200, a message is printed
        and ``True`` is returned (kept as-is for existing callers).

    Raises:
        requests.exceptions.RequestException: if an HTTP call fails or
            exceeds the timeout.
    """
    from io import BytesIO
    from urllib.parse import urlparse

    import pandas as pd
    import requests
    from bs4 import BeautifulSoup

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]

    response = requests.post(
        "https://www.eateasy.ae/dubai/food/getFilteredMenus",
        data={"restCode": restaurant_code},
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return True

    soup = BeautifulSoup(response.text, 'html.parser')

    rows = []
    category_positions = {}  # category name -> 1-based order of first appearance
    for item_position, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
        fields = _parse_menu_item_box(item_box)

        category = fields['category']
        if category not in category_positions:
            category_positions[category] = len(category_positions) + 1

        # The item row is appended before its option-group rows so that the
        # later one-row shift lines the first group up with the item.
        rows.append({
            "Category": category,
            "Category_position": category_positions[category],
            "Item_name": fields['name'],
            "Item_position": item_position,
            "Image": fields['image_url'],
            "description": fields['description'],
            "price": fields['price'],
            "id": fields['item_id'],
        })

        if fields['item_id'] is not None:
            rows.extend(_fetch_option_groups(fields['item_id']))

    df = pd.DataFrame(rows)
    _align_option_columns(df)
    # After the shift, the last option row of each item is entirely empty.
    df_cleaned = df.dropna(how='all')

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df_cleaned.to_excel(writer, index=False, sheet_name='Menu')
    output.seek(0)

    return output, f"{restaurant_code}_menu.xlsx"