Spaces:
Sleeping
Sleeping
File size: 6,674 Bytes
e79fbb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
def EatEasy_excel_extract(url_input):
    """Scrape an EatEasy restaurant page and export its menu as an Excel file.

    Workflow:
      1. Take the restaurant code from the last path segment of *url_input*.
      2. POST to the ``getFilteredMenus`` endpoint and parse every
         ``menu-item-box`` div for category, name, description, price,
         image URL and item id.
      3. For each item id, POST to the ``add_to_cart_v1`` endpoint to fetch
         its option groups and append them as extra rows after the item.
      4. Reshape the collected rows with pandas and write an in-memory
         ``.xlsx`` workbook (sheet name ``Menu``).

    Parameters
    ----------
    url_input : str
        Full restaurant URL, e.g. ``https://www.eateasy.ae/dubai/<code>``.

    Returns
    -------
    tuple (io.BytesIO, str)
        The xlsx contents (seeked to 0) and a suggested filename, when the
        initial menu request succeeds.
    bool
        ``True`` when the initial menu request fails.  NOTE(review): this
        asymmetric return is kept for backward compatibility — callers that
        unpack two values must check the result type first.
    """
    # Imports are kept function-local, matching the original design.
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    # Fixed payload/session cookie required by the add_to_cart_v1 endpoint.
    # NOTE(review): hard-coded restId/cookie look session-specific — confirm
    # they remain valid, or parameterize them, before relying on this.
    option_payload = {'restId': '17902'}
    option_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
    }

    # Restaurant code = last path component of the supplied URL.
    parsed_url = urlparse(url_input)
    restaurant_code = parsed_url.path.strip('/').split('/')[-1]

    url = "https://www.eateasy.ae/dubai/food/getFilteredMenus"
    data = {"restCode": restaurant_code}
    # timeout added so a stalled server cannot hang the caller indefinitely
    response = requests.post(url, data=data, timeout=30)

    menu_items_list = []      # item rows interleaved with option-group rows
    category_name_list = []   # categories in order of first appearance
    j = 0                     # 1-based position of the current category

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        menu_item_boxes = soup.find_all('div', class_='menu-item-box')
        for i, item_box in enumerate(menu_item_boxes):
            # Item image is lazy-loaded via a 'data-image' attribute.
            img = item_box.find('img')
            image_url = img.get('data-image') if img else None

            details = item_box.find('div', class_='menu-item-details')
            if details:
                type_ = details.find('p', class_='type').text.strip()
                name = details.find('h5', class_='menu-food-title').text.strip()
                description = details.find('p', itemprop='description').text.strip()
                price = details.find('div', class_='menu-item-price').text.strip()
            else:
                # Defensive defaults when the details block is missing.
                type_ = ""
                name = ""
                description = ""
                price = ""

            # The anchor's 'value' attribute is the item id used by the
            # options endpoint below.
            menu_list_options = item_box.find('a', class_='menu-list-options')
            value = menu_list_options.get('value') if menu_list_options else None

            # New categories get consecutive 1-based positions; repeated
            # categories reuse the latest position (original behaviour).
            if type_ not in category_name_list:
                category_name_list.append(type_)
                j += 1
            Category_position = j

            # Append the item row before fetching its options so option-group
            # rows land immediately after it.
            menu_items_list.append({
                "Category": type_,
                "Category_position": Category_position,
                "Item_name": name,
                "Item_position": i,
                "Image": image_url,
                "description": description,
                "price": price,
                "id": value,
            })

            if value is None:
                continue  # no option id — nothing more to fetch for this item

            option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{value}/1/"
            option_response = requests.post(
                option_url, headers=option_headers, data=option_payload, timeout=30
            )
            if option_response.status_code != 200:
                print(f"Failed to get data for item with value {value}. Status code: {option_response.status_code}")
                continue

            # Keep the try body minimal: only json.loads can raise here.
            try:
                json_data = json.loads(option_response.text)
            except json.JSONDecodeError:
                print("JSON decoding error. Response content may not be in valid JSON format.")
                continue

            extracted_data = []
            if 'arrResult' in json_data and 'arrFoodChoice' in json_data['arrResult']:
                for choice in json_data['arrResult']['arrFoodChoice']:
                    extracted_data.append({
                        'Option_group_name': choice['choice_name'],
                        'Option_name': choice['name'],
                        'Extra_price': choice['price'],
                        'Min': choice.get('mandatory', 0),
                        'Max': choice.get('max_choice', 1),
                    })

            # Fold individual choices into one row per option group, with
            # "Option N Name"/"Option N Price" columns appended per choice.
            grouped_data = {}
            for choice in extracted_data:
                group_name = choice['Option_group_name']
                if group_name not in grouped_data:
                    # BUGFIX: the original computed max(choice['Max'], '1') —
                    # a lexicographic comparison that raises TypeError when
                    # max_choice is an int (including the default 1).
                    # Compare numerically and floor at 1 instead.
                    try:
                        max_allowed = max(int(choice['Max']), 1)
                    except (TypeError, ValueError):
                        max_allowed = 1
                    grouped_data[group_name] = {
                        'Option_group_name': group_name,
                        'Min': choice['Min'],
                        'Max': max_allowed,
                    }
                # BUGFIX: the original counted every key starting with
                # 'Option ' (both Name and Price), numbering options
                # 1, 3, 5, ...  Count only the Name keys so numbering is
                # consecutive.
                option_index = sum(
                    key.endswith(' Name') for key in grouped_data[group_name]
                ) + 1
                grouped_data[group_name][f"Option {option_index} Name"] = choice['Option_name']
                grouped_data[group_name][f"Option {option_index} Price"] = choice['Extra_price']

            menu_items_list.extend(grouped_data.values())

        df = pd.DataFrame(menu_items_list)
        # Spreadsheet post-processing: blank out the headers of every column
        # after 'Max' (the per-option columns), then shift the option-group
        # columns up one row so each group row lines up with its item row.
        if 'Max' in df.columns:
            max_column_index = df.columns.get_loc('Max')
            for k in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[k]: ''}, inplace=True)
            option_group_name_index = df.columns.get_loc('Option_group_name')
            for k in range(option_group_name_index, len(df.columns)):
                df.iloc[:, k] = df.iloc[:, k].shift(-1)
        df_cleaned = df.dropna(how='all')  # drop rows emptied by the shift

        output = BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df_cleaned.to_excel(writer, index=False, sheet_name='Menu')
        output.seek(0)
        return output, f"{restaurant_code}_menu.xlsx"

    print(f"Failed to get menu items. Status code: {response.status_code}")
    return True  # legacy failure sentinel — see docstring
|