# TE-Scrapper / EatEasy_excel_only.py
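# Scrapes a restaurant's menu from eateasy.ae (categories, items, images, prices
# and option groups) and returns it as an in-memory Excel workbook.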
def EatEasy_excel_extract(url_input):
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    # Payload and session cookie for the add-to-cart endpoint that exposes item
    # options; both values are hard-coded from a captured session and may need refreshing.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }
    # url_input = input("Enter the restaurant URL: ")
    parsed_url = urlparse(url_input)
    restaurant_code = parsed_url.path.strip('/').split('/')[-1]  # restaurant code is the last path segment
    url = "https://www.eateasy.ae/dubai/food/getFilteredMenus"
    data = {
        "restCode": restaurant_code
    }
    response = requests.post(url, data=data)  # fetch the rendered menu HTML for this restaurant
    menu_items_list = []
    category_name_list = []
    j = 0
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        menu_item_boxes = soup.find_all('div', class_='menu-item-box')  # Find all divs with class 'menu-item-box'
        for i, item_box in enumerate(menu_item_boxes):  # Iterate over each menu item box
            img = item_box.find('img')  # Find the img tag within the div
            if img:  # Check if img tag exists
                image_url = img.get('data-image')  # Get the value of the 'data-image' attribute
            else:
                image_url = None  # No image URL found
            menu_item_details = item_box.find('div', class_='menu-item-details')  # Find the menu-item-details div
            if menu_item_details:
                type_ = menu_item_details.find('p', class_='type').text.strip()
                name = menu_item_details.find('h5', class_='menu-food-title').text.strip()
                description = menu_item_details.find('p', itemprop='description').text.strip()
                price = menu_item_details.find('div', class_='menu-item-price').text.strip()
            else:
                # If menu-item-details is not found, fall back to empty fields
                type_ = ""
                name = ""
                description = ""
                price = ""
            menu_list_options = item_box.find('a', class_='menu-list-options')  # Anchor tag that carries the item id
            if menu_list_options:
                value = menu_list_options.get('value')  # Get the value attribute (item id)
            else:
                value = None
            # The category position only advances the first time a new category is seen
            if type_ not in category_name_list:
                category_name_list.append(type_)
                j = j + 1
            Category_position = j
            menu_item = {
                "Category": type_,
                "Category_position": Category_position,
                "Item_name": name,
                "Item_position": i,
                "Image": image_url,
                "description": description,
                "price": price,
                "id": value,
            }
            menu_items_list.append(menu_item)  # Append the menu item before requesting its options
            if value is not None:
                # Option groups are exposed through the add-to-cart endpoint for this item id
                option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{value}/1/"
                option_response = requests.post(option_url, headers=headers2, data=payload1)
                if option_response.status_code == 200:
                    try:
                        json_data = json.loads(option_response.text)
                        extracted_data = []
                        if 'arrResult' in json_data and 'arrFoodChoice' in json_data['arrResult']:
                            for choice in json_data['arrResult']['arrFoodChoice']:
                                extracted_data.append({
                                    'Option_group_name': choice['choice_name'],
                                    'Option_name': choice['name'],
                                    'Extra_price': choice['price'],
                                    'Min': choice.get('mandatory', 0),
                                    'Max': choice.get('max_choice', 1)
                                })
                        # Group the flat choice list by option group; each group row carries
                        # "Option N Name"/"Option N Price" column pairs
                        grouped_data = {}
                        for choice in extracted_data:
                            group_name = choice['Option_group_name']
                            if group_name not in grouped_data:
                                grouped_data[group_name] = {
                                    'Option_group_name': group_name,
                                    'Min': choice['Min'],
                                    'Max': max(int(choice['Max'] or 1), 1),  # assumes max_choice is numeric; require at least 1
                                }
                            # Each existing option adds a Name and a Price key, so halve the count
                            num_options = sum(key.startswith('Option ') for key in grouped_data[group_name]) // 2
                            option_index = num_options + 1  # Index for the new option
                            grouped_data[group_name][f"Option {option_index} Name"] = choice['Option_name']
                            grouped_data[group_name][f"Option {option_index} Price"] = choice['Extra_price']
                        for group_data in grouped_data.values():
                            menu_items_list.append(group_data)
                    except json.JSONDecodeError:
                        print("JSON decoding error. Response content may not be in valid JSON format.")
                else:
                    print(f"Failed to get data for item with value {value}. Status code: {option_response.status_code}")
        df = pd.DataFrame(menu_items_list)
        # Post-process the option columns: blank out the headers after 'Max' and shift the
        # option-group columns up one row so they sit alongside their parent item row.
        if 'Max' in df.columns:
            max_column_index = df.columns.get_loc('Max')
            for i in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[i]: ''}, inplace=True)
            option_group_name_index = df.columns.get_loc('Option_group_name')
            for i in range(option_group_name_index, len(df.columns)):
                df.iloc[:, i] = df.iloc[:, i].shift(-1)
        df_cleaned = df.dropna(how='all')
        # Write the sheet to an in-memory buffer instead of a file on disk
        output = BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df_cleaned.to_excel(writer, index=False, sheet_name='Menu')
        output.seek(0)
        return output, f"{restaurant_code}_menu.xlsx"
        # excel_file = f"{restaurant_code}_menu.xlsx"
        # df.to_excel(excel_file, index=False)
        # print(f"Menu items saved to {excel_file}")
    else:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return None, None  # Keep the return shape consistent with the success path
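

# Example usage sketch: run the extractor and write the returned in-memory
# workbook to disk. The URL below is a placeholder; substitute a real
# eateasy.ae restaurant page URL.
if __name__ == "__main__":
    excel_buffer, filename = EatEasy_excel_extract(
        "https://www.eateasy.ae/dubai/restaurant/example-restaurant"
    )
    if excel_buffer is not None:
        with open(filename, "wb") as f:
            f.write(excel_buffer.getvalue())
        print(f"Menu items saved to {filename}")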