# TE-Scrapper / EatEasy_Json_only.py
# viraj
# Initial Commit
# e79fbb1
def _tag_text(tag):
    """Return the stripped text of a parsed HTML tag, or "" when the tag is missing."""
    return tag.text.strip() if tag is not None else ""


def _effective_max(raw_max):
    """Return the group's max choice floored at "1", compared numerically.

    A plain ``max(value, "1")`` on strings compares lexicographically and
    raises ``TypeError`` when the value is an int or ``None``. String inputs
    that are numerically >= 1 are returned unchanged so the emitted JSON
    keeps the representation the API sent.
    """
    try:
        numeric = float(raw_max)
    except (TypeError, ValueError):
        return "1"
    if not numeric >= 1:  # also rejects NaN
        return "1"
    return raw_max if isinstance(raw_max, str) else str(raw_max)


def _collect_option_groups(food_choices):
    """Group flat ``arrFoodChoice`` rows into one entry per ``choice_name``.

    ``Min`` and ``Max`` are taken from the first row seen for each group;
    later rows of the same group only contribute an option.
    """
    groups_by_name = {}
    for choice in food_choices:
        group_name = choice['choice_name']
        group = groups_by_name.get(group_name)
        if group is None:
            group = {
                "Option_group_name": group_name,
                "Min": choice['mandatory'],
                "Max": _effective_max(choice['max_choice']),
                "Option_group_names": [],
            }
            groups_by_name[group_name] = group
        group["Option_group_names"].append({
            "Option_name": choice['name'],
            "Option_price": choice['price'],
        })
    return list(groups_by_name.values())


def _option_groups_from_response(cart_response, item_id):
    """Convert an add-to-cart response into option groups; print and return [] on failure."""
    if cart_response.status_code != 200:
        print(f"Failed to get data for item with value {item_id}. Status code: {cart_response.status_code}")
        return []
    try:
        payload = cart_response.json()
    except ValueError:
        # ValueError is the common base of json.JSONDecodeError and of the
        # simplejson-based error requests raises when simplejson is installed.
        print("JSON decoding error. Response content may not be in valid JSON format.")
        return []
    result = payload.get('arrResult') if isinstance(payload, dict) else None
    food_choices = result.get('arrFoodChoice') if isinstance(result, dict) else None
    return _collect_option_groups(food_choices or [])


def EatEasy_Json_extract(url_input: str):
    """Scrape a restaurant menu page and return its items as a JSON string.

    The page at ``url_input`` is fetched with a POST carrying the restaurant
    code (the last path segment of the URL). Every ``div.menu-item-box`` in
    the returned HTML becomes one menu item; for each item that exposes an
    id, the add-to-cart endpoint is queried to collect its option groups.

    Args:
        url_input: URL of the restaurant page.

    Returns:
        A tuple ``(menu_json, restaurant_code)`` where ``menu_json`` is the
        list of item dicts serialised with ``json.dumps(..., indent=4)``.
        The list is empty when the page request does not return HTTP 200.

    Raises:
        requests.RequestException: if the initial page request fails or
            times out. Failures of per-item requests are printed and the
            item is emitted without option groups.
    """
    import requests
    import json
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    # Without a timeout a stalled connection would block the scrape forever.
    request_timeout = 30  # seconds

    # NOTE(review): restId and the session cookie are hard-coded and sent for
    # every restaurant; confirm the endpoint does not depend on them.
    cart_payload = {'restId': '17902'}
    cart_headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]

    # NOTE(review): the original also defined an unused URL,
    # "https://www.eateasy.ae/dubai/food/getFilteredMenus", but posted to
    # url_input; that behaviour is kept here. Confirm which one is intended.
    page_response = requests.post(
        url_input,
        data={"restCode": restaurant_code},
        timeout=request_timeout,
    )

    menu_items_list = []
    # Category name -> 1-based position, assigned on first appearance, so an
    # item that returns to an earlier category gets that category's position.
    category_positions = {}

    if page_response.status_code == 200:
        soup = BeautifulSoup(page_response.text, 'html.parser')
        for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
            img = item_box.find('img')
            image_url = img.get('data-image') if img is not None else None

            details = item_box.find('div', class_='menu-item-details')
            if details is None:
                category = name = description = price = ""
            else:
                # A missing child tag yields "" instead of aborting the scrape.
                category = _tag_text(details.find('p', class_='type'))
                name = _tag_text(details.find('h5', class_='menu-food-title'))
                description = _tag_text(details.find('p', itemprop='description'))
                price = _tag_text(details.find('div', class_='menu-item-price'))

            options_link = item_box.find('a', class_='menu-list-options')
            item_id = options_link.get('value') if options_link is not None else None

            category_position = category_positions.setdefault(
                category, len(category_positions) + 1
            )

            menu_item = {
                "Category": category,
                "Category_position": category_position,
                "Item_name": name,
                "Item_position": i,  # 0-based index of the item box on the page
                "Image": image_url,
                "Description": description,
                "Price": price,
                "ID": item_id,
                "Option_groups": [],
            }

            if item_id is not None:
                cart_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
                try:
                    cart_response = requests.post(
                        cart_url,
                        headers=cart_headers,
                        data=cart_payload,
                        timeout=request_timeout,
                    )
                except requests.RequestException as exc:
                    # Best effort: one unreachable item must not lose the rest.
                    print(f"Failed to get data for item with value {item_id}. Error: {exc}")
                else:
                    menu_item["Option_groups"] = _option_groups_from_response(cart_response, item_id)

            menu_item["Has_choice"] = bool(menu_item["Option_groups"])
            menu_items_list.append(menu_item)
    else:
        print(f"Failed to get menu items. Status code: {page_response.status_code}")

    # NOTE(review): nothing is written to disk here (the file dump was
    # commented out in the original); the message below is kept unchanged.
    print(f"Menu items saved to {restaurant_code}.json file.")
    return json.dumps(menu_items_list, indent=4), restaurant_code