def Talabat_Json_extract(url):
    """Scrape a Talabat restaurant page and return its menu as JSON bytes.

    Parameters
    ----------
    url : str
        Talabat restaurant URL. The last two path segments are assumed to be
        the restaurant id and the restaurant name slug (``.../<id>/<name>``).

    Returns
    -------
    tuple[str, io.BytesIO] | bool
        ``(restaurant_name, buffer)`` on success, where ``buffer`` holds the
        UTF-8 encoded, indented JSON array of menu items; ``True`` on any
        failure (kept as-is for backward compatibility with existing callers).
    """
    import json
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id, restaurant_id):
        # BUG FIX: the original helper was defined with only ``item_id`` but
        # invoked as ``extract_choices(item_id, restaurant_id)``, so every
        # item with choices raised TypeError. The signature now matches the
        # call site.
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            # The endpoint returns JSON; decode it directly instead of
            # round-tripping the payload through an HTML parser.
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Next.js embeds the page state as JSON in this script tag.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if script_tag:
            json_content = json.loads(script_tag.string.strip())
            menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
            items = []
            for item in menu_data:
                item_id = item['id']
                item_info = {
                    'category': item['originalSection'],
                    # NOTE(review): 'category_postion' typo kept verbatim —
                    # it is a key in the emitted JSON and downstream
                    # consumers may rely on the exact spelling.
                    'category_postion': 1,
                    'item_name': item['name'],
                    'item_position': 1,
                    'original_image': item['originalImage'],
                    'description': item['description'],
                    'price': item['price'],
                    'item_id': item_id,
                }
                if item['hasChoices']:
                    option_groups_info = []
                    choice_data = extract_choices(item_id, restaurant_id)
                    if choice_data:
                        # First list element carries this item's sections.
                        choice_for_item = choice_data["result"]['choiceForItem'][0]
                        for option_group in choice_for_item['choiceSections']:
                            option_group_info = {
                                'option_group_name': option_group['nm'],
                                'min_quantity': option_group['mnq'],
                                'max_quantity': option_group['mxq'],
                                'option_group_names': [],
                            }
                            # 'ich' lists the individual choices; it may be
                            # absent (or null) for some groups.
                            for choice in option_group.get('ich') or []:
                                option_group_info['option_group_names'].append({
                                    'option_name': choice['nm'],
                                    'option_price': choice['pr'],
                                })
                            option_groups_info.append(option_group_info)
                    item_info['option_groups'] = option_groups_info
                items.append(item_info)

            # Serialize into an in-memory buffer (no temp file on disk).
            output = BytesIO()
            output.write(json.dumps(items, indent=4).encode('utf-8'))
            output.seek(0)
            return restaurant_name, output
        print("Script tag with id '__NEXT_DATA__' not found.")
    else:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
    # Original contract: any failure path falls through to ``return True``.
    return True