File size: 7,175 Bytes
9e6917b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import json
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_restaurant_links(city, location, max_links=20, timeout=15):
    """Scrape the Deliveroo listing page for a city/location and return restaurant URLs.

    Args:
        city: City slug used in the Deliveroo URL path (e.g. "dubai").
        location: Area slug within the city.
        max_links: Maximum number of restaurant links to return (default 20,
            matching the previous hard-coded cap).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of absolute restaurant URLs (possibly empty).
    """
    base_url = "https://deliveroo.ae"
    url = f"{base_url}/restaurants/{city}/{location}/?collection=restaurants"

    # NOTE(review): the Cookie is a captured session token; it will expire and
    # should be refreshed or injected from configuration rather than hard-coded.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Cookie': '__cf_bm=oakl46sJ3V9vwmnIIbfXWfkHbGmmC2pH56GyTI33b4U-1715931048-1.0.1.1-4XOcSGSThZV_INfpn3aptlo8jpZtLFbYoLsZxP9BpQ8LIjq3wBIe8CPlSf0AomuniXy4TZWyVlBQBTlrm.CPiSfI1jzx18y9zxwc9GX0fmo; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=2e989653-2776-4ede-a52e-b610f1ad64a2'
    }

    try:
        # Original call had no timeout and could hang indefinitely.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed: {exc}")
        return []

    if response.status_code != 200:
        # The old message said "Response timed out." for any non-200, which
        # was misleading; report the actual status code instead.
        print(f"Request failed with HTTP status {response.status_code}.")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Deliveroo renders an apology banner rather than a 404 for unknown areas.
    if "We couldn't find" in soup.text or "No restaurants" in soup.text:
        print("No restaurants found for the specified location.")
        return []

    # Card containers carry one of these generated class names; matching either
    # keeps the scraper working across both feed layouts.
    divs = soup.find_all('div', class_=["HomeFeedScrollTracker-bd9a6ffea8a4b4b7", "HomeFeedUICard-157f7be5d7b2fa7b"])

    hrefs = [a_tag['href'] for div in divs for a_tag in div.find_all('a', href=True)]
    hrefs = hrefs[:max_links]
    return [f"{base_url}{href}" for href in hrefs]

def Excel_final(urls):
    """Fetch the menu of each restaurant URL and write them to restaurant_data.xlsx.

    Each successfully fetched restaurant gets its own sheet (Sheet1, Sheet2, ...).
    URLs that fail to fetch or parse are reported and skipped so one bad page
    does not abort the whole export.

    Args:
        urls: Iterable of absolute Deliveroo restaurant-page URLs.
    """
    def fetch_restaurant_data(url):
        """Download one restaurant page and flatten its menu JSON.

        Returns a list of item dicts (category/position/name/price/modifiers),
        or None when the page cannot be fetched or parsed.
        """
        # NOTE(review): session cookie is hard-coded and will expire; it should
        # be supplied from configuration.
        headers = {
            'Cookie': '__cf_bm=_AOZtAiObnqBHPy4zhGRgBLW9xg9WiaDCRzg5E0sbMk-1715757967-1.0.1.1-xZNMBsnAqy_tfjUveujgfzT4Usw5ur4u7L0JlCcNXAQIC6Cq6wj46vPH7RLTh0Gq90JENxl7kbzjyOUFaBr8yCkmRGmt7APITEk0kkXzLTs; roo_guid=c40617a7-76f7-432c-b780-f2653cd2edfe; roo_session_guid=5846d6f0-5b7f-4598-8c6d-82b8023fd4fc'
        }
        try:
            # Original call had no timeout and could hang indefinitely.
            response = requests.get(url, headers=headers, timeout=15)
        except requests.RequestException as exc:
            print(f"Failed to fetch the URL: {url} ({exc})")
            return None
        if response.status_code != 200:
            print(f"Failed to fetch the URL: {url}")
            return None
        soup = BeautifulSoup(response.content, 'html.parser')
        # Deliveroo is a Next.js app; the full menu state lives in this script tag.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag not found")
            return None
        try:
            json_data = json.loads(script_tag.string)
            json_data = json_data['props']['initialState']['menuPage']['menu']['meta']
        except (json.JSONDecodeError, KeyError, TypeError) as exc:
            # Unexpected page shape (login wall, redesign, ...): skip this URL
            # instead of crashing the whole export run.
            print(f"Unexpected page structure for {url}: {exc}")
            return None
        items = json_data['items']
        categories = json_data['categories']
        category_map = {category['id']: category['name'] for category in categories}
        modifier_groups = json_data['modifierGroups']
        modifier_groups_dict = {modifier_group['id']: modifier_group for modifier_group in modifier_groups}
        items_with_modifiers = []
        current_category = None
        current_category_position = 0
        # Items arrive grouped by category; track 1-based positions of each
        # category and of each item within its category.
        for item in items:
            category_id = item['categoryId']
            category_name = category_map.get(category_id, 'Unknown')
            if category_name == "Unknown":
                # Orphan items (no known category) are dropped on purpose.
                continue
            if category_name != current_category:
                current_category = category_name
                current_category_position += 1
                item_position = 1
            else:
                item_position += 1
            item_with_modifiers = {
                "id": item['id'],
                "category_id": category_id,
                "category_name": category_name,
                "category_position": current_category_position,
                "item_position": item_position,
                "name": item['name'],
                "description": item.get('description', ''),
                "price": item['price']['formatted'],
                # 'image' may be present-but-null, so guard before .get('url').
                "img_url": item.get('image').get('url', '') if item.get('image') else '',
                "modifier_groups": [modifier_groups_dict.get(modifier_group_id, {}) for modifier_group_id in item.get('modifierGroupIds', [])],
            }
            items_with_modifiers.append(item_with_modifiers)
        return items_with_modifiers

    def save_data_to_excel(data, sheet_name, writer):
        """Write one restaurant's flattened items to a worksheet.

        Layout: one row per (item, modifier group); the 7 item columns are only
        filled on the item's first row. Option name/price pairs are padded to
        the widest modifier group so every row has the same number of columns.
        """
        rows = []
        max_options = 0
        # Find the maximum number of options for any modifier group
        for item in data:
            for modifier_group in item['modifier_groups']:
                num_options = len(modifier_group.get('modifierOptions', []))
                if num_options > max_options:
                    max_options = num_options
        for item in data:
            base_row = [
                item['category_name'],
                item['category_position'],
                item['item_position'],
                item['name'],
                item['description'],
                item['price'],
                item['img_url'],
            ]
            first_modifier_group = True
            for modifier_group in item['modifier_groups']:
                modifier_group_row = base_row + [
                    modifier_group.get('name', ''),
                    modifier_group.get('minSelection', ''),
                    modifier_group.get('maxSelection', '')
                ]
                options = modifier_group.get('modifierOptions', [])
                for option in options:
                    modifier_group_row += [
                        option.get('name', ''),
                        option['price']['formatted'] if option.get('price') else ''
                    ]
                # Fill in the remaining columns with empty strings if there are fewer options than max_options
                modifier_group_row += [''] * (max_options * 2 - len(options) * 2)
                if first_modifier_group:
                    rows.append(modifier_group_row)
                    first_modifier_group = False
                else:
                    # Repeat rows for the same item leave the item columns blank.
                    rows.append([''] * len(base_row) + modifier_group_row[len(base_row):])
            if not item['modifier_groups']:
                # Items without modifiers still get a (padded) row.
                rows.append(base_row + [''] * (max_options * 2 + 3))
        # Create column headers
        columns = [
            'Category Name', 'Category Position', 'Item Position', 'Item Name', 'Description', 'Item Price', 'Image URL', 'Modifier Group Name', 'Min Selection', 'Max Selection'
        ]
        for i in range(1, max_options + 1):
            columns += [f'Option {i} Name', f'Option {i} Price']
        df = pd.DataFrame(rows, columns=columns)
        # Blank out the per-option headers so the sheet only shows headers up
        # to 'Max Selection' (deliberate presentation choice).
        if 'Max Selection' in df.columns:
            max_column_index = df.columns.get_loc('Max Selection')
            for i in range(max_column_index + 1, len(df.columns)):
                df.rename(columns={df.columns[i]: ''}, inplace=True)
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    with pd.ExcelWriter("restaurant_data.xlsx", engine='xlsxwriter') as writer:
        for idx, url in enumerate(urls):
            data = fetch_restaurant_data(url)
            if data:
                save_data_to_excel(data, f'Sheet{idx+1}', writer)
    print("Data saved to restaurant_data.xlsx")

if __name__ == "__main__":
    # Prompt for the target area, scrape the listing, then export the menus.
    target_city = input("Enter the city: ")
    target_location = input("Enter the location: ")

    restaurant_urls = fetch_restaurant_links(target_city, target_location)

    if not restaurant_urls:
        print("No restaurant links found or unable to fetch data.")
    else:
        Excel_final(restaurant_urls)