# ND-Scrapper / Excel / Noonfood_excel_final.py
# viraj
# Initial Commit
# 9e6917b
import requests
import json
import pandas as pd
import numpy as np
from fastapi import FastAPI, Query
# FastAPI application object (no routes are registered in this part of the file).
app = FastAPI()

# Read the coordinates as floats: int() raises ValueError on decimal input
# such as "25.2048", which is how latitude/longitude are normally written.
latitude = float(input("Enter latitude:"))
longitude = float(input("Enter longitude:"))
url1 = input("Enter the restaurant url:")

# The outlet code is taken from the second-to-last "/"-separated segment.
# NOTE(review): this presumably expects a URL ending in "/" (or followed by one
# more path segment) - confirm against real restaurant URLs.
outlet_code = url1.split('/')[-2]
def extract_items_with_categories(menu):
    """Flatten a menu into rows: one header row per category, then its items.

    Args:
        menu: Mapping with a ``'categories'`` list (each entry providing
            ``'name'``, ``'position'`` and ``'items'``, a list of item codes)
            and an ``'items'`` list (each entry providing ``'itemCode'``,
            ``'name'``, ``'position'`` and ``'price'``, and optionally
            ``'image'`` and ``'itemDesc'``).

    Returns:
        A list of dicts that all share the keys ``category``, ``item``,
        ``itemCode``, ``item-position``, ``img-url``, ``price``,
        ``Description`` and ``position``. A category header row carries the
        category name and ``None`` for every item field; an item row carries
        an empty ``category`` string. Item codes that match no entry in
        ``menu['items']`` are skipped.

    Raises:
        KeyError: If a category or a matched item lacks a required key.
    """
    image_base_url = "https://f.nooncdn.com/food_production/"

    # Index the items once instead of scanning the whole list for every code.
    # setdefault keeps the first entry per code, like a first-match search.
    items_by_code = {}
    for entry in menu.get('items') or ():
        items_by_code.setdefault(entry['itemCode'], entry)

    items_list = []
    categories_seen = set()  # Category names that already have a header row

    for category in menu['categories']:
        category_name = category['name']
        # A position of -1 is mapped to 0 (presumably a "no position"
        # sentinel). The normalised value is used for the header row and the
        # item rows alike so that both agree.
        raw_position = category['position']
        category_position = 0 if raw_position == -1 else raw_position

        if category_name not in categories_seen:
            items_list.append({
                'category': category_name,
                'item': None,
                'itemCode': None,
                'item-position': None,
                'img-url': None,
                'price': None,
                'Description': None,
                'position': category_position,
            })
            categories_seen.add(category_name)

        for item_code in category['items']:
            item = items_by_code.get(item_code)
            if item is None:
                continue

            # Build the image URL only when the item has an image; a missing
            # or None image would otherwise break the string concatenation.
            image = item.get('image')
            items_list.append({
                'category': '',  # Blank here; the category name lives on the header row
                'item': item['name'],
                'itemCode': item['itemCode'],
                'item-position': item['position'],
                'img-url': image_base_url + image if image else None,
                'price': item['price'],
                'Description': item.get('itemDesc'),
                'position': category_position,
            })

    return items_list
def extract_options(menu):
    """Collect, per item, its modifier groups and the options in each group.

    Args:
        menu: Mapping with an ``'items'`` list (entries providing
            ``'itemCode'`` and ``'name'``, optionally ``'modifiers'``, a list
            of modifier codes) and a ``'modifiers'`` list (entries providing
            ``'modifierCode'``, ``'name'`` and ``'options'``, optionally
            ``'minTotalOptions'`` / ``'maxTotalOptions'``). Each option
            provides ``'itemCode'`` and ``'price'``.

    Returns:
        ``{item_code: {modifier_name: {'Min': ..., 'Max': ..., 'Options':
        [{'Option name': ..., 'Option price': ...}, ...]}}}``. Items with no
        resolvable modifier are absent. Options whose ``itemCode`` matches no
        entry in ``menu['items']`` are skipped.

    Raises:
        KeyError: If an item, a matched modifier or an option lacks a
            required key.
    """
    # Build both indexes once; the lookups below are then O(1) instead of a
    # scan over the full item/modifier list for every reference.
    # setdefault keeps the first entry per code, like a first-match search.
    items_by_code = {}
    for entry in menu['items']:
        items_by_code.setdefault(entry['itemCode'], entry)

    modifiers_by_code = {}
    for entry in menu.get('modifiers') or ():
        modifiers_by_code.setdefault(entry['modifierCode'], entry)

    options_dict = {}
    for item in menu['items']:
        # A missing or None 'modifiers' value both mean "no modifiers".
        for modifier_code in item.get('modifiers') or ():
            modifier = modifiers_by_code.get(modifier_code)
            if modifier is None:
                continue

            # Reuse the group if this item already has one with the same name,
            # so further options are appended to it rather than replacing it.
            group = options_dict.setdefault(item['itemCode'], {}).setdefault(
                modifier['name'],
                {
                    'Min': modifier.get('minTotalOptions'),
                    'Max': modifier.get('maxTotalOptions'),
                    'Options': [],
                },
            )

            for option in modifier['options']:
                # An option's display name comes from the item it points to.
                option_item = items_by_code.get(option['itemCode'])
                if option_item is not None:
                    group['Options'].append({
                        'Option name': option_item['name'],
                        'Option price': option['price'],
                    })

    return options_dict
# Request the outlet details (including the menu) for the given outlet and
# delivery coordinates.
url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
payload = {
    "addressLat": latitude,
    "addressLng": longitude,
    "deliveryType": "default",
    "outletCode": outlet_code,
}
# Browser-like request headers.
# NOTE(review): the Cookie value looks like a captured Set-Cookie header (it
# carries Domain/Path/Expires attributes) and its Expires date is 03 Jun 2024,
# so it is probably stale - confirm whether the endpoint needs it at all.
headers = {
    "Connection": "keep-alive",
    "Accept": "application/json, text/plain, */*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
    "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
    "Content-Type": "application/json",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure",
}
# requests waits indefinitely unless a timeout is given, so bound the call.
response = requests.post(url, headers=headers, json=payload, timeout=30)
# Fail with a clear HTTPError on 4xx/5xx instead of a confusing JSON/KeyError
# further down when the body is an error page.
response.raise_for_status()
json_data = response.json()
# Extract the category/item rows and the per-item option groups from the menu.
items = extract_items_with_categories(json_data['data']['menu'])
options = extract_options(json_data['data']['menu'])

# One row per category header and per item.
items_df = pd.DataFrame(items)

# One row per (item, option group); the group's options are spread across
# numbered "Option name N" / "Option price N" columns.
options_list = []
for item_code, option_groups in options.items():
    for group_name, group_data in option_groups.items():
        row = {
            'itemCode': item_code,
            'Option Group Name': group_name,
            'Min': group_data.get('Min'),
            'Max': group_data.get('Max'),
        }
        for i, option in enumerate(group_data['Options'], start=1):
            row[f'Option name {i}'] = option['Option name']
            row[f'Option price {i}'] = option['Option price']
        options_list.append(row)

options_df = pd.DataFrame(options_list)
if options_df.empty:
    # With no option groups the frame would have no columns at all and the
    # merge below would fail on the missing 'itemCode' key.
    options_df = pd.DataFrame(columns=['itemCode', 'Option Group Name', 'Min', 'Max'])

# Left merge keeps every item row; an item with several option groups is
# repeated once per group.
merged_df = items_df.merge(options_df, on='itemCode', how='left')

# Item rows carry an empty category; fill it from the preceding header row.
merged_df['category'] = merged_df['category'].replace('', np.nan).ffill()
merged_df['item'] = merged_df['item'].replace('', np.nan)

# Blank the first seven columns (category and item details) on rows that
# repeat an earlier row, so an item's details show once and its additional
# option groups follow on otherwise blank rows. Cast to object first so that
# writing '' into numeric columns does not rely on silent upcasting.
detail_columns = list(merged_df.columns[:7])
repeated_rows = merged_df[detail_columns].duplicated()
merged_df[detail_columns] = merged_df[detail_columns].astype(object).mask(repeated_rows, '')

# Drop rows with no item information at all (the category header rows).
merged_df = merged_df.dropna(
    subset=['item', 'itemCode', 'item-position', 'img-url', 'price', 'Description'],
    how='all',
)
merged_df.to_excel("output3.xlsx", index=False)