# ND-Scrapper / Excel / Noonfood_excel.py
# viraj
# Initial Commit
# 9e6917b
from fastapi import FastAPI, File, UploadFile,HTTPException
from fastapi.responses import StreamingResponse
import pandas as pd
import numpy as np
import requests
import os
import io
import zipfile
from urllib.parse import urljoin
from typing import List
from pydantic import BaseModel
from io import BytesIO
# app = FastAPI()
# Input model
# Extract items with categories
def extract_items_with_categories(menu):
    """Flatten a menu payload into an ordered list of row dicts.

    For every entry in ``menu['categories']`` a header row (category name and
    position, all item fields ``None``) is emitted the first time that
    category name is seen, followed by one row per item code listed in the
    category that can be resolved against ``menu['items']``.

    Args:
        menu: Mapping with a ``'categories'`` list (each entry having
            ``'name'``, ``'position'`` and ``'items'``, a list of item codes)
            and an ``'items'`` list (each entry having ``'itemCode'``,
            ``'name'``, ``'position'``, ``'price'`` and optionally ``'image'``
            and ``'itemDesc'``).

    Returns:
        A list of dicts with the keys ``category``, ``position``, ``Item``,
        ``itemCode``, ``item_position``, ``img_url``, ``Description`` and
        ``price_dine``. Item rows carry an empty-string ``category``; the
        header row before them carries the name.
    """
    image_base_url = "https://f.nooncdn.com/food_production/"

    # Index the items once instead of scanning the whole list for every code.
    # setdefault keeps the first entry for a duplicated code, which is what a
    # first-match linear search would have returned.
    items_by_code = {}
    for entry in menu['items']:
        code = entry.get('itemCode')
        if code is not None:
            items_by_code.setdefault(code, entry)

    rows = []
    seen_categories = set()
    for category in menu['categories']:
        category_name = category['name']
        # -1 is normalised to 0; the same value is used for the header row
        # and for every item row of this category so they stay consistent.
        category_position = 0 if category['position'] == -1 else category['position']

        if category_name not in seen_categories:
            seen_categories.add(category_name)
            rows.append({
                'category': category_name,
                'position': category_position,
                'Item': None,  # header rows carry no item data
                'itemCode': None,
                'item_position': None,
                'img_url': None,
                'Description': None,
                'price_dine': None,
            })

        for item_code in category.get('items') or []:
            item = items_by_code.get(item_code)
            if item is None:
                # The category references a code that is not in menu['items'].
                continue

            image = item.get('image')
            rows.append({
                'category': '',  # blank: the name lives on the header row
                'position': category_position,
                'Item': item['name'],
                'itemCode': item['itemCode'],
                'item_position': item['position'],
                # Only build a URL when there is an image path to append.
                'img_url': image_base_url + image if image else None,
                'Description': item.get('itemDesc'),
                'price_dine': item['price'],
            })

    return rows
# Extract options
def extract_options(menu):
    """Collect the option groups (modifiers) attached to each menu item.

    Every modifier code listed on an item is resolved against
    ``menu['modifiers']``; every option inside a resolved modifier is in turn
    resolved against ``menu['items']`` to obtain its display name. Codes that
    cannot be resolved are skipped.

    Args:
        menu: Mapping with an ``'items'`` list and, when any item has
            modifiers, a ``'modifiers'`` list whose entries have
            ``'modifierCode'``, ``'name'``, ``'options'`` and optionally
            ``'minTotalOptions'`` / ``'maxTotalOptions'``.

    Returns:
        ``{item_code: {modifier_name: {'Min': ..., 'Max': ..., 'Options':
        [{'Option name': ..., 'Option price': ...}, ...]}}}``. Items without
        any resolvable modifier do not appear in the result.
    """
    # Build both lookup tables once; setdefault keeps the first entry for a
    # duplicated code, matching a first-match linear search.
    items_by_code = {}
    for entry in menu['items']:
        code = entry.get('itemCode')
        if code is not None:
            items_by_code.setdefault(code, entry)

    modifiers_by_code = {}
    for entry in menu.get('modifiers') or []:
        code = entry.get('modifierCode')
        if code is not None:
            modifiers_by_code.setdefault(code, entry)

    options_by_item = {}
    for item in menu['items']:
        # `or []` also covers an explicit None, which would not be iterable.
        for modifier_code in item.get('modifiers') or []:
            modifier = modifiers_by_code.get(modifier_code)
            if modifier is None:
                continue

            groups = options_by_item.setdefault(item['itemCode'], {})
            group_name = modifier['name']
            if group_name not in groups:
                groups[group_name] = {
                    'Min': modifier.get('minTotalOptions'),
                    'Max': modifier.get('maxTotalOptions'),
                    'Options': [],
                }
            group = groups[group_name]

            # Two modifiers sharing a name on one item feed the same group.
            for option in modifier.get('options') or []:
                option_item = items_by_code.get(option['itemCode'])
                if option_item is None:
                    continue
                group['Options'].append({
                    'Option name': option_item['name'],
                    'Option price': option['price'],
                })

    return options_by_item
# Process data for a single URL
def _fetch_outlet_menu(outlet_code, latitude, longitude):
    """POST to the guest outlet-details endpoint and return ``data.menu`` from its JSON body."""
    api_url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
    payload = {
        "addressLat": latitude,
        "addressLng": longitude,
        "deliveryType": "default",
        "outletCode": outlet_code,
    }
    # NOTE(review): the Cookie value below is a captured cookie string that
    # still carries its own attributes (Domain, Path, Expires=03 Jun 2024,
    # Max-Age). Confirm whether the endpoint actually needs it.
    headers = {
        'Connection': 'keep-alive',
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
        "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(api_url, headers=headers, json=payload, timeout=30)
    # Surface HTTP errors here rather than as a confusing JSON/KeyError below.
    response.raise_for_status()
    return response.json()['data']['menu']


def _build_menu_frame(items, options):
    """Join item rows with their option groups into the sheet-ready DataFrame."""
    item_columns = [
        'category', 'position', 'Item', 'itemCode',
        'item_position', 'img_url', 'Description', 'price_dine',
    ]
    # Item-identifying columns that are blanked on repeated rows (everything
    # except price_dine).
    item_key_columns = item_columns[:7]

    # Passing the columns explicitly keeps the frame well-formed for an empty menu.
    items_df = pd.DataFrame(items, columns=item_columns)

    # One row per (item, option group); options are spread over numbered columns.
    option_rows = []
    for item_code, option_groups in options.items():
        for group_name, group_data in option_groups.items():
            row = {
                'itemCode': item_code,
                'Option Group Name': group_name,
                'Min': group_data.get('Min'),
                'Max': group_data.get('Max'),
            }
            for number, option in enumerate(group_data['Options'], start=1):
                row[f'Option name {number}'] = option['Option name']
                row[f'Option price {number}'] = option['Option price']
            option_rows.append(row)

    if option_rows:
        options_df = pd.DataFrame(option_rows)
    else:
        # A frame built from [] has no 'itemCode' column and the merge below
        # would raise KeyError for outlets without any option group.
        options_df = pd.DataFrame(columns=['itemCode', 'Option Group Name', 'Min', 'Max'])

    merged_df = items_df.merge(options_df, on='itemCode', how='left')

    # Item rows carry '' as category; fill it from the preceding header row.
    merged_df['category'] = merged_df['category'].replace('', np.nan).ffill()
    merged_df['Item'] = merged_df['Item'].replace('', np.nan)

    # An item with several option groups yields several rows; blank the item
    # columns on every repeat. Assigning by label replaces the columns, so
    # numeric columns may become object dtype without an in-place upcast.
    repeated = merged_df[item_key_columns].duplicated()
    merged_df[item_key_columns] = merged_df[item_key_columns].mask(repeated, '')

    merged_df = merged_df.dropna(how='all')

    # Drop rows that hold nothing but category and position (the header rows).
    detail_columns = merged_df.columns.difference(['category', 'position'])
    header_only = (
        merged_df[detail_columns].isna().all(axis=1)
        & merged_df[['category', 'position']].notna().all(axis=1)
    )
    merged_df = merged_df[~header_only]

    # itemCode was only needed as the join key.
    merged_df = merged_df.drop(columns=['itemCode'])

    # Every column to the right of 'Max' (the numbered option columns) gets
    # an empty header in the sheet.
    if 'Max' in merged_df.columns:
        labels = list(merged_df.columns)
        keep = merged_df.columns.get_loc('Max') + 1
        merged_df.columns = labels[:keep] + [''] * (len(labels) - keep)

    return merged_df


def process_url(url, latitude, longitude):
    """Download one outlet's menu and render it as an in-memory Excel workbook.

    Args:
        url: Outlet page URL. The outlet code is taken as the second-to-last
            ``'/'``-separated segment, so a URL ending in ``.../<code>/``
            yields ``<code>``.
        latitude: Sent to the API as ``addressLat``.
        longitude: Sent to the API as ``addressLng``.

    Returns:
        A tuple ``(buffer, filename)``: a ``BytesIO`` rewound to position 0
        holding an ``.xlsx`` file with a single ``Menu`` sheet, and the
        suggested file name ``"<outlet_code>_menu.xlsx"``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        KeyError: if the response JSON lacks ``data.menu`` or the expected
            menu fields.
    """
    outlet_code = url.split('/')[-2]

    menu = _fetch_outlet_menu(outlet_code, latitude, longitude)
    items = extract_items_with_categories(menu)
    options = extract_options(menu)
    merged_df = _build_menu_frame(items, options)

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        merged_df.to_excel(writer, index=False, sheet_name='Menu')
    output.seek(0)  # rewind so the caller can read from the start
    return output, f"{outlet_code}_menu.xlsx"
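# NOTE: disabled endpoint kept for reference. It is stale: process_url returns
# an (in-memory buffer, filename) tuple, not a path on disk, so the zip step
# below would need zip_file.writestr(filename, buffer.getvalue()) instead of
# zip_file.write(file, ...) before this can be re-enabled.
# @app.post("/generate_and_download", response_class=StreamingResponse)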
# def generate_and_download(details: RestaurantDetailsRequest):
# files = []
# for url in details.urls:
# output_filename = process_url(url, details.latitude, details.longitude)
# files.append(output_filename)
# zip_buffer = io.BytesIO()
# with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
# for file in files:
# zip_file.write(file, os.path.basename(file))
# zip_buffer.seek(0)
# return StreamingResponse(zip_buffer, media_type="application/x-zip-compressed", headers={"Content-Disposition": "attachment;filename=output_files.zip"})
# if __name__ == "__main__":
# import uvicorn
# uvicorn.run(app, host="127.0.0.1", port=8000)