# ND-Scrapper / Mongo / Noonfood_mongo_Single_URL.py
# viraj
# Initial Commit
# 9e6917b
import json
import logging
import os

import certifi
import mongoengine as me
import pandas as pd
import requests
from bson.objectid import ObjectId
from fastapi import FastAPI, Query
from mongoengine import connect
from pydantic import BaseModel
# Connection settings for the NoonFood MongoDB deployment.
#
# SECURITY NOTE(review): these values used to be available only as hard-coded
# literals. Each one can now be overridden through an environment variable; the
# literals are kept purely as backward-compatible fallbacks. The password below
# has been committed to source control and should be rotated, after which the
# fallback literals should be removed.
db_name = os.environ.get('NOONFOOD_DB_NAME', 'NoonFood')
host = os.environ.get('NOONFOOD_DB_HOST', 'ac-ut7ghe7-shard-00-00.299tz43.mongodb.net')
port = int(os.environ.get('NOONFOOD_DB_PORT', '27017'))
username = os.environ.get('NOONFOOD_DB_USERNAME', 'sahlhubnoonfood')
password = os.environ.get('NOONFOOD_DB_PASSWORD', '5JiDKBl6IPyH9Ff4')
alias = 'default'

# Drop whatever connection is registered under this alias before connecting;
# presumably this lets the module be re-run in the same process -- TODO confirm.
me.disconnect(alias)
me.connect(
    db_name,
    alias=alias,  # same value as MongoEngine's default alias, made explicit
    host=host,
    port=port,
    username=username,
    password=password,
    ssl=True,
    tlsCAFile=certifi.where(),  # CA bundle shipped with certifi
)
class NoonFoodLink(me.Document):
    """MongoEngine document pairing a response payload with a list of URL strings."""

    # Required dictionary payload, stored as given.
    initial_response = me.DictField(required=True)
    # Required list of strings (URLs, judging by the field name).
    url_list = me.ListField(me.StringField(), required=True)
class MenuOption(me.Document):
    """MongoEngine document for one selectable option within an option group."""

    # Option name (required string).
    name = me.StringField(required=True)
    # Option price (required float).
    price = me.FloatField(required=True)
    # The two fields below are disabled in the current schema.
    #location = me.StringField(required=True)
    #restro_ref_id = me.ReferenceField(NoonFoodLink)
class MenuOptionGroup(me.Document):
    """MongoEngine document for a named group of options with quantity limits."""

    # Group name (required string).
    group_name = me.StringField(required=True)
    # References to the MenuOption documents belonging to this group.
    options = me.ListField(me.ReferenceField(MenuOption))
    # Required integer bounds; presumably how many options may be chosen
    # from the group -- TODO confirm against the upstream API.
    min_quantity = me.IntField(required=True)
    max_quantity = me.IntField(required=True)
    # Disabled back-reference to the originating NoonFoodLink document.
    #restro_ref_id = me.ReferenceField(NoonFoodLink)
class MenuItem(me.Document):
    """MongoEngine document for a single menu item and its option groups."""

    # Name and position of the category the item belongs to (both required).
    category_name = me.StringField(required=True)
    category_position = me.IntField(required=True)
    # Item name and its position (both required).
    name = me.StringField(required=True)
    position = me.IntField(required=True)
    # Optional free-text description and image URL.
    description = me.StringField()
    image_url = me.StringField()
    # Item price (required float).
    price = me.FloatField(required=True)
    # References to the MenuOptionGroup documents attached to this item.
    option_groups = me.ListField(me.ReferenceField(MenuOptionGroup))
    # Disabled back-reference to the originating NoonFoodLink document.
    #restro_ref_id = me.ReferenceField(NoonFoodLink)
class MenuCategory(me.Document):
    """MongoEngine document grouping MenuItem references under a category name."""

    # Category name (required string).
    name = me.StringField(required=True)
    # References to the MenuItem documents in this category.
    items = me.ListField(me.ReferenceField(MenuItem))
    # Disabled back-reference to the originating NoonFoodLink document.
    #restro_ref_id = me.ReferenceField(NoonFoodLink)
# app = FastAPI()
class RestaurantDetailsRequest(BaseModel):
    """Pydantic model describing a coordinate pair and a restaurant URL."""

    # Coordinates as floats.
    latitude: float
    longitude: float
    # Restaurant page URL as a string.
    restaurant_url: str
class StoreDataRequest(BaseModel):
    """Pydantic model carrying a response dict and a list of URLs."""

    # Field names mirror those of the NoonFoodLink document.
    initial_response: dict
    url_list: list
# Extract items with categories
def extract_items_with_categories(menu):
    """Flatten a menu into rows: a header row per category, then its items.

    Args:
        menu: Mapping with a ``'categories'`` list, where each category has
            ``'name'``, ``'position'`` and ``'items'`` (a list of item codes),
            and an ``'items'`` list of item dicts identified by ``'itemCode'``.

    Returns:
        A list of dicts with the keys ``'category'``, ``'item'``,
        ``'itemCode'``, ``'item-position'``, ``'img-url'``, ``'price'``,
        ``'Description'`` and ``'position'`` (the category position). The
        first time a category name is seen, a header row is emitted whose
        item-related values are all ``None``. Item codes that do not resolve
        to an entry in ``menu['items']`` are skipped.

    Raises:
        KeyError: If ``'categories'`` or a required category/item key is
            missing.
    """
    # Index the items once instead of rescanning the whole list for every item
    # code. setdefault keeps the first entry per code, which is what a
    # front-to-back linear search would have returned.
    items_by_code = {}
    for entry in menu.get('items', []):
        if 'itemCode' in entry:
            items_by_code.setdefault(entry['itemCode'], entry)

    items_list = []
    categories_seen = set()
    for category in menu['categories']:
        category_name = category['name']

        # Emit the header row only once per distinct category name.
        if category_name not in categories_seen:
            items_list.append({
                'category': category_name,
                'item': None,
                'itemCode': None,
                'item-position': None,
                'img-url': None,
                'price': None,
                'Description': None,
                'position': category['position'],
            })
            categories_seen.add(category_name)

        for item_code in category['items']:
            item = items_by_code.get(item_code)
            if not item:
                continue  # code listed in the category but absent from the menu
            items_list.append({
                'category': category_name,
                'item': item['name'],
                'itemCode': item['itemCode'],
                'item-position': item['position'],
                'img-url': item.get('image', ''),
                'price': item.get('price', 0.0),
                'Description': item.get('itemDesc', ''),
                'position': category['position'],
            })
    return items_list
# Extract options with proper formatting
def extract_options(menu):
    """Collect, per item code, the option groups (modifiers) attached to it.

    Args:
        menu: Mapping with an ``'items'`` list of item dicts and, optionally,
            a ``'modifiers'`` list. An item may carry a ``'modifiers'`` list
            of modifier codes; each modifier has ``'modifierCode'``,
            ``'name'`` and ``'options'`` (dicts holding an ``'itemCode'`` and
            optionally a ``'price'``).

    Returns:
        ``{item_code: {modifier_name: {'Min': ..., 'Max': ...,
        'Options': [{'Option name': ..., 'Option price': ...}, ...]}}}``.
        ``'Min'``/``'Max'`` come from the modifier's ``'minTotalOptions'`` /
        ``'maxTotalOptions'`` (``None`` when absent). Modifier codes and
        option item codes that cannot be resolved are skipped. Items without
        any resolvable modifier do not appear in the result.

    Raises:
        KeyError: If ``'items'`` or a required item/modifier key is missing.
    """
    items = menu['items']

    # Build both lookup tables once instead of scanning the lists again for
    # every modifier code and every option. setdefault keeps the first entry
    # per code, matching what a front-to-back linear search would return.
    items_by_code = {}
    for entry in items:
        if 'itemCode' in entry:
            items_by_code.setdefault(entry['itemCode'], entry)

    modifiers_by_code = {}
    for modifier in menu.get('modifiers', []):
        if 'modifierCode' in modifier:
            modifiers_by_code.setdefault(modifier['modifierCode'], modifier)

    options_dict = {}
    for item in items:
        # `or ()` also covers an explicit None value for 'modifiers'.
        for modifier_code in item.get('modifiers') or ():
            modifier = modifiers_by_code.get(modifier_code)
            if not modifier:
                continue

            item_groups = options_dict.setdefault(item['itemCode'], {})
            if modifier['name'] not in item_groups:
                item_groups[modifier['name']] = {
                    'Min': modifier.get('minTotalOptions'),
                    'Max': modifier.get('maxTotalOptions'),
                    'Options': [],
                }
            group_options = item_groups[modifier['name']]['Options']

            for option in modifier.get('options') or ():
                # Each option points at a regular menu item, which supplies
                # the option's display name.
                option_item = items_by_code.get(option['itemCode'])
                if option_item:
                    group_options.append({
                        'Option name': option_item['name'],
                        'Option price': option.get('price', 0.0),
                    })
    return options_dict
def _process_item(item, options):
    """Reshape one flat menu row into ``{'item': ..., 'option_groups': ...}``."""
    common_fields = {
        'category': item['category'],
        # In the flat row, 'position' holds the category's position.
        'category-position': item['position'],
        'item': item['item'],
        'item-position': item['item-position'],
        'Description': item['Description'],
        'img-url': item['img-url'],
        'price': item['price'],
    }

    option_groups = [
        {
            'group_name': group_name,
            'min_quantity': group_data['Min'],
            'max_quantity': group_data['Max'],
            'options': [
                {'name': option['Option name'], 'price': option['Option price']}
                for option in group_data['Options']
            ],
        }
        for group_name, group_data in options.get(item['itemCode'], {}).items()
    ]

    return {'item': common_fields, 'option_groups': option_groups}


def process_json(data, options, outlet_code):
    """Combine flat menu rows with their option groups.

    Args:
        data: Iterable of row dicts with the keys ``'category'``,
            ``'position'``, ``'item'``, ``'itemCode'``, ``'item-position'``,
            ``'Description'``, ``'img-url'`` and ``'price'``.
        options: Mapping ``{item_code: {group_name: {'Min': ..., 'Max': ...,
            'Options': [{'Option name': ..., 'Option price': ...}]}}}``.
            ``None`` is treated as "no options".
        outlet_code: Not used by this function; kept so existing callers
            continue to work.

    Returns:
        A list with one dict per input row:
        ``{'item': {...common fields...}, 'option_groups': [...]}``, where
        each option group has ``'group_name'``, ``'min_quantity'``,
        ``'max_quantity'`` and ``'options'`` (dicts with ``'name'`` and
        ``'price'``).

    Raises:
        KeyError: If a row or option group lacks one of the expected keys.
    """
    options = options or {}
    return [_process_item(item, options) for item in data]
def store_in_db(processed_items):
    """Persist processed menu rows as MenuItem / option / category documents.

    For every usable row this saves one ``MenuOption`` per option, one
    ``MenuOptionGroup`` per option group and one ``MenuItem`` referencing
    those groups. After all rows are handled, one ``MenuCategory`` per
    distinct category name is saved, referencing that category's items.

    Args:
        processed_items: Iterable of dicts shaped like
            ``{'item': {...}, 'option_groups': [...]}``, where ``'item'``
            holds ``'category'``, ``'category-position'``, ``'item'``,
            ``'item-position'``, ``'Description'``, ``'img-url'`` and
            ``'price'``.

    Returns:
        None.
    """
    categories_dict = {}
    for item in processed_items:
        item_common_fields = item['item']
        option_groups = item['option_groups']

        name = item_common_fields.get('item')
        position = item_common_fields.get('item-position')
        price = item_common_fields.get('price')

        # Rows with no item data at all are category headers, not broken
        # items, so they are skipped without logging an error.
        if name is None and position is None and price is None:
            continue

        # Compare against None rather than relying on truthiness: a position
        # of 0 and a price of 0.0 are valid values and must not be rejected.
        if not name or position is None or price is None:
            logging.error("Missing required fields in item: %s", item_common_fields)
            continue

        # Referenced documents have to be saved before the document that
        # points at them, so build options -> groups -> item in that order.
        group_documents = []
        for group in option_groups:
            option_documents = []
            for option_data in group['options']:
                menu_option = MenuOption(
                    name=option_data['name'],
                    price=option_data['price'],
                )
                menu_option.save()
                option_documents.append(menu_option)

            option_group = MenuOptionGroup(
                group_name=group['group_name'],
                min_quantity=group['min_quantity'],
                max_quantity=group['max_quantity'],
                options=option_documents,
            )
            option_group.save()
            group_documents.append(option_group)

        # Single save with the groups already attached (previously the item
        # was written once empty and then again after the groups were added).
        item_document = MenuItem(
            category_name=item_common_fields['category'],
            category_position=item_common_fields['category-position'],
            name=name,
            position=position,
            description=item_common_fields['Description'],
            image_url=item_common_fields['img-url'],
            price=price,
            option_groups=group_documents,
        )
        item_document.save()

        # Collect items per category; the category documents are written
        # once at the end, after all of their items exist.
        category_name = item_common_fields['category']
        if category_name not in categories_dict:
            categories_dict[category_name] = MenuCategory(name=category_name, items=[])
        categories_dict[category_name].items.append(item_document)

    for category in categories_dict.values():
        category.save()
def get_restaurant_details(latitude,longitude,url1):
    """Fetch one outlet's menu from the Noon Food API and store it in MongoDB.

    Args:
        latitude: Sent to the API as ``addressLat``.
        longitude: Sent to the API as ``addressLng``.
        url1: Restaurant URL. The outlet code is taken from its
            second-to-last ``/``-separated segment, so the URL is assumed to
            end with a trailing slash -- TODO confirm against real inputs.

    Returns:
        ``{"message": ...}`` when the menu was extracted and stored, or
        ``{"error": ...}`` when the request, the parsing or the storage step
        raised an exception (the exception is logged, not propagated).
    """
    outlet_code = url1.split('/')[-2]
    # Make the request to fetch restaurant details
    url = "https://food.noon.com/_svc/mp-food-api-mpnoon/consumer/restaurant/outlet/details/guest"
    payload = {
        "addressLat": latitude,
        "addressLng": longitude,
        "deliveryType": "default",
        "outletCode": outlet_code
    }
    # NOTE(review): the Cookie value below looks like a captured Set-Cookie
    # line (it carries Domain/Path/Expires attributes and a 2024 expiry);
    # confirm whether the endpoint still needs it.
    headers = {
        'Connection': 'keep-alive',
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-GB,en-US;q=0.9,en;q=0.8,gu;q=0.7",
        "Cache-Control": "no-cache, max-age=0, must-revalidate, no-store",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Cookie": "bm_mi=791533C8E67CE8E7DA98E80ADED70F69~YAAQRK0cuOep9tGPAQAAUYKw3RcGDAVhD+mtWU8IH76wZL29zl4qqCjMwGv8sKtYlQWZNaFftSOvHFOvQU4+3CY2uHZyjjK6I3GeNdKEn+XHupISeNc0K16GOXLqcPOwu4sADTmxE7PYQvSQE7eimhqsBiJVRd96R8W0D2hl31FlY/4rl+NPZvM3iXjrn2GO50VMv+HhGfCnDMBwApBxgpMWFLfs0u6EYy44mg/FXbom5s5pa3cro8AS35nYHbdUbi61K9fnWRVaF8L/4z0xh7V1AEQETevb5fdGF8aB9m2UG29p2W6KSMb8DyFZLpG3vl5+IRECqZdFxaUMnykO8G/ynRHG~1; Domain=.noon.com; Path=/; Expires=Mon, 03 Jun 2024 12:41:22 GMT; Max-Age=7199; Secure"
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        # Surface HTTP error statuses here instead of failing later with a
        # confusing JSON/KeyError on an error page.
        response.raise_for_status()
        json_data = response.json()
        items = extract_items_with_categories(json_data['data']['menu'])
        options = extract_options(json_data['data']['menu'])
        processed_items = process_json(items, options, outlet_code)
        store_in_db(processed_items)
        return {"message": "Restaurant details extracted and stored successfully."}
    except Exception as e:
        # Top-level boundary: log with traceback and report a generic error.
        logging.exception("An error occurred: %s", e)
        return {"error": "An error occurred while processing the request."}
# @app.post("/store_data")
# def store_data(details: StoreDataRequest):
# try:
# # Create a document from the request data
# document = NoonFoodLink(
# initial_response=details.initial_response,
# url_list=details.url_list
# )
# document.save()
# return {"message": "Data stored successfully."}
# except Exception as e:
# return {"error": str(e)}
# if __name__ == "__main__":
# import uvicorn
# # Run FastAPI application using Uvicorn
# uvicorn.run(app, host="127.0.0.1", port=8000)